diff options
Diffstat (limited to 'net')
48 files changed, 739 insertions, 338 deletions
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 589ff497d50c..321be94c445a 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -866,7 +866,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, unsigned long now = jiffies; unsigned char flags = 0; u8 filter_mode; - int err; __mdb_entry_to_br_ip(entry, &group, mdb_attrs); @@ -892,13 +891,9 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return -EINVAL; } - mp = br_mdb_ip_get(br, &group); - if (!mp) { - mp = br_multicast_new_group(br, &group); - err = PTR_ERR_OR_ZERO(mp); - if (err) - return err; - } + mp = br_multicast_new_group(br, &group); + if (IS_ERR(mp)) + return PTR_ERR(mp); /* host join */ if (!port) { diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index db4f2641d1cd..09140bc8c15e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2669,7 +2669,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge_mcast *brmctx, if (!pmctx || igmpv2) continue; - spin_lock_bh(&brmctx->br->multicast_lock); + spin_lock(&brmctx->br->multicast_lock); if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto unlock_continue; @@ -2717,7 +2717,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge_mcast *brmctx, if (changed) br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB); unlock_continue: - spin_unlock_bh(&brmctx->br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); } return err; @@ -2807,7 +2807,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge_mcast *brmctx, if (!pmctx || mldv1) continue; - spin_lock_bh(&brmctx->br->multicast_lock); + spin_lock(&brmctx->br->multicast_lock); if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto unlock_continue; @@ -2859,7 +2859,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge_mcast *brmctx, if (changed) br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB); unlock_continue: - 
spin_unlock_bh(&brmctx->br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); } return err; diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 55f29c9f9e08..f26f4cfa9e63 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -987,7 +987,7 @@ static int j1939_session_tx_eoma(struct j1939_session *session) /* wait for the EOMA packet to come in */ j1939_tp_set_rxtimeout(session, 1250); - netdev_dbg(session->priv->ndev, "%p: 0x%p\n", __func__, session); + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); return 0; } diff --git a/net/core/dev.c b/net/core/dev.c index 3be256051e99..fff62068a53d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8822,7 +8822,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { - size_t size = sizeof(sa->sa_data); + size_t size = sizeof(sa->sa_data_min); struct net_device *dev; int ret = 0; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7674bb9f3076..5cdbfbf9a7dc 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -342,7 +342,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof(ifr->ifr_hwaddr.sa_data), + min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f64654df71a2..5581d22cc191 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -316,6 +316,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128); + ref_tracker_dir_init(&net->notrefcnt_tracker, 128); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); @@ -436,6 +437,10 @@ 
static void net_free(struct net *net) { if (refcount_dec_and_test(&net->passive)) { kfree(rcu_access_pointer(net->gen)); + + /* There should not be any trackers left there. */ + ref_tracker_dir_exit(&net->notrefcnt_tracker); + kmem_cache_free(net_cachep, net); } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d1a3fa6f3f12..1d84a17eada5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -506,14 +506,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, */ size = SKB_DATA_ALIGN(size); size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); + osize = kmalloc_size_roundup(size); + data = kmalloc_reserve(osize, gfp_mask, node, &pfmemalloc); if (unlikely(!data)) goto nodata; - /* kmalloc(size) might give us more room than requested. + /* kmalloc_size_roundup() might give us more room than requested. * Put skb_shared_info exactly at the end of allocated zone, * to allow max possible filling before reallocation. 
*/ - osize = ksize(data); size = SKB_WITH_OVERHEAD(osize); prefetchw(data + size); @@ -748,6 +748,13 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } +static bool skb_pp_recycle(struct sk_buff *skb, void *data) +{ + if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) + return false; + return page_pool_return_skb_page(virt_to_page(data)); +} + static void skb_free_head(struct sk_buff *skb) { unsigned char *head = skb->head; @@ -1814,10 +1821,11 @@ EXPORT_SYMBOL(__pskb_copy_fclone); int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask) { - int i, osize = skb_end_offset(skb); - int size = osize + nhead + ntail; + unsigned int osize = skb_end_offset(skb); + unsigned int size = osize + nhead + ntail; long off; u8 *data; + int i; BUG_ON(nhead < 0); @@ -1825,15 +1833,16 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb_zcopy_downgrade_managed(skb); - size = SKB_DATA_ALIGN(size); - if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; - data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), - gfp_mask, NUMA_NO_NODE, NULL); + + size = SKB_DATA_ALIGN(size); + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + size = kmalloc_size_roundup(size); + data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) goto nodata; - size = SKB_WITH_OVERHEAD(ksize(data)); + size = SKB_WITH_OVERHEAD(size); /* Copy only real data... and, alas, header. This should be * optimized for the cases when header is void. 
@@ -6167,21 +6176,20 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, const int headlen, gfp_t gfp_mask) { int i; - int size = skb_end_offset(skb); + unsigned int size = skb_end_offset(skb); int new_hlen = headlen - off; u8 *data; - size = SKB_DATA_ALIGN(size); - if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; - data = kmalloc_reserve(size + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), - gfp_mask, NUMA_NO_NODE, NULL); + + size = SKB_DATA_ALIGN(size); + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + size = kmalloc_size_roundup(size); + data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) return -ENOMEM; - - size = SKB_WITH_OVERHEAD(ksize(data)); + size = SKB_WITH_OVERHEAD(size); /* Copy real data, and all frags */ skb_copy_from_linear_data_offset(skb, off, data, new_hlen); @@ -6286,22 +6294,21 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, int pos, gfp_t gfp_mask) { int i, k = 0; - int size = skb_end_offset(skb); + unsigned int size = skb_end_offset(skb); u8 *data; const int nfrags = skb_shinfo(skb)->nr_frags; struct skb_shared_info *shinfo; - size = SKB_DATA_ALIGN(size); - if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; - data = kmalloc_reserve(size + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), - gfp_mask, NUMA_NO_NODE, NULL); + + size = SKB_DATA_ALIGN(size); + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + size = kmalloc_size_roundup(size); + data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) return -ENOMEM; - - size = SKB_WITH_OVERHEAD(ksize(data)); + size = SKB_WITH_OVERHEAD(size); memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0])); diff --git a/net/core/sock.c b/net/core/sock.c index a3ba0358c77c..4571914a4aa8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1436,7 +1436,7 @@ set_sndbuf: break; } case SO_INCOMING_CPU: - WRITE_ONCE(sk->sk_incoming_cpu, val); + 
reuseport_update_incoming_cpu(sk, val); break; case SO_CNX_ADVICE: @@ -2094,6 +2094,9 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, if (likely(sk->sk_net_refcnt)) { get_net_track(net, &sk->ns_tracker, priority); sock_inuse_add(net, 1); + } else { + __netns_tracker_alloc(net, &sk->ns_tracker, + false, priority); } sock_net_set(sk, net); @@ -2149,6 +2152,9 @@ static void __sk_destruct(struct rcu_head *head) if (likely(sk->sk_net_refcnt)) put_net_track(sock_net(sk), &sk->ns_tracker); + else + __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); + sk_prot_free(sk->sk_prot_creator, sk); } @@ -2237,6 +2243,14 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) if (likely(newsk->sk_net_refcnt)) { get_net_track(sock_net(newsk), &newsk->ns_tracker, priority); sock_inuse_add(sock_net(newsk), 1); + } else { + /* Kernel sockets are not elevating the struct net refcount. + * Instead, use a tracker to more easily detect if a layer + * is not properly dismantling its kernel sockets at netns + * destroy time. 
+ */ + __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker, + false, priority); } sk_node_init(&newsk->sk_node); sock_lock_init(newsk); @@ -2730,7 +2744,7 @@ failure: } EXPORT_SYMBOL(sock_alloc_send_pskb); -int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, +int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, struct sockcm_cookie *sockc) { u32 tsflags; @@ -2784,7 +2798,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, return -EINVAL; if (cmsg->cmsg_level != SOL_SOCKET) continue; - ret = __sock_cmsg_send(sk, msg, cmsg, sockc); + ret = __sock_cmsg_send(sk, cmsg, sockc); if (ret) return ret; } diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index fb90e1e00773..5a165286e4d8 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -37,6 +37,70 @@ void reuseport_has_conns_set(struct sock *sk) } EXPORT_SYMBOL(reuseport_has_conns_set); +static void __reuseport_get_incoming_cpu(struct sock_reuseport *reuse) +{ + /* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */ + WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu + 1); +} + +static void __reuseport_put_incoming_cpu(struct sock_reuseport *reuse) +{ + /* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */ + WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu - 1); +} + +static void reuseport_get_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse) +{ + if (sk->sk_incoming_cpu >= 0) + __reuseport_get_incoming_cpu(reuse); +} + +static void reuseport_put_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse) +{ + if (sk->sk_incoming_cpu >= 0) + __reuseport_put_incoming_cpu(reuse); +} + +void reuseport_update_incoming_cpu(struct sock *sk, int val) +{ + struct sock_reuseport *reuse; + int old_sk_incoming_cpu; + + if (unlikely(!rcu_access_pointer(sk->sk_reuseport_cb))) { + /* Paired with READ_ONCE() in sk_incoming_cpu_update() + * and compute_score(). 
+ */ + WRITE_ONCE(sk->sk_incoming_cpu, val); + return; + } + + spin_lock_bh(&reuseport_lock); + + /* This must be done under reuseport_lock to avoid a race with + * reuseport_grow(), which accesses sk->sk_incoming_cpu without + * lock_sock() when detaching a shutdown()ed sk. + * + * Paired with READ_ONCE() in reuseport_select_sock_by_hash(). + */ + old_sk_incoming_cpu = sk->sk_incoming_cpu; + WRITE_ONCE(sk->sk_incoming_cpu, val); + + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + + /* reuseport_grow() has detached a closed sk. */ + if (!reuse) + goto out; + + if (old_sk_incoming_cpu < 0 && val >= 0) + __reuseport_get_incoming_cpu(reuse); + else if (old_sk_incoming_cpu >= 0 && val < 0) + __reuseport_put_incoming_cpu(reuse); + +out: + spin_unlock_bh(&reuseport_lock); +} + static int reuseport_sock_index(struct sock *sk, const struct sock_reuseport *reuse, bool closed) @@ -64,6 +128,7 @@ static void __reuseport_add_sock(struct sock *sk, /* paired with smp_rmb() in reuseport_(select|migrate)_sock() */ smp_wmb(); reuse->num_socks++; + reuseport_get_incoming_cpu(sk, reuse); } static bool __reuseport_detach_sock(struct sock *sk, @@ -76,6 +141,7 @@ static bool __reuseport_detach_sock(struct sock *sk, reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; reuse->num_socks--; + reuseport_put_incoming_cpu(sk, reuse); return true; } @@ -86,6 +152,7 @@ static void __reuseport_add_closed_sock(struct sock *sk, reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk; /* paired with READ_ONCE() in inet_csk_bind_conflict() */ WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1); + reuseport_get_incoming_cpu(sk, reuse); } static bool __reuseport_detach_closed_sock(struct sock *sk, @@ -99,6 +166,7 @@ static bool __reuseport_detach_closed_sock(struct sock *sk, reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks]; /* paired with READ_ONCE() in inet_csk_bind_conflict() */ 
WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1); + reuseport_put_incoming_cpu(sk, reuse); return true; } @@ -166,6 +234,7 @@ int reuseport_alloc(struct sock *sk, bool bind_inany) reuse->bind_inany = bind_inany; reuse->socks[0] = sk; reuse->num_socks = 1; + reuseport_get_incoming_cpu(sk, reuse); rcu_assign_pointer(sk->sk_reuseport_cb, reuse); out: @@ -209,6 +278,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) more_reuse->reuseport_id = reuse->reuseport_id; more_reuse->bind_inany = reuse->bind_inany; more_reuse->has_conns = reuse->has_conns; + more_reuse->incoming_cpu = reuse->incoming_cpu; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); @@ -458,18 +528,32 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks, static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse, u32 hash, u16 num_socks) { + struct sock *first_valid_sk = NULL; int i, j; i = j = reciprocal_scale(hash, num_socks); - while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { + do { + struct sock *sk = reuse->socks[i]; + + if (sk->sk_state != TCP_ESTABLISHED) { + /* Paired with WRITE_ONCE() in __reuseport_(get|put)_incoming_cpu(). */ + if (!READ_ONCE(reuse->incoming_cpu)) + return sk; + + /* Paired with WRITE_ONCE() in reuseport_update_incoming_cpu(). 
*/ + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) + return sk; + + if (!first_valid_sk) + first_valid_sk = sk; + } + i++; if (i >= num_socks) i = 0; - if (i == j) - return NULL; - } + } while (i != j); - return reuse->socks[i]; + return first_valid_sk; } /** diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 7dfc00c9fb32..9ddc3a9e89e4 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -278,6 +278,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned int len); +void dccp_destruct_common(struct sock *sk); int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); void dccp_destroy_sock(struct sock *sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e57b43006074..ae62b1591dea 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1021,6 +1021,12 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .sockaddr_len = sizeof(struct sockaddr_in6), }; +static void dccp_v6_sk_destruct(struct sock *sk) +{ + dccp_destruct_common(sk); + inet6_sock_destruct(sk); +} + /* NOTE: A lot of things set to zero explicitly by call to * sk_alloc() so need not be done here. 
*/ @@ -1033,17 +1039,12 @@ static int dccp_v6_init_sock(struct sock *sk) if (unlikely(!dccp_v6_ctl_sock_initialized)) dccp_v6_ctl_sock_initialized = 1; inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; + sk->sk_destruct = dccp_v6_sk_destruct; } return err; } -static void dccp_v6_destroy_sock(struct sock *sk) -{ - dccp_destroy_sock(sk); - inet6_destroy_sock(sk); -} - static struct timewait_sock_ops dccp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct dccp6_timewait_sock), }; @@ -1066,7 +1067,7 @@ static struct proto dccp_v6_prot = { .accept = inet_csk_accept, .get_port = inet_csk_get_port, .shutdown = dccp_shutdown, - .destroy = dccp_v6_destroy_sock, + .destroy = dccp_destroy_sock, .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), diff --git a/net/dccp/proto.c b/net/dccp/proto.c index c548ca3e9b0e..9494b0d224f9 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -171,12 +171,18 @@ const char *dccp_packet_name(const int type) EXPORT_SYMBOL_GPL(dccp_packet_name); -static void dccp_sk_destruct(struct sock *sk) +void dccp_destruct_common(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_tx_ccid = NULL; +} +EXPORT_SYMBOL_GPL(dccp_destruct_common); + +static void dccp_sk_destruct(struct sock *sk) +{ + dccp_destruct_common(sk); inet_sock_destruct(sk); } diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 566adf85e658..ee3e02da0013 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -202,6 +202,12 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(100, FX, Half), __DEFINE_LINK_MODE_NAME(100, FX, Full), __DEFINE_LINK_MODE_NAME(10, T1L, Full), + __DEFINE_LINK_MODE_NAME(800000, CR8, Full), + __DEFINE_LINK_MODE_NAME(800000, KR8, Full), + __DEFINE_LINK_MODE_NAME(800000, DR8, Full), + __DEFINE_LINK_MODE_NAME(800000, DR8_2, Full), + __DEFINE_LINK_MODE_NAME(800000, SR8, Full), + __DEFINE_LINK_MODE_NAME(800000, 
VR8, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -238,6 +244,8 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_X 1 #define __LINK_MODE_LANES_FX 1 #define __LINK_MODE_LANES_T1L 1 +#define __LINK_MODE_LANES_VR8 8 +#define __LINK_MODE_LANES_DR8_2 8 #define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ @@ -352,6 +360,12 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(100, FX, Half), __DEFINE_LINK_MODE_PARAMS(100, FX, Full), __DEFINE_LINK_MODE_PARAMS(10, T1L, Full), + __DEFINE_LINK_MODE_PARAMS(800000, CR8, Full), + __DEFINE_LINK_MODE_PARAMS(800000, KR8, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR8, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR8_2, Full), + __DEFINE_LINK_MODE_PARAMS(800000, SR8, Full), + __DEFINE_LINK_MODE_PARAMS(800000, VR8, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index de259b5170ab..57546e07e06a 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -129,6 +129,9 @@ wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size) wpan_phy_net_set(&rdev->wpan_phy, &init_net); init_waitqueue_head(&rdev->dev_wait); + init_waitqueue_head(&rdev->wpan_phy.sync_txq); + + spin_lock_init(&rdev->wpan_phy.queue_lock); return &rdev->wpan_phy; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6e19cad154f5..5f16807d3235 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -267,7 +267,7 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, } #endif if (cmsg->cmsg_level == SOL_SOCKET) { - err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc); + err = __sock_cmsg_send(sk, cmsg, &ipc->sockc); if (err) return err; continue; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6a320a614e54..89accc3c8bb3 
100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1448,7 +1448,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; - if (size < (sk->sk_rcvbuf >> 2) && + if (size < READ_ONCE(up->forward_threshold) && !skb_queue_empty(&up->reader_queue)) return; } else { @@ -1622,7 +1622,7 @@ static void udp_destruct_sock(struct sock *sk) int udp_init_sock(struct sock *sk) { - skb_queue_head_init(&udp_sk(sk)->reader_queue); + udp_lib_init_sock(sk); sk->sk_destruct = udp_destruct_sock; set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); return 0; @@ -2672,6 +2672,18 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, int err = 0; int is_udplite = IS_UDPLITE(sk); + if (level == SOL_SOCKET) { + err = sk_setsockopt(sk, level, optname, optval, optlen); + + if (optname == SO_RCVBUF || optname == SO_RCVBUFFORCE) { + sockopt_lock_sock(sk); + /* paired with READ_ONCE in udp_rmem_release() */ + WRITE_ONCE(up->forward_threshold, sk->sk_rcvbuf >> 2); + sockopt_release_sock(sk); + } + return err; + } + if (optlen < sizeof(int)) return -EINVAL; @@ -2785,7 +2797,7 @@ EXPORT_SYMBOL(udp_lib_setsockopt); int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_push_pending_frames); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 024191004982..68075295d587 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -114,6 +114,7 @@ void inet6_sock_destruct(struct sock *sk) inet6_cleanup_sock(sk); inet_sock_destruct(sk); } +EXPORT_SYMBOL_GPL(inet6_sock_destruct); static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) @@ -489,7 +490,7 @@ int inet6_release(struct socket *sock) } EXPORT_SYMBOL(inet6_release); -void 
inet6_destroy_sock(struct sock *sk) +void inet6_cleanup_sock(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -514,12 +515,6 @@ void inet6_destroy_sock(struct sock *sk) txopt_put(opt); } } -EXPORT_SYMBOL_GPL(inet6_destroy_sock); - -void inet6_cleanup_sock(struct sock *sk) -{ - inet6_destroy_sock(sk); -} EXPORT_SYMBOL_GPL(inet6_cleanup_sock); /* diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5ecb56522f9d..df7e032ce87d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -771,7 +771,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, } if (cmsg->cmsg_level == SOL_SOCKET) { - err = __sock_cmsg_send(sk, msg, cmsg, &ipc6->sockc); + err = __sock_cmsg_send(sk, cmsg, &ipc6->sockc); if (err) return err; continue; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c035a96fba3a..7673e1dd1147 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -870,26 +870,6 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) return 0; } -/** - * ip6gre_tnl_addr_conflict - compare packet addresses to tunnel's own - * @t: the outgoing tunnel device - * @hdr: IPv6 header from the incoming packet - * - * Description: - * Avoid trivial tunneling loop by checking that tunnel exit-point - * doesn't match source of incoming packet. 
- * - * Return: - * 1 if conflict, - * 0 else - **/ - -static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t, - const struct ipv6hdr *hdr) -{ - return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); -} - static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 532f4478c884..9ce51680290b 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1005,10 +1005,8 @@ unlock: return retv; e_inval: - sockopt_release_sock(sk); - if (needs_rtnl) - rtnl_unlock(); - return -EINVAL; + retv = -EINVAL; + goto unlock; } int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 86c26e48d065..808983bc2ec9 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -23,11 +23,6 @@ #include <linux/bpf-cgroup.h> #include <net/ping.h> -static void ping_v6_destroy(struct sock *sk) -{ - inet6_destroy_sock(sk); -} - /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -205,7 +200,6 @@ struct proto pingv6_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, - .destroy = ping_v6_destroy, .pre_connect = ping_v6_pre_connect, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 722de9dd0ff7..a06a9f847db5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1173,8 +1173,6 @@ static void raw6_destroy(struct sock *sk) lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - - inet6_destroy_sock(sk); } static int rawv6_init_sk(struct sock *sk) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2a3f9296df1e..f676be14e6b6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1966,12 +1966,6 @@ static int tcp_v6_init_sock(struct sock *sk) 
return 0; } -static void tcp_v6_destroy_sock(struct sock *sk) -{ - tcp_v4_destroy_sock(sk); - inet6_destroy_sock(sk); -} - #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, @@ -2164,7 +2158,7 @@ struct proto tcpv6_prot = { .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v6_init_sock, - .destroy = tcp_v6_destroy_sock, + .destroy = tcp_v4_destroy_sock, .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 129ec5a9b0eb..297f7cc06044 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -64,7 +64,7 @@ static void udpv6_destruct_sock(struct sock *sk) int udpv6_init_sock(struct sock *sk) { - skb_queue_head_init(&udp_sk(sk)->reader_queue); + udp_lib_init_sock(sk); sk->sk_destruct = udpv6_destruct_sock; return 0; } @@ -1661,8 +1661,6 @@ void udpv6_destroy_sock(struct sock *sk) udp_encap_disable(); } } - - inet6_destroy_sock(sk); } /* @@ -1671,7 +1669,7 @@ void udpv6_destroy_sock(struct sock *sk) int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 9dbd801ddb98..2478aa60145f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -257,8 +257,6 @@ static void l2tp_ip6_destroy_sock(struct sock *sk) if (tunnel) l2tp_tunnel_delete(tunnel); - - inet6_destroy_sock(sk); } static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index 1e4a9f74ed43..dc2d918fac68 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -46,7 +46,7 @@ static int ieee802154_suspend(struct wpan_phy *wpan_phy) if (!local->open_count) goto suspend; 
- ieee802154_stop_queue(&local->hw); + ieee802154_sync_and_hold_queue(local); synchronize_net(); /* stop hardware - this must stop RX */ @@ -67,12 +67,12 @@ static int ieee802154_resume(struct wpan_phy *wpan_phy) goto wake_up; /* restart hardware */ - ret = drv_start(local); + ret = drv_start(local, local->phy->filtering, &local->addr_filt); if (ret) return ret; wake_up: - ieee802154_wake_queue(&local->hw); + ieee802154_release_queue(local); local->suspended = false; return 0; } diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index d23f0db98015..a7af3f0ddb3e 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -24,203 +24,290 @@ drv_xmit_sync(struct ieee802154_local *local, struct sk_buff *skb) return local->ops->xmit_sync(&local->hw, skb); } -static inline int drv_start(struct ieee802154_local *local) +static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id) { + struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - trace_802154_drv_start(local); - local->started = true; - smp_mb(); - ret = local->ops->start(&local->hw); + if (!local->ops->set_hw_addr_filt) { + WARN_ON(1); + return -EOPNOTSUPP; + } + + filt.pan_id = pan_id; + + trace_802154_drv_set_pan_id(local, pan_id); + ret = local->ops->set_hw_addr_filt(&local->hw, &filt, + IEEE802154_AFILT_PANID_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } -static inline void drv_stop(struct ieee802154_local *local) +static inline int +drv_set_extended_addr(struct ieee802154_local *local, __le64 extended_addr) { - might_sleep(); + struct ieee802154_hw_addr_filt filt; + int ret; - trace_802154_drv_stop(local); - local->ops->stop(&local->hw); - trace_802154_drv_return_void(local); + might_sleep(); - /* sync away all work on the tasklet before clearing started */ - tasklet_disable(&local->tasklet); - tasklet_enable(&local->tasklet); + if (!local->ops->set_hw_addr_filt) { + WARN_ON(1); + return -EOPNOTSUPP; + } - barrier(); + 
filt.ieee_addr = extended_addr; - local->started = false; + trace_802154_drv_set_extended_addr(local, extended_addr); + ret = local->ops->set_hw_addr_filt(&local->hw, &filt, + IEEE802154_AFILT_IEEEADDR_CHANGED); + trace_802154_drv_return_int(local, ret); + return ret; } static inline int -drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel) +drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr) { + struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - trace_802154_drv_set_channel(local, page, channel); - ret = local->ops->set_channel(&local->hw, page, channel); + if (!local->ops->set_hw_addr_filt) { + WARN_ON(1); + return -EOPNOTSUPP; + } + + filt.short_addr = short_addr; + + trace_802154_drv_set_short_addr(local, short_addr); + ret = local->ops->set_hw_addr_filt(&local->hw, &filt, + IEEE802154_AFILT_SADDR_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } -static inline int drv_set_tx_power(struct ieee802154_local *local, s32 mbm) +static inline int +drv_set_pan_coord(struct ieee802154_local *local, bool is_coord) { + struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - if (!local->ops->set_txpower) { + if (!local->ops->set_hw_addr_filt) { WARN_ON(1); return -EOPNOTSUPP; } - trace_802154_drv_set_tx_power(local, mbm); - ret = local->ops->set_txpower(&local->hw, mbm); + filt.pan_coord = is_coord; + + trace_802154_drv_set_pan_coord(local, is_coord); + ret = local->ops->set_hw_addr_filt(&local->hw, &filt, + IEEE802154_AFILT_PANC_CHANGED); trace_802154_drv_return_int(local, ret); return ret; } -static inline int drv_set_cca_mode(struct ieee802154_local *local, - const struct wpan_phy_cca *cca) +static inline int +drv_set_promiscuous_mode(struct ieee802154_local *local, bool on) { int ret; might_sleep(); - if (!local->ops->set_cca_mode) { + if (!local->ops->set_promiscuous_mode) { WARN_ON(1); return -EOPNOTSUPP; } - trace_802154_drv_set_cca_mode(local, cca); - ret = local->ops->set_cca_mode(&local->hw, 
cca); + trace_802154_drv_set_promiscuous_mode(local, on); + ret = local->ops->set_promiscuous_mode(&local->hw, on); trace_802154_drv_return_int(local, ret); return ret; } -static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) +static inline int drv_start(struct ieee802154_local *local, + enum ieee802154_filtering_level level, + const struct ieee802154_hw_addr_filt *addr_filt) { int ret; might_sleep(); - if (!local->ops->set_lbt) { + /* setup receive mode parameters e.g. address mode */ + if (local->hw.flags & IEEE802154_HW_AFILT) { + ret = drv_set_pan_id(local, addr_filt->pan_id); + if (ret < 0) + return ret; + + ret = drv_set_short_addr(local, addr_filt->short_addr); + if (ret < 0) + return ret; + + ret = drv_set_extended_addr(local, addr_filt->ieee_addr); + if (ret < 0) + return ret; + } + + switch (level) { + case IEEE802154_FILTERING_NONE: + fallthrough; + case IEEE802154_FILTERING_1_FCS: + fallthrough; + case IEEE802154_FILTERING_2_PROMISCUOUS: + /* TODO: Requires a different receive mode setup e.g. + * at86rf233 hardware. + */ + fallthrough; + case IEEE802154_FILTERING_3_SCAN: + if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { + ret = drv_set_promiscuous_mode(local, true); + if (ret < 0) + return ret; + } else { + return -EOPNOTSUPP; + } + + /* In practice other filtering levels can be requested, but as + * for now most hardware/drivers only support + * IEEE802154_FILTERING_NONE, we fallback to this actual + * filtering level in hardware and make our own additional + * filtering in mac802154 receive path. + * + * TODO: Move this logic to the device drivers as hardware may + * support more higher level filters. Hardware may also require + * a different order how register are set, which could currently + * be buggy, so all received parameters need to be moved to the + * start() callback and let the driver go into the mode before + * it will turn on receive handling. 
+ */ + local->phy->filtering = IEEE802154_FILTERING_NONE; + break; + case IEEE802154_FILTERING_4_FRAME_FIELDS: + /* Do not error out if IEEE802154_HW_PROMISCUOUS because we + * expect the hardware to operate at the level + * IEEE802154_FILTERING_4_FRAME_FIELDS anyway. + */ + if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { + ret = drv_set_promiscuous_mode(local, false); + if (ret < 0) + return ret; + } + + local->phy->filtering = IEEE802154_FILTERING_4_FRAME_FIELDS; + break; + default: WARN_ON(1); - return -EOPNOTSUPP; + return -EINVAL; } - trace_802154_drv_set_lbt_mode(local, mode); - ret = local->ops->set_lbt(&local->hw, mode); + trace_802154_drv_start(local); + local->started = true; + smp_mb(); + ret = local->ops->start(&local->hw); trace_802154_drv_return_int(local, ret); return ret; } +static inline void drv_stop(struct ieee802154_local *local) +{ + might_sleep(); + + trace_802154_drv_stop(local); + local->ops->stop(&local->hw); + trace_802154_drv_return_void(local); + + /* sync away all work on the tasklet before clearing started */ + tasklet_disable(&local->tasklet); + tasklet_enable(&local->tasklet); + + barrier(); + + local->started = false; +} + static inline int -drv_set_cca_ed_level(struct ieee802154_local *local, s32 mbm) +drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel) { int ret; might_sleep(); - if (!local->ops->set_cca_ed_level) { - WARN_ON(1); - return -EOPNOTSUPP; - } - - trace_802154_drv_set_cca_ed_level(local, mbm); - ret = local->ops->set_cca_ed_level(&local->hw, mbm); + trace_802154_drv_set_channel(local, page, channel); + ret = local->ops->set_channel(&local->hw, page, channel); trace_802154_drv_return_int(local, ret); return ret; } -static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id) +static inline int drv_set_tx_power(struct ieee802154_local *local, s32 mbm) { - struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - if (!local->ops->set_hw_addr_filt) { + if 
(!local->ops->set_txpower) { WARN_ON(1); return -EOPNOTSUPP; } - filt.pan_id = pan_id; - - trace_802154_drv_set_pan_id(local, pan_id); - ret = local->ops->set_hw_addr_filt(&local->hw, &filt, - IEEE802154_AFILT_PANID_CHANGED); + trace_802154_drv_set_tx_power(local, mbm); + ret = local->ops->set_txpower(&local->hw, mbm); trace_802154_drv_return_int(local, ret); return ret; } -static inline int -drv_set_extended_addr(struct ieee802154_local *local, __le64 extended_addr) +static inline int drv_set_cca_mode(struct ieee802154_local *local, + const struct wpan_phy_cca *cca) { - struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - if (!local->ops->set_hw_addr_filt) { + if (!local->ops->set_cca_mode) { WARN_ON(1); return -EOPNOTSUPP; } - filt.ieee_addr = extended_addr; - - trace_802154_drv_set_extended_addr(local, extended_addr); - ret = local->ops->set_hw_addr_filt(&local->hw, &filt, - IEEE802154_AFILT_IEEEADDR_CHANGED); + trace_802154_drv_set_cca_mode(local, cca); + ret = local->ops->set_cca_mode(&local->hw, cca); trace_802154_drv_return_int(local, ret); return ret; } -static inline int -drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr) +static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) { - struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - if (!local->ops->set_hw_addr_filt) { + if (!local->ops->set_lbt) { WARN_ON(1); return -EOPNOTSUPP; } - filt.short_addr = short_addr; - - trace_802154_drv_set_short_addr(local, short_addr); - ret = local->ops->set_hw_addr_filt(&local->hw, &filt, - IEEE802154_AFILT_SADDR_CHANGED); + trace_802154_drv_set_lbt_mode(local, mode); + ret = local->ops->set_lbt(&local->hw, mode); trace_802154_drv_return_int(local, ret); return ret; } static inline int -drv_set_pan_coord(struct ieee802154_local *local, bool is_coord) +drv_set_cca_ed_level(struct ieee802154_local *local, s32 mbm) { - struct ieee802154_hw_addr_filt filt; int ret; might_sleep(); - if 
(!local->ops->set_hw_addr_filt) { + if (!local->ops->set_cca_ed_level) { WARN_ON(1); return -EOPNOTSUPP; } - filt.pan_coord = is_coord; - - trace_802154_drv_set_pan_coord(local, is_coord); - ret = local->ops->set_hw_addr_filt(&local->hw, &filt, - IEEE802154_AFILT_PANC_CHANGED); + trace_802154_drv_set_cca_ed_level(local, mbm); + ret = local->ops->set_cca_ed_level(&local->hw, mbm); trace_802154_drv_return_int(local, ret); return ret; } @@ -264,22 +351,4 @@ drv_set_max_frame_retries(struct ieee802154_local *local, s8 max_frame_retries) return ret; } -static inline int -drv_set_promiscuous_mode(struct ieee802154_local *local, bool on) -{ - int ret; - - might_sleep(); - - if (!local->ops->set_promiscuous_mode) { - WARN_ON(1); - return -EOPNOTSUPP; - } - - trace_802154_drv_set_promiscuous_mode(local, on); - ret = local->ops->set_promiscuous_mode(&local->hw, on); - trace_802154_drv_return_int(local, ret); - return ret; -} - #endif /* __MAC802154_DRIVER_OPS */ diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 1381e6a5e180..509e0172fe82 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -26,6 +26,8 @@ struct ieee802154_local { struct ieee802154_hw hw; const struct ieee802154_ops *ops; + /* hardware address filter */ + struct ieee802154_hw_addr_filt addr_filt; /* ieee802154 phy */ struct wpan_phy *phy; @@ -55,7 +57,7 @@ struct ieee802154_local { struct sk_buff_head skb_queue; struct sk_buff *tx_skb; - struct work_struct tx_work; + struct work_struct sync_tx_work; /* A negative Linux error code or a null/positive MLME error status */ int tx_result; }; @@ -82,6 +84,16 @@ struct ieee802154_sub_if_data { struct ieee802154_local *local; struct net_device *dev; + /* Each interface starts and works in nominal state at a given filtering + * level given by iface_default_filtering, which is set once for all at + * the interface creation and should not evolve over time. 
For some MAC + * operations however, the filtering level may change temporarily, as + * reflected in the required_filtering field. The actual filtering at + * the PHY level may be different and is shown in struct wpan_phy. + */ + enum ieee802154_filtering_level iface_default_filtering; + enum ieee802154_filtering_level required_filtering; + unsigned long state; char name[IFNAMSIZ]; @@ -123,13 +135,53 @@ ieee802154_sdata_running(struct ieee802154_sub_if_data *sdata) extern struct ieee802154_mlme_ops mac802154_mlme_wpan; void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb); -void ieee802154_xmit_worker(struct work_struct *work); +void ieee802154_xmit_sync_worker(struct work_struct *work); +int ieee802154_sync_and_hold_queue(struct ieee802154_local *local); +int ieee802154_mlme_op_pre(struct ieee802154_local *local); +int ieee802154_mlme_tx(struct ieee802154_local *local, + struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb); +void ieee802154_mlme_op_post(struct ieee802154_local *local); +int ieee802154_mlme_tx_one(struct ieee802154_local *local, + struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb); netdev_tx_t ieee802154_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t ieee802154_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer); +/** + * ieee802154_hold_queue - hold ieee802154 queue + * @local: main mac object + * + * Hold a queue by incrementing an atomic counter and requesting the netif + * queues to be stopped. The queues cannot be woken up while the counter has not + * been reset with as any ieee802154_release_queue() calls as needed. + */ +void ieee802154_hold_queue(struct ieee802154_local *local); + +/** + * ieee802154_release_queue - release ieee802154 queue + * @local: main mac object + * + * Release a queue which is held by decrementing an atomic counter and wake it + * up only if the counter reaches 0. 
+ */ +void ieee802154_release_queue(struct ieee802154_local *local); + +/** + * ieee802154_disable_queue - disable ieee802154 queue + * @local: main mac object + * + * When trying to sync the Tx queue, we cannot just stop the queue + * (which is basically a bit being set without proper lock handling) + * because it would be racy. We actually need to call netif_tx_disable() + * instead, which is done by this helper. Restarting the queue can + * however still be done with a regular wake call. + */ +void ieee802154_disable_queue(struct ieee802154_local *local); + /* MIB callbacks */ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 500ed1b81250..d9b50884d34e 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -147,25 +147,12 @@ static int ieee802154_setup_hw(struct ieee802154_sub_if_data *sdata) struct wpan_dev *wpan_dev = &sdata->wpan_dev; int ret; - if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { - ret = drv_set_promiscuous_mode(local, - wpan_dev->promiscuous_mode); - if (ret < 0) - return ret; - } + sdata->required_filtering = sdata->iface_default_filtering; if (local->hw.flags & IEEE802154_HW_AFILT) { - ret = drv_set_pan_id(local, wpan_dev->pan_id); - if (ret < 0) - return ret; - - ret = drv_set_extended_addr(local, wpan_dev->extended_addr); - if (ret < 0) - return ret; - - ret = drv_set_short_addr(local, wpan_dev->short_addr); - if (ret < 0) - return ret; + local->addr_filt.pan_id = wpan_dev->pan_id; + local->addr_filt.ieee_addr = wpan_dev->extended_addr; + local->addr_filt.short_addr = wpan_dev->short_addr; } if (local->hw.flags & IEEE802154_HW_LBT) { @@ -206,7 +193,8 @@ static int mac802154_slave_open(struct net_device *dev) if (res) goto err; - res = drv_start(local); + res = drv_start(local, sdata->required_filtering, + &local->addr_filt); if (res) goto err; } @@ -223,15 +211,16 @@ err: static int ieee802154_check_mac_settings(struct ieee802154_local 
*local, - struct wpan_dev *wpan_dev, - struct wpan_dev *nwpan_dev) + struct ieee802154_sub_if_data *sdata, + struct ieee802154_sub_if_data *nsdata) { + struct wpan_dev *nwpan_dev = &nsdata->wpan_dev; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + ASSERT_RTNL(); - if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { - if (wpan_dev->promiscuous_mode != nwpan_dev->promiscuous_mode) - return -EBUSY; - } + if (sdata->iface_default_filtering != nsdata->iface_default_filtering) + return -EBUSY; if (local->hw.flags & IEEE802154_HW_AFILT) { if (wpan_dev->pan_id != nwpan_dev->pan_id || @@ -285,8 +274,7 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, /* check all phy mac sublayer settings are the same. * We have only one phy, different values makes trouble. */ - ret = ieee802154_check_mac_settings(local, wpan_dev, - &nsdata->wpan_dev); + ret = ieee802154_check_mac_settings(local, sdata, nsdata); if (ret < 0) return ret; } @@ -586,7 +574,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, sdata->dev->priv_destructor = mac802154_wpan_free; sdata->dev->netdev_ops = &mac802154_wpan_ops; sdata->dev->ml_priv = &mac802154_mlme_wpan; - wpan_dev->promiscuous_mode = false; + sdata->iface_default_filtering = IEEE802154_FILTERING_4_FRAME_FIELDS; wpan_dev->header_ops = &ieee802154_header_ops; mutex_init(&sdata->sec_mtx); @@ -600,7 +588,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, case NL802154_IFTYPE_MONITOR: sdata->dev->needs_free_netdev = true; sdata->dev->netdev_ops = &mac802154_monitor_ops; - wpan_dev->promiscuous_mode = true; + sdata->iface_default_filtering = IEEE802154_FILTERING_NONE; break; default: BUG(); diff --git a/net/mac802154/main.c b/net/mac802154/main.c index bd7bdb1219dd..40fab08df24b 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -95,7 +95,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) skb_queue_head_init(&local->skb_queue); - INIT_WORK(&local->tx_work, 
ieee802154_xmit_worker); + INIT_WORK(&local->sync_tx_work, ieee802154_xmit_sync_worker); /* init supported flags with 802.15.4 default ranges */ phy->supported.max_minbe = 8; diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 726b47a4611b..0724aac8f48c 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -34,6 +34,7 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, struct sk_buff *skb, const struct ieee802154_hdr *hdr) { struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct wpan_phy *wpan_phy = sdata->local->hw.phy; __le16 span, sshort; int rc; @@ -42,6 +43,17 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, span = wpan_dev->pan_id; sshort = wpan_dev->short_addr; + /* Level 3 filtering: Only beacons are accepted during scans */ + if (sdata->required_filtering == IEEE802154_FILTERING_3_SCAN && + sdata->required_filtering > wpan_phy->filtering) { + if (mac_cb(skb)->type != IEEE802154_FC_TYPE_BEACON) { + dev_dbg(&sdata->dev->dev, + "drop non-beacon frame (0x%x) during scan\n", + mac_cb(skb)->type); + goto fail; + } + } + switch (mac_cb(skb)->dest.mode) { case IEEE802154_ADDR_NONE: if (hdr->source.mode != IEEE802154_ADDR_NONE) @@ -114,8 +126,10 @@ fail: static void ieee802154_print_addr(const char *name, const struct ieee802154_addr *addr) { - if (addr->mode == IEEE802154_ADDR_NONE) + if (addr->mode == IEEE802154_ADDR_NONE) { pr_debug("%s not present\n", name); + return; + } pr_debug("%s PAN ID: %04x\n", name, le16_to_cpu(addr->pan_id)); if (addr->mode == IEEE802154_ADDR_SHORT) { @@ -209,6 +223,13 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local, if (!ieee802154_sdata_running(sdata)) continue; + /* Do not deliver packets received on interfaces expecting + * AACK=1 if the address filters where disabled. 
+ */ + if (local->hw.phy->filtering < IEEE802154_FILTERING_4_FRAME_FIELDS && + sdata->required_filtering == IEEE802154_FILTERING_4_FRAME_FIELDS) + continue; + ieee802154_subif_frame(sdata, skb, &hdr); skb = NULL; break; @@ -268,10 +289,8 @@ void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb) ieee802154_monitors_rx(local, skb); - /* Check if transceiver doesn't validate the checksum. - * If not we validate the checksum here. - */ - if (local->hw.flags & IEEE802154_HW_RX_DROP_BAD_CKSUM) { + /* Level 1 filtering: Check the FCS by software when relevant */ + if (local->hw.phy->filtering == IEEE802154_FILTERING_NONE) { crc = crc_ccitt(0, skb->data, skb->len); if (crc) { rcu_read_unlock(); diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index c829e4a75325..9d8d43cf1e64 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -22,10 +22,10 @@ #include "ieee802154_i.h" #include "driver-ops.h" -void ieee802154_xmit_worker(struct work_struct *work) +void ieee802154_xmit_sync_worker(struct work_struct *work) { struct ieee802154_local *local = - container_of(work, struct ieee802154_local, tx_work); + container_of(work, struct ieee802154_local, sync_tx_work); struct sk_buff *skb = local->tx_skb; struct net_device *dev = skb->dev; int res; @@ -43,7 +43,9 @@ void ieee802154_xmit_worker(struct work_struct *work) err_tx: /* Restart the netif queue on each sub_if_data object. */ - ieee802154_wake_queue(&local->hw); + ieee802154_release_queue(local); + if (atomic_dec_and_test(&local->phy->ongoing_txs)) + wake_up(&local->phy->sync_txq); kfree_skb(skb); netdev_dbg(dev, "transmission failed\n"); } @@ -65,7 +67,7 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) consume_skb(skb); skb = nskb; } else { - goto err_tx; + goto err_free_skb; } } @@ -74,32 +76,134 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) } /* Stop the netif queue on each sub_if_data object. 
*/ - ieee802154_stop_queue(&local->hw); + ieee802154_hold_queue(local); + atomic_inc(&local->phy->ongoing_txs); - /* async is priority, otherwise sync is fallback */ + /* Drivers should preferably implement the async callback. In some rare + * cases they only provide a sync callback which we will use as a + * fallback. + */ if (local->ops->xmit_async) { unsigned int len = skb->len; ret = drv_xmit_async(local, skb); - if (ret) { - ieee802154_wake_queue(&local->hw); - goto err_tx; - } + if (ret) + goto err_wake_netif_queue; dev->stats.tx_packets++; dev->stats.tx_bytes += len; } else { local->tx_skb = skb; - queue_work(local->workqueue, &local->tx_work); + queue_work(local->workqueue, &local->sync_tx_work); } return NETDEV_TX_OK; -err_tx: +err_wake_netif_queue: + ieee802154_release_queue(local); + if (atomic_dec_and_test(&local->phy->ongoing_txs)) + wake_up(&local->phy->sync_txq); +err_free_skb: kfree_skb(skb); return NETDEV_TX_OK; } +static int ieee802154_sync_queue(struct ieee802154_local *local) +{ + int ret; + + ieee802154_hold_queue(local); + ieee802154_disable_queue(local); + wait_event(local->phy->sync_txq, !atomic_read(&local->phy->ongoing_txs)); + ret = local->tx_result; + ieee802154_release_queue(local); + + return ret; +} + +int ieee802154_sync_and_hold_queue(struct ieee802154_local *local) +{ + int ret; + + ieee802154_hold_queue(local); + ret = ieee802154_sync_queue(local); + set_bit(WPAN_PHY_FLAG_STATE_QUEUE_STOPPED, &local->phy->flags); + + return ret; +} + +int ieee802154_mlme_op_pre(struct ieee802154_local *local) +{ + return ieee802154_sync_and_hold_queue(local); +} + +int ieee802154_mlme_tx(struct ieee802154_local *local, + struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb) +{ + int ret; + + /* Avoid possible calls to ->ndo_stop() when we asynchronously perform + * MLME transmissions. 
+ */ + rtnl_lock(); + + /* Ensure the device was not stopped, otherwise error out */ + if (!local->open_count) { + rtnl_unlock(); + return -ENETDOWN; + } + + /* Warn if the ieee802154 core thinks MLME frames can be sent while the + * net interface expects this cannot happen. + */ + if (WARN_ON_ONCE(!netif_running(sdata->dev))) { + rtnl_unlock(); + return -ENETDOWN; + } + + ieee802154_tx(local, skb); + ret = ieee802154_sync_queue(local); + + rtnl_unlock(); + + return ret; +} + +void ieee802154_mlme_op_post(struct ieee802154_local *local) +{ + ieee802154_release_queue(local); +} + +int ieee802154_mlme_tx_one(struct ieee802154_local *local, + struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb) +{ + int ret; + + ieee802154_mlme_op_pre(local); + ret = ieee802154_mlme_tx(local, sdata, skb); + ieee802154_mlme_op_post(local); + + return ret; +} + +static bool ieee802154_queue_is_stopped(struct ieee802154_local *local) +{ + return test_bit(WPAN_PHY_FLAG_STATE_QUEUE_STOPPED, &local->phy->flags); +} + +static netdev_tx_t +ieee802154_hot_tx(struct ieee802154_local *local, struct sk_buff *skb) +{ + /* Warn if the net interface tries to transmit frames while the + * ieee802154 core assumes the queue is stopped. 
+ */ + WARN_ON_ONCE(ieee802154_queue_is_stopped(local)); + + return ieee802154_tx(local, skb); +} + netdev_tx_t ieee802154_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -107,7 +211,7 @@ ieee802154_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->skb_iif = dev->ifindex; - return ieee802154_tx(sdata->local, skb); + return ieee802154_hot_tx(sdata->local, skb); } netdev_tx_t @@ -129,5 +233,5 @@ ieee802154_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->skb_iif = dev->ifindex; - return ieee802154_tx(sdata->local, skb); + return ieee802154_hot_tx(sdata->local, skb); } diff --git a/net/mac802154/util.c b/net/mac802154/util.c index 9f024d85563b..ebc9a8521765 100644 --- a/net/mac802154/util.c +++ b/net/mac802154/util.c @@ -13,12 +13,23 @@ /* privid for wpan_phys to determine whether they belong to us or not */ const void *const mac802154_wpan_phy_privid = &mac802154_wpan_phy_privid; -void ieee802154_wake_queue(struct ieee802154_hw *hw) +/** + * ieee802154_wake_queue - wake ieee802154 queue + * @hw: main hardware object + * + * Tranceivers usually have either one transmit framebuffer or one framebuffer + * for both transmitting and receiving. Hence, the core currently only handles + * one frame at a time for each phy, which means we had to stop the queue to + * avoid new skb to come during the transmission. The queue then needs to be + * woken up after the operation. 
+ */ +static void ieee802154_wake_queue(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); struct ieee802154_sub_if_data *sdata; rcu_read_lock(); + clear_bit(WPAN_PHY_FLAG_STATE_QUEUE_STOPPED, &local->phy->flags); list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!sdata->dev) continue; @@ -27,9 +38,18 @@ void ieee802154_wake_queue(struct ieee802154_hw *hw) } rcu_read_unlock(); } -EXPORT_SYMBOL(ieee802154_wake_queue); -void ieee802154_stop_queue(struct ieee802154_hw *hw) +/** + * ieee802154_stop_queue - stop ieee802154 queue + * @hw: main hardware object + * + * Tranceivers usually have either one transmit framebuffer or one framebuffer + * for both transmitting and receiving. Hence, the core currently only handles + * one frame at a time for each phy, which means we need to tell upper layers to + * stop giving us new skbs while we are busy with the transmitted one. The queue + * must then be stopped before transmitting. + */ +static void ieee802154_stop_queue(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); struct ieee802154_sub_if_data *sdata; @@ -43,14 +63,47 @@ void ieee802154_stop_queue(struct ieee802154_hw *hw) } rcu_read_unlock(); } -EXPORT_SYMBOL(ieee802154_stop_queue); + +void ieee802154_hold_queue(struct ieee802154_local *local) +{ + unsigned long flags; + + spin_lock_irqsave(&local->phy->queue_lock, flags); + if (!atomic_fetch_inc(&local->phy->hold_txs)) + ieee802154_stop_queue(&local->hw); + spin_unlock_irqrestore(&local->phy->queue_lock, flags); +} + +void ieee802154_release_queue(struct ieee802154_local *local) +{ + unsigned long flags; + + spin_lock_irqsave(&local->phy->queue_lock, flags); + if (atomic_dec_and_test(&local->phy->hold_txs)) + ieee802154_wake_queue(&local->hw); + spin_unlock_irqrestore(&local->phy->queue_lock, flags); +} + +void ieee802154_disable_queue(struct ieee802154_local *local) +{ + struct ieee802154_sub_if_data *sdata; + + rcu_read_lock(); + 
list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!sdata->dev) + continue; + + netif_tx_disable(sdata->dev); + } + rcu_read_unlock(); +} enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer) { struct ieee802154_local *local = container_of(timer, struct ieee802154_local, ifs_timer); - ieee802154_wake_queue(&local->hw); + ieee802154_release_queue(local); return HRTIMER_NORESTART; } @@ -84,10 +137,12 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, hw->phy->sifs_period * NSEC_PER_USEC, HRTIMER_MODE_REL); } else { - ieee802154_wake_queue(hw); + ieee802154_release_queue(local); } dev_consume_skb_any(skb); + if (atomic_dec_and_test(&hw->phy->ongoing_txs)) + wake_up(&hw->phy->sync_txq); } EXPORT_SYMBOL(ieee802154_xmit_complete); @@ -97,8 +152,10 @@ void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb, struct ieee802154_local *local = hw_to_local(hw); local->tx_result = reason; - ieee802154_wake_queue(hw); + ieee802154_release_queue(local); dev_kfree_skb_any(skb); + if (atomic_dec_and_test(&hw->phy->ongoing_txs)) + wake_up(&hw->phy->sync_txq); } EXPORT_SYMBOL(ieee802154_xmit_error); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b6dc6e260334..109eea2c65ff 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2730,6 +2730,8 @@ static int mptcp_init_sock(struct sock *sk) if (ret) return ret; + set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); + /* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will * propagate the correct value */ @@ -3707,6 +3709,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, struct mptcp_subflow_context *subflow; struct sock *newsk = newsock->sk; + set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + lock_sock(newsk); /* PM/worker can now acquire the first subflow socket @@ -3920,12 +3924,6 @@ static const struct proto_ops mptcp_v6_stream_ops = { static struct proto mptcp_v6_prot; -static void 
mptcp_v6_destroy(struct sock *sk) -{ - mptcp_destroy(sk); - inet6_destroy_sock(sk); -} - static struct inet_protosw mptcp_v6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_MPTCP, @@ -3941,7 +3939,6 @@ int __init mptcp_proto_v6_init(void) mptcp_v6_prot = mptcp_prot; strcpy(mptcp_v6_prot.name, "MPTCPv6"); mptcp_v6_prot.slab = NULL; - mptcp_v6_prot.destroy = mptcp_v6_destroy; mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock); err = proto_register(&mptcp_v6_prot, 1); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index c7cb68c725b2..f85e9bbfe86f 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -560,6 +560,7 @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_TX_DELAY: case TCP_INQ: case TCP_FASTOPEN_CONNECT: + case TCP_FASTOPEN_NO_COOKIE: return true; } @@ -568,8 +569,8 @@ static bool mptcp_supported_sockopt(int level, int optname) /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, * TCP_REPAIR_WINDOW are not supported, better avoid this mess */ - /* TCP_FASTOPEN_KEY, TCP_FASTOPEN, TCP_FASTOPEN_NO_COOKIE, - * are not supported fastopen is currently unsupported + /* TCP_FASTOPEN_KEY, TCP_FASTOPEN are not supported because + * fastopen for the listener side is currently unsupported */ } return false; @@ -757,29 +758,17 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, return -EOPNOTSUPP; } -static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) -{ - struct socket *listener; - - listener = __mptcp_nmpc_socket(msk); - if (!listener) - return 0; /* TCP_DEFER_ACCEPT does not fail */ - - return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen); -} - -static int mptcp_setsockopt_sol_tcp_fastopen_connect(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) +static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct socket *sock; 
- /* Limit to first subflow */ + /* Limit to first subflow, before the connection establishment */ sock = __mptcp_nmpc_socket(msk); if (!sock) return -EINVAL; - return tcp_setsockopt(sock->sk, SOL_TCP, TCP_FASTOPEN_CONNECT, optval, optlen); + return tcp_setsockopt(sock->sk, level, optname, optval, optlen); } static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, @@ -809,9 +798,13 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_NODELAY: return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); case TCP_DEFER_ACCEPT: - return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen); + /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ + mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); + return 0; case TCP_FASTOPEN_CONNECT: - return mptcp_setsockopt_sol_tcp_fastopen_connect(msk, optval, optlen); + case TCP_FASTOPEN_NO_COOKIE: + return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, + optval, optlen); } return -EOPNOTSUPP; @@ -1174,6 +1167,7 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_CC_INFO: case TCP_DEFER_ACCEPT: case TCP_FASTOPEN_CONNECT: + case TCP_FASTOPEN_NO_COOKIE: return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); case TCP_INQ: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 02a54d59697b..437a283ba6ea 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1602,7 +1602,9 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) /* kernel sockets do not by default acquire net ref, but TCP timer * needs it. + * Update ns_tracker to current stack trace and refcounted tracker. 
*/ + __netns_tracker_free(net, &sf->sk->ns_tracker, false); sf->sk->sk_net_refcnt = 1; get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a662e8a5ff84..f0c94d394ab1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -812,6 +812,17 @@ static int netlink_release(struct socket *sock) } sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); + + /* Because struct net might disappear soon, do not keep a pointer. */ + if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { + __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); + /* Because of deferred_put_nlk_sk and use of work queue, + * it is possible netns will be freed before this socket. + */ + sock_net_set(sk, &init_net); + __netns_tracker_alloc(&init_net, &sk->ns_tracker, + false, GFP_KERNEL); + } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4a07ab094a84..ead5418c126e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2309,7 +2309,7 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size) WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); - sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); + sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6ce8dd19f33c..8c5b3da0c29f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3277,7 +3277,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data) + 1]; + char name[sizeof(uaddr->sa_data_min) + 1]; /* * Check legality @@ -3288,8 +3288,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. 
*/ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); - name[sizeof(uaddr->sa_data)] = 0; + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); + name[sizeof(uaddr->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } @@ -3561,11 +3561,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock(); return sizeof(*uaddr); diff --git a/net/rds/message.c b/net/rds/message.c index 44dbc612ef54..b47e4f0a1639 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -366,7 +366,6 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * struct scatterlist *sg; int ret = 0; int length = iov_iter_count(from); - int total_copied = 0; struct rds_msg_zcopy_info *info; rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); @@ -404,7 +403,6 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * ret = -EFAULT; goto err; } - total_copied += copied; length -= copied; sg_set_page(sg, pages, copied, start); rm->data.op_nents++; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 4444fd82b66d..c5b86066ff66 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -503,6 +503,9 @@ bool rds_tcp_tune(struct socket *sock) release_sock(sk); return false; } + /* Update ns_tracker to current stack trace and refcounted tracker */ + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 7f598784fd30..1710780c908a 100644 --- 
a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -148,6 +148,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, } if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) { + if (is_tcf_skbedit_ingress(act_flags) && + !(act_flags & TCA_ACT_FLAGS_SKIP_SW)) { + NL_SET_ERR_MSG_MOD(extack, "\"queue_mapping\" option on receive side is hardware only, use skip_sw"); + return -EOPNOTSUPP; + } flags |= SKBEDIT_F_QUEUE_MAPPING; queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); } @@ -374,9 +379,12 @@ static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data } else if (is_tcf_skbedit_priority(act)) { entry->id = FLOW_ACTION_PRIORITY; entry->priority = tcf_skbedit_priority(act); - } else if (is_tcf_skbedit_queue_mapping(act)) { - NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used"); + } else if (is_tcf_skbedit_tx_queue_mapping(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used on transmit side"); return -EOPNOTSUPP; + } else if (is_tcf_skbedit_rx_queue_mapping(act)) { + entry->id = FLOW_ACTION_RX_QUEUE_MAPPING; + entry->rx_queue = tcf_skbedit_rx_queue_mapping(act); } else if (is_tcf_skbedit_inheritdsfield(act)) { NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used"); return -EOPNOTSUPP; @@ -394,6 +402,8 @@ static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data fl_action->id = FLOW_ACTION_PTYPE; else if (is_tcf_skbedit_priority(act)) fl_action->id = FLOW_ACTION_PRIORITY; + else if (is_tcf_skbedit_rx_queue_mapping(act)) + fl_action->id = FLOW_ACTION_RX_QUEUE_MAPPING; else return -EOPNOTSUPP; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 50566db45949..23d1cfa4f58c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1953,6 +1953,11 @@ static void tfilter_put(struct tcf_proto *tp, void *fh) tp->ops->put(tp, fh); } +static bool is_qdisc_ingress(__u32 classid) +{ + 
return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS)); +} + static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -2144,6 +2149,8 @@ replay: flags |= TCA_ACT_FLAGS_REPLACE; if (!rtnl_held) flags |= TCA_ACT_FLAGS_NO_RTNL; + if (is_qdisc_ingress(parent)) + flags |= TCA_ACT_FLAGS_AT_INGRESS; err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, flags, extack); if (err == 0) { diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 3460abceba44..63ba5551c13f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -226,8 +226,7 @@ static struct sctp_association *sctp_association_init( /* Create an output queue. */ sctp_outq_init(asoc, &asoc->outqueue); - if (!sctp_ulpq_init(&asoc->ulpq, asoc)) - goto fail_init; + sctp_ulpq_init(&asoc->ulpq, asoc); if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) goto stream_free; @@ -277,7 +276,6 @@ static struct sctp_association *sctp_association_init( stream_free: sctp_stream_free(&asoc->stream); -fail_init: sock_put(asoc->base.sk); sctp_endpoint_put(asoc->ep); return NULL; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 83628c347744..3e83963d1b8a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5098,13 +5098,17 @@ static void sctp_destroy_sock(struct sock *sk) } /* Triggered when there are no references on the socket anymore */ -static void sctp_destruct_sock(struct sock *sk) +static void sctp_destruct_common(struct sock *sk) { struct sctp_sock *sp = sctp_sk(sk); /* Free up the HMAC transform. 
*/ crypto_free_shash(sp->hmac); +} +static void sctp_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); inet_sock_destruct(sk); } @@ -9427,7 +9431,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, sctp_sk(newsk)->reuse = sp->reuse; newsk->sk_shutdown = sk->sk_shutdown; - newsk->sk_destruct = sctp_destruct_sock; + newsk->sk_destruct = sk->sk_destruct; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; @@ -9662,11 +9666,20 @@ struct proto sctp_prot = { #if IS_ENABLED(CONFIG_IPV6) -#include <net/transp_v6.h> -static void sctp_v6_destroy_sock(struct sock *sk) +static void sctp_v6_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +static int sctp_v6_init_sock(struct sock *sk) { - sctp_destroy_sock(sk); - inet6_destroy_sock(sk); + int ret = sctp_init_sock(sk); + + if (!ret) + sk->sk_destruct = sctp_v6_destruct_sock; + + return ret; } struct proto sctpv6_prot = { @@ -9676,8 +9689,8 @@ struct proto sctpv6_prot = { .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, - .init = sctp_init_sock, - .destroy = sctp_v6_destroy_sock, + .init = sctp_v6_init_sock, + .destroy = sctp_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index bb22b71df7a3..94727feb07b3 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -490,11 +490,8 @@ static int sctp_enqueue_event(struct sctp_ulpq *ulpq, if (!sctp_ulpevent_is_enabled(event, ulpq->asoc->subscribe)) goto out_free; - if (skb_list) - skb_queue_splice_tail_init(skb_list, - &sk->sk_receive_queue); - else - __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_queue_splice_tail_init(skb_list, + &sk->sk_receive_queue); if (!sp->data_ready_signalled) { sp->data_ready_signalled = 1; @@ -504,10 +501,7 @@ static int sctp_enqueue_event(struct 
sctp_ulpq *ulpq, return 1; out_free: - if (skb_list) - sctp_queue_purge_ulpevents(skb_list); - else - sctp_ulpevent_free(event); + sctp_queue_purge_ulpevents(skb_list); return 0; } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 0a8510a0c5e6..b05daafd369a 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -38,8 +38,7 @@ static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq); /* 1st Level Abstractions */ /* Initialize a ULP queue from a block of memory. */ -struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq, - struct sctp_association *asoc) +void sctp_ulpq_init(struct sctp_ulpq *ulpq, struct sctp_association *asoc) { memset(ulpq, 0, sizeof(struct sctp_ulpq)); @@ -48,8 +47,6 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq, skb_queue_head_init(&ulpq->reasm_uo); skb_queue_head_init(&ulpq->lobby); ulpq->pd_mode = 0; - - return ulpq; } @@ -259,10 +256,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sk_buff_head *skb_list) return 1; out_free: - if (skb_list) - sctp_queue_purge_ulpevents(skb_list); - else - sctp_ulpevent_free(event); + sctp_queue_purge_ulpevents(skb_list); return 0; } diff --git a/net/socket.c b/net/socket.c index 00da9ce3dba0..55c5d536e5f6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2199,13 +2199,7 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, static bool sock_use_custom_sol_socket(const struct socket *sock) { - const struct sock *sk = sock->sk; - - /* Use sock->ops->setsockopt() for MPTCP */ - return IS_ENABLED(CONFIG_MPTCP) && - sk->sk_protocol == IPPROTO_MPTCP && - sk->sk_type == SOCK_STREAM && - (sk->sk_family == AF_INET || sk->sk_family == AF_INET6); + return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags); } /* |