diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 11 | ||||
-rw-r--r-- | net/core/dev_addr_lists.c | 10 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 1 | ||||
-rw-r--r-- | net/core/filter.c | 10 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 32 | ||||
-rw-r--r-- | net/core/flow_offload.c | 48 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 2 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 3 | ||||
-rw-r--r-- | net/core/skmsg.c | 23 | ||||
-rw-r--r-- | net/core/sock.c | 6 | ||||
-rw-r--r-- | net/core/sock_map.c | 53 | ||||
-rw-r--r-- | net/core/sock_reuseport.c | 1 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 2 | ||||
-rw-r--r-- | net/core/xdp.c | 1 |
14 files changed, 140 insertions, 63 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 6bc2388141f6..7a774ebf64e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4192,10 +4192,12 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) local_bh_disable(); + dev_xmit_recursion_inc(); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); + dev_xmit_recursion_dec(); local_bh_enable(); @@ -5599,7 +5601,7 @@ static void flush_backlog(struct work_struct *work) skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); - kfree_skb(skb); + dev_kfree_skb_irq(skb); input_queue_head_incr(sd); } } @@ -9547,6 +9549,13 @@ int register_netdevice(struct net_device *dev) rcu_barrier(); dev->reg_state = NETREG_UNREGISTERED; + /* We should put the kobject that hold in + * netdev_unregister_kobject(), otherwise + * the net device cannot be freed when + * driver calls free_netdev(), because the + * kobject is being hold. + */ + kobject_put(&dev->dev.kobj); } /* * Prevent userspace races by waiting until the network diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 6393ba930097..54cd568e7c2f 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -690,6 +690,15 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; + /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two + * reasons: + * 1) This is always called without any addr_list_lock, so as the + * outermost one here, it must be 0. + * 2) This is called by some callers after unlinking the upper device, + * so the dev->lower_level becomes 1 again. + * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or + * larger. + */ netif_addr_lock_bh(from); netif_addr_lock_nested(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); @@ -911,6 +920,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; + /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); netif_addr_lock_nested(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 2ee7bc4c9e03..b09bebeadf0b 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1721,3 +1721,4 @@ module_exit(exit_net_drop_monitor); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>"); MODULE_ALIAS_GENL_FAMILY("NET_DM"); +MODULE_DESCRIPTION("Monitoring code for network dropped packet alerts"); diff --git a/net/core/filter.c b/net/core/filter.c index 73395384afe2..82e1b5b06167 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5853,12 +5853,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) { unsigned int iphdr_len; - if (skb->protocol == cpu_to_be16(ETH_P_IP)) + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): iphdr_len = sizeof(struct iphdr); - else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) + break; + case cpu_to_be16(ETH_P_IPV6): iphdr_len = sizeof(struct ipv6hdr); - else + break; + default: return 0; + } if (skb_headlen(skb) < iphdr_len) return 0; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d02df0b6d0d9..142a8824f0a8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -70,10 +70,10 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, EXPORT_SYMBOL(skb_flow_dissector_init); #ifdef CONFIG_BPF_SYSCALL -int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) +int flow_dissector_bpf_prog_attach_check(struct net *net, + struct bpf_prog *prog) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; - struct bpf_prog *attached; if (net == &init_net) { /* BPF flow dissector in the root namespace overrides @@ -86,26 +86,17 @@ int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) for_each_net(ns) { if (ns == &init_net) continue; - if (rcu_access_pointer(ns->bpf.progs[type])) + if (rcu_access_pointer(ns->bpf.run_array[type])) return -EEXIST; } } else { /* Make sure root flow dissector is not attached * when attaching to the non-root namespace. */ - if (rcu_access_pointer(init_net.bpf.progs[type])) + if (rcu_access_pointer(init_net.bpf.run_array[type])) return -EEXIST; } - attached = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); - if (attached == prog) - /* The same program cannot be attached twice */ - return -EINVAL; - - rcu_assign_pointer(net->bpf.progs[type], prog); - if (attached) - bpf_prog_put(attached); return 0; } #endif /* CONFIG_BPF_SYSCALL */ @@ -903,7 +894,6 @@ bool __skb_flow_dissect(const struct net *net, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; - struct bpf_prog *attached = NULL; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; bool mpls_el = false; @@ -960,14 +950,14 @@ bool __skb_flow_dissect(const struct net *net, WARN_ON_ONCE(!net); if (net) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; + struct bpf_prog_array *run_array; rcu_read_lock(); - attached = rcu_dereference(init_net.bpf.progs[type]); - - if (!attached) - attached = rcu_dereference(net->bpf.progs[type]); + run_array = rcu_dereference(init_net.bpf.run_array[type]); + if (!run_array) + run_array = rcu_dereference(net->bpf.run_array[type]); - if (attached) { + if (run_array) { struct bpf_flow_keys flow_keys; struct bpf_flow_dissector ctx = { .flow_keys = &flow_keys, @@ -975,6 +965,7 @@ bool __skb_flow_dissect(const struct net *net, .data_end = data + hlen, }; __be16 n_proto = proto; + struct bpf_prog *prog; if (skb) { ctx.skb = skb; @@ -985,7 +976,8 @@ bool __skb_flow_dissect(const struct net *net, n_proto = skb->protocol; } - ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, + prog = READ_ONCE(run_array->items[0].prog); + ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, hlen, flags); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 0cfc35e6be28..2076219b8ba5 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -4,6 +4,7 @@ #include <net/flow_offload.h> #include <linux/rtnetlink.h> #include <linux/mutex.h> +#include <linux/rhashtable.h> struct flow_rule *flow_rule_alloc(unsigned int num_actions) { @@ -372,14 +373,15 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) } EXPORT_SYMBOL(flow_indr_dev_register); -static void __flow_block_indr_cleanup(flow_setup_cb_t *setup_cb, void *cb_priv, +static void __flow_block_indr_cleanup(void (*release)(void *cb_priv), + void *cb_priv, struct list_head *cleanup_list) { struct flow_block_cb *this, *next; list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) { - if (this->cb == setup_cb && - this->cb_priv == cb_priv) { + if (this->release == release && + this->indr.cb_priv == cb_priv) { list_move(&this->indr.list, cleanup_list); return; } @@ -397,7 +399,7 @@ static void flow_block_indr_notify(struct list_head *cleanup_list) } void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, - flow_setup_cb_t *setup_cb) + void (*release)(void *cb_priv)) { struct flow_indr_dev *this, *next, *indr_dev = NULL; LIST_HEAD(cleanup_list); @@ -418,7 +420,7 @@ void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, return; } - __flow_block_indr_cleanup(setup_cb, cb_priv, &cleanup_list); + __flow_block_indr_cleanup(release, cb_priv, &cleanup_list); mutex_unlock(&flow_indr_block_lock); flow_block_indr_notify(&cleanup_list); @@ -429,32 +431,37 @@ EXPORT_SYMBOL(flow_indr_dev_unregister); static void flow_block_indr_init(struct flow_block_cb *flow_block, struct flow_block_offload *bo, struct net_device *dev, void *data, + void *cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { flow_block->indr.binder_type = bo->binder_type; flow_block->indr.data = data; + flow_block->indr.cb_priv = cb_priv; flow_block->indr.dev = dev; flow_block->indr.cleanup = cleanup; } -static void __flow_block_indr_binding(struct flow_block_offload *bo, - struct net_device *dev, void *data, - void (*cleanup)(struct flow_block_cb *block_cb)) +struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, + void *cb_ident, void *cb_priv, + void (*release)(void *cb_priv), + struct flow_block_offload *bo, + struct net_device *dev, void *data, + void *indr_cb_priv, + void (*cleanup)(struct flow_block_cb *block_cb)) { struct flow_block_cb *block_cb; - list_for_each_entry(block_cb, &bo->cb_list, list) { - switch (bo->command) { - case FLOW_BLOCK_BIND: - flow_block_indr_init(block_cb, bo, dev, data, cleanup); - list_add(&block_cb->indr.list, &flow_block_indr_list); - break; - case FLOW_BLOCK_UNBIND: - list_del(&block_cb->indr.list); - break; - } - } + block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, release); + if (IS_ERR(block_cb)) + goto out; + + flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup); + list_add(&block_cb->indr.list, &flow_block_indr_list); + +out: + return block_cb; } +EXPORT_SYMBOL(flow_indr_block_cb_alloc); int flow_indr_dev_setup_offload(struct net_device *dev, enum tc_setup_type type, void *data, @@ -465,9 +472,8 @@ int flow_indr_dev_setup_offload(struct net_device *dev, mutex_lock(&flow_indr_block_lock); list_for_each_entry(this, &flow_block_indr_dev_list, list) - this->cb(dev, this->cb_priv, type, bo); + this->cb(dev, this->cb_priv, type, bo, data, cleanup); - __flow_block_indr_binding(bo, dev, data, cleanup); mutex_unlock(&flow_indr_block_lock); return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e353b822bb15..7bd6440c63bf 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1108,7 +1108,7 @@ static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) trans_timeout = queue->trans_timeout; spin_unlock_irq(&queue->_xmit_lock); - return sprintf(buf, "%lu", trans_timeout); + return sprintf(buf, fmt_ulong, trans_timeout); } static unsigned int get_netdev_queue_index(struct netdev_queue *queue) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9aedc15736ad..85a4b0101f76 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3343,7 +3343,8 @@ replay: */ if (err < 0) { /* If device is not registered at all, free it now */ - if (dev->reg_state == NETREG_UNINITIALIZED) + if (dev->reg_state == NETREG_UNINITIALIZED || + dev->reg_state == NETREG_UNREGISTERED) free_netdev(dev); goto out; } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 351afbf6bfba..6a32a1fd34f8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -683,7 +683,7 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp) return container_of(parser, struct sk_psock, parser); } -static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) +static void sk_psock_skb_redirect(struct sk_buff *skb) { struct sk_psock *psock_other; struct sock *sk_other; @@ -715,12 +715,11 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) } } -static void sk_psock_tls_verdict_apply(struct sk_psock *psock, - struct sk_buff *skb, int verdict) +static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict) { switch (verdict) { case __SK_REDIRECT: - sk_psock_skb_redirect(psock, skb); + sk_psock_skb_redirect(skb); break; case __SK_PASS: case __SK_DROP: @@ -741,8 +740,8 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) ret = sk_psock_bpf_run(psock, prog, skb); ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } + sk_psock_tls_verdict_apply(skb, ret); rcu_read_unlock(); - sk_psock_tls_verdict_apply(psock, skb, ret); return ret; } EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); @@ -770,7 +769,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, } goto out_free; case __SK_REDIRECT: - sk_psock_skb_redirect(psock, skb); + sk_psock_skb_redirect(skb); break; case __SK_DROP: /* fall-through */ @@ -782,11 +781,18 @@ out_free: static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) { - struct sk_psock *psock = sk_psock_from_strp(strp); + struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; + struct sock *sk; rcu_read_lock(); + sk = strp->sk; + psock = sk_psock(sk); + if (unlikely(!psock)) { + kfree_skb(skb); + goto out; + } prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { skb_orphan(skb); @@ -794,8 +800,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) ret = sk_psock_bpf_run(psock, prog, skb); ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } - rcu_read_unlock(); sk_psock_verdict_apply(psock, skb, ret); +out: + rcu_read_unlock(); } static int sk_psock_strp_read_done(struct strparser *strp, int err) diff --git a/net/core/sock.c b/net/core/sock.c index 6c4acf1f0220..2e5b7870e5d3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -718,7 +718,7 @@ bool sk_mc_loop(struct sock *sk) return inet6_sk(sk)->mc_loop; #endif } - WARN_ON(1); + WARN_ON_ONCE(1); return true; } EXPORT_SYMBOL(sk_mc_loop); @@ -1767,6 +1767,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, cgroup_sk_alloc(&sk->sk_cgrp_data); sock_update_classid(&sk->sk_cgrp_data); sock_update_netprioidx(&sk->sk_cgrp_data); + sk_tx_queue_clear(sk); } return sk; @@ -1925,7 +1926,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; - cgroup_sk_alloc(&newsk->sk_cgrp_data); + cgroup_sk_clone(&newsk->sk_cgrp_data); rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); @@ -1990,6 +1991,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ sk_refcnt_debug_inc(newsk); sk_set_socket(newsk, NULL); + sk_tx_queue_clear(newsk); RCU_INIT_POINTER(newsk->sk_wq, NULL); if (newsk->sk_prot->sockets_allocated) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4059f94e9bb5..0971f17e8e54 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -70,11 +70,49 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) struct fd f; int ret; + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - ret = sock_map_prog_update(map, prog, attr->attach_type); + ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); + fdput(f); + return ret; +} + +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) +{ + u32 ufd = attr->target_fd; + struct bpf_prog *prog; + struct bpf_map *map; + struct fd f; + int ret; + + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + prog = bpf_prog_get(attr->attach_bpf_fd); + if (IS_ERR(prog)) { + ret = PTR_ERR(prog); + goto put_map; + } + + if (prog->type != ptype) { + ret = -EINVAL; + goto put_prog; + } + + ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); +put_prog: + bpf_prog_put(prog); +put_map: fdput(f); return ret; } @@ -1203,27 +1241,32 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) } int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - u32 which) + struct bpf_prog *old, u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); + struct bpf_prog **pprog; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - psock_set_prog(&progs->msg_parser, prog); + pprog = &progs->msg_parser; break; case BPF_SK_SKB_STREAM_PARSER: - psock_set_prog(&progs->skb_parser, prog); + pprog = &progs->skb_parser; break; case BPF_SK_SKB_STREAM_VERDICT: - psock_set_prog(&progs->skb_verdict, prog); + pprog = &progs->skb_verdict; break; default: return -EOPNOTSUPP; } + if (old) + return psock_replace_prog(pprog, prog, old); + + psock_set_prog(pprog, prog); return 0; } diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index adcb3aea576d..bbdd3c7b6cb5 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -101,6 +101,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) more_reuse->prog = reuse->prog; more_reuse->reuseport_id = reuse->reuseport_id; more_reuse->bind_inany = reuse->bind_inany; + more_reuse->has_conns = reuse->has_conns; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f93f8ace6c56..6ada114bbcca 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -274,7 +274,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && !ret) { if (jit_enable < 2 || - (jit_enable == 2 && bpf_dump_raw_ok())) { + (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { *(int *)table->data = jit_enable; if (jit_enable == 2) pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); diff --git a/net/core/xdp.c b/net/core/xdp.c index 90f44f382115..3c45f99e26d5 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -462,6 +462,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) xdpf->len = totsize - metasize; xdpf->headroom = 0; xdpf->metasize = metasize; + xdpf->frame_sz = PAGE_SIZE; xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; xsk_buff_free(xdp); |