diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-01-27 19:46:13 -0800 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-01-27 19:46:14 -0800 |
commit | e7d786331c62f260fa5174ff6dde788181f3bf6b (patch) | |
tree | 48e95b0161655f432a0d3b3172130bfb9090cec8 | |
parent | d344e55356c2ce883a764bb2324fd635c5af3f50 (diff) | |
parent | 31ed2261e88fbd1eb62fc870ef2c6b2cf2951336 (diff) |
Merge branch 'udp-ipv6-optimisations'
Pavel Begunkov says:
====================
udp/ipv6 optimisations
Shed some weight from udp/ipv6. Zerocopy benchmarks over dummy showed
~5% tx/s improvement, should be similar for small payload non-zc
cases.
The performance comes from killing 4 atomics and a couple of big struct
memcpy/memset. 1/10 removes a pair of atomics on dst refcounting for
cork->skb setup, 9/10 saves another pair on cork init. 5/10 and 8/10
kill extra 88B memset and memcpy respectively.
====================
Link: https://lore.kernel.org/r/cover.1643243772.git.asml.silence@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | include/net/ipv6.h | 14 | ||||
-rw-r--r-- | net/ipv6/exthdrs.c | 8 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 99 | ||||
-rw-r--r-- | net/ipv6/udp.c | 103 |
4 files changed, 118 insertions, 106 deletions
diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3afcb128e064..082f30256f59 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -437,8 +437,16 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, struct ipv6_opt_hdr *newopt); -struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, - struct ipv6_txoptions *opt); +struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, + struct ipv6_txoptions *opt); + +static inline struct ipv6_txoptions * +ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt) +{ + if (!opt) + return NULL; + return __ipv6_fixup_options(opt_space, opt); +} bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt); @@ -1020,7 +1028,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - struct ipcm6_cookie *ipc6, struct flowi6 *fl6, + struct ipcm6_cookie *ipc6, struct rt6_info *rt, unsigned int flags, struct inet_cork_full *cork); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 77e34aec7e82..658d5eabaf7e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -1344,14 +1344,14 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return opt2; } -struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, - struct ipv6_txoptions *opt) +struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, + struct ipv6_txoptions *opt) { /* * ignore the dest before srcrt unless srcrt is being included. * --yoshfuji */ - if (opt && opt->dst0opt && !opt->srcrt) { + if (opt->dst0opt && !opt->srcrt) { if (opt_space != opt) { memcpy(opt_space, opt, sizeof(*opt_space)); opt = opt_space; @@ -1362,7 +1362,7 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, return opt; } -EXPORT_SYMBOL_GPL(ipv6_fixup_options); +EXPORT_SYMBOL_GPL(__ipv6_fixup_options); /** * fl6_update_dst - update flowi destination address with info given diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2995f8d89e7e..0c6c971ce0a5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1350,11 +1350,16 @@ static void ip6_append_data_mtu(unsigned int *mtu, static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, - struct rt6_info *rt, struct flowi6 *fl6) + struct rt6_info *rt) { struct ipv6_pinfo *np = inet6_sk(sk); unsigned int mtu; - struct ipv6_txoptions *opt = ipc6->opt; + struct ipv6_txoptions *nopt, *opt = ipc6->opt; + + /* callers pass dst together with a reference, set it first so + * ip6_cork_release() can put it down even in case of an error. + */ + cork->base.dst = &rt->dst; /* * setup for corking @@ -1363,39 +1368,32 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (WARN_ON(v6_cork->opt)) return -EINVAL; - v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); - if (unlikely(!v6_cork->opt)) + nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); + if (unlikely(!nopt)) return -ENOBUFS; - v6_cork->opt->tot_len = sizeof(*opt); - v6_cork->opt->opt_flen = opt->opt_flen; - v6_cork->opt->opt_nflen = opt->opt_nflen; + nopt->tot_len = sizeof(*opt); + nopt->opt_flen = opt->opt_flen; + nopt->opt_nflen = opt->opt_nflen; - v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, - sk->sk_allocation); - if (opt->dst0opt && !v6_cork->opt->dst0opt) + nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); + if (opt->dst0opt && !nopt->dst0opt) return -ENOBUFS; - v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, - sk->sk_allocation); - if (opt->dst1opt && !v6_cork->opt->dst1opt) + nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); + if (opt->dst1opt && !nopt->dst1opt) return -ENOBUFS; - v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, - sk->sk_allocation); - if (opt->hopopt && !v6_cork->opt->hopopt) + nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); + if (opt->hopopt && !nopt->hopopt) return -ENOBUFS; - v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, - sk->sk_allocation); - if (opt->srcrt && !v6_cork->opt->srcrt) + nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); + if (opt->srcrt && !nopt->srcrt) return -ENOBUFS; /* need source address above miyazawa*/ } - dst_hold(&rt->dst); - cork->base.dst = &rt->dst; - cork->fl.u.ip6 = *fl6; v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) @@ -1426,9 +1424,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, } static int __ip6_append_data(struct sock *sk, - struct flowi6 *fl6, struct sk_buff_head *queue, - struct inet_cork *cork, + struct inet_cork_full *cork_full, struct inet6_cork *v6_cork, struct page_frag *pfrag, int getfrag(void *from, char *to, int offset, @@ -1437,6 +1434,8 @@ static int __ip6_append_data(struct sock *sk, unsigned int flags, struct ipcm6_cookie *ipc6) { struct sk_buff *skb, *skb_prev = NULL; + struct inet_cork *cork = &cork_full->base; + struct flowi6 *fl6 = &cork_full->fl.u.ip6; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; struct ubuf_info *uarg = NULL; int exthdrlen = 0; @@ -1788,34 +1787,46 @@ int ip6_append_data(struct sock *sk, /* * setup for corking */ + dst_hold(&rt->dst); err = ip6_setup_cork(sk, &inet->cork, &np->cork, - ipc6, rt, fl6); + ipc6, rt); if (err) return err; + inet->cork.fl.u.ip6 = *fl6; exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; } else { - fl6 = &inet->cork.fl.u.ip6; transhdrlen = 0; } - return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, + return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, &np->cork, sk_page_frag(sk), getfrag, from, length, transhdrlen, flags, ipc6); } EXPORT_SYMBOL_GPL(ip6_append_data); +static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) +{ + struct dst_entry *dst = cork->base.dst; + + cork->base.dst = NULL; + cork->base.flags &= ~IPCORK_ALLFRAG; + skb_dst_set(skb, dst); +} + static void ip6_cork_release(struct inet_cork_full *cork, struct inet6_cork *v6_cork) { if (v6_cork->opt) { - kfree(v6_cork->opt->dst0opt); - kfree(v6_cork->opt->dst1opt); - kfree(v6_cork->opt->hopopt); - kfree(v6_cork->opt->srcrt); - kfree(v6_cork->opt); + struct ipv6_txoptions *opt = v6_cork->opt; + + kfree(opt->dst0opt); + kfree(opt->dst1opt); + kfree(opt->hopopt); + kfree(opt->srcrt); + kfree(opt); v6_cork->opt = NULL; } @@ -1824,7 +1835,6 @@ static void ip6_cork_release(struct inet_cork_full *cork, cork->base.dst = NULL; cork->base.flags &= ~IPCORK_ALLFRAG; } - memset(&cork->fl, 0, sizeof(cork->fl)); } struct sk_buff *__ip6_make_skb(struct sock *sk, @@ -1834,7 +1844,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; - struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; + struct in6_addr *final_dst; struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; @@ -1864,9 +1874,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, /* Allow local fragmentation. */ skb->ignore_df = ip6_sk_ignore_df(sk); - - *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); + + final_dst = &fl6->daddr; if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt && opt->opt_nflen) @@ -1886,10 +1896,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = sk->sk_priority; skb->mark = cork->base.mark; - skb->tstamp = cork->base.transmit_time; - skb_dst_set(skb, dst_clone(&rt->dst)); + ip6_cork_steal_dst(skb, cork); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -1961,26 +1970,26 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - struct ipcm6_cookie *ipc6, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, - struct inet_cork_full *cork) + struct ipcm6_cookie *ipc6, struct rt6_info *rt, + unsigned int flags, struct inet_cork_full *cork) { struct inet6_cork v6_cork; struct sk_buff_head queue; int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); int err; - if (flags & MSG_PROBE) + if (flags & MSG_PROBE) { + dst_release(&rt->dst); return NULL; + } __skb_queue_head_init(&queue); cork->base.flags = 0; cork->base.addr = 0; cork->base.opt = NULL; - cork->base.dst = NULL; v6_cork.opt = NULL; - err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6); + err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt); if (err) { ip6_cork_release(cork, &v6_cork); return ERR_PTR(err); @@ -1988,7 +1997,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, if (ipc6->dontfrag < 0) ipc6->dontfrag = inet6_sk(sk)->dontfrag; - err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork, + err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, flags, ipc6); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 528b81ef19c9..c6872596b408 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1266,23 +1266,17 @@ static int udp_v6_push_pending_frames(struct sock *sk) { struct sk_buff *skb; struct udp_sock *up = udp_sk(sk); - struct flowi6 fl6; int err = 0; if (up->pending == AF_INET) return udp_push_pending_frames(sk); - /* ip6_finish_skb will release the cork, so make a copy of - * fl6 here. - */ - fl6 = inet_sk(sk)->cork.fl.u.ip6; - skb = ip6_finish_skb(sk); if (!skb) goto out; - err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base); - + err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6, + &inet_sk(sk)->cork.base); out: up->len = 0; up->pending = 0; @@ -1300,7 +1294,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ipv6_txoptions *opt = NULL; struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi6 fl6; + struct inet_cork_full cork; + struct flowi6 *fl6 = &cork.fl.u.ip6; struct dst_entry *dst; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; @@ -1363,9 +1358,6 @@ do_udp_sendmsg: } } - if (up->pending == AF_INET) - return udp_sendmsg(sk, msg, len); - /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). */ @@ -1374,6 +1366,8 @@ do_udp_sendmsg: getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (up->pending) { + if (up->pending == AF_INET) + return udp_sendmsg(sk, msg, len); /* * There are pending frames. * The socket lock must be held while it's corked. @@ -1391,19 +1385,19 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(&fl6, 0, sizeof(fl6)); + memset(fl6, 0, sizeof(*fl6)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl6.fl6_dport = sin6->sin6_port; + fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); + fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } @@ -1420,24 +1414,24 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) - fl6.flowi6_oif = sin6->sin6_scope_id; + fl6->flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl6.fl6_dport = inet->inet_dport; + fl6->fl6_dport = inet->inet_dport; daddr = &sk->sk_v6_daddr; - fl6.flowlabel = np->flow_label; + fl6->flowlabel = np->flow_label; connected = true; } - if (!fl6.flowi6_oif) - fl6.flowi6_oif = sk->sk_bound_dev_if; + if (!fl6->flowi6_oif) + fl6->flowi6_oif = sk->sk_bound_dev_if; - if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6.flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { opt = &opt_space; @@ -1447,14 +1441,14 @@ do_udp_sendmsg: err = udp_cmsg_send(sk, msg, &ipc6.gso_size); if (err > 0) - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6, &ipc6); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); + if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } @@ -1471,16 +1465,17 @@ do_udp_sendmsg: opt = ipv6_fixup_options(&opt_space, opt); ipc6.opt = opt; - fl6.flowi6_proto = sk->sk_protocol; - fl6.flowi6_mark = ipc6.sockc.mark; - fl6.daddr = *daddr; - if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) - fl6.saddr = np->saddr; - fl6.fl6_sport = inet->inet_sport; + fl6->flowi6_proto = sk->sk_protocol; + fl6->flowi6_mark = ipc6.sockc.mark; + fl6->daddr = *daddr; + if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr)) + fl6->saddr = np->saddr; + fl6->fl6_sport = inet->inet_sport; if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, - (struct sockaddr *)sin6, &fl6.saddr); + (struct sockaddr *)sin6, + &fl6->saddr); if (err) goto out_no_dst; if (sin6) { @@ -1496,32 +1491,32 @@ do_udp_sendmsg: err = -EINVAL; goto out_no_dst; } - fl6.fl6_dport = sin6->sin6_port; - fl6.daddr = sin6->sin6_addr; + fl6->fl6_dport = sin6->sin6_port; + fl6->daddr = sin6->sin6_addr; } } - if (ipv6_addr_any(&fl6.daddr)) - fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6->daddr)) + fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - final_p = fl6_update_dst(&fl6, opt, &final); + final_p = fl6_update_dst(fl6, opt, &final); if (final_p) connected = false; - if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) { - fl6.flowi6_oif = np->mcast_oif; + if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) { + fl6->flowi6_oif = np->mcast_oif; connected = false; - } else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + } else if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); if (ipc6.tclass < 0) ipc6.tclass = np->tclass; - fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); + fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel); - dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected); + dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -1529,7 +1524,7 @@ do_udp_sendmsg: } if (ipc6.hlimit < 0) - ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -1537,17 +1532,17 @@ back_from_confirm: /* Lockless fast path for the non-corking case */ if (!corkreq) { - struct inet_cork_full cork; struct sk_buff *skb; skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, - &fl6, (struct rt6_info *)dst, + (struct rt6_info *)dst, msg->msg_flags, &cork); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) - err = udp_v6_send_skb(skb, &fl6, &cork.base); - goto out; + err = udp_v6_send_skb(skb, fl6, &cork.base); + /* ip6_make_skb steals dst reference */ + goto out_no_dst; } lock_sock(sk); @@ -1568,7 +1563,7 @@ do_append_data: ipc6.dontfrag = np->dontfrag; up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), - &ipc6, &fl6, (struct rt6_info *)dst, + &ipc6, fl6, (struct rt6_info *)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); @@ -1603,7 +1598,7 @@ out_no_dst: do_confirm: if (msg->msg_flags & MSG_PROBE) - dst_confirm_neigh(dst, &fl6.daddr); + dst_confirm_neigh(dst, &fl6->daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; |