summaryrefslogtreecommitdiff
path: root/net/ipv4/ip_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r--net/ipv4/ip_output.c92
1 files changed, 53 insertions, 39 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1fe794967211..a2705d454fd6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -75,8 +75,8 @@
#include <net/checksum.h>
#include <net/gso.h>
#include <net/inetpeer.h>
-#include <net/inet_ecn.h>
#include <net/lwtunnel.h>
+#include <net/inet_dscp.h>
#include <linux/bpf-cgroup.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
@@ -198,7 +198,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = (struct rtable *)dst;
+ struct rtable *rt = dst_rtable(dst);
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
@@ -475,26 +475,18 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
goto packet_routed;
/* Make sure we can route this packet. */
- rt = (struct rtable *)__sk_dst_check(sk, 0);
+ rt = dst_rtable(__sk_dst_check(sk, 0));
if (!rt) {
- __be32 daddr;
+ inet_sk_init_flowi4(inet, fl4);
- /* Use correct destination address if we have options. */
- daddr = inet->inet_daddr;
- if (inet_opt && inet_opt->opt.srr)
- daddr = inet_opt->opt.faddr;
+ /* sctp_v4_xmit() uses its own DSCP value */
+ fl4->flowi4_tos = tos & INET_DSCP_MASK;
/* If this fails, retransmit mechanism of transport layer will
* keep trying until route appears or the connection times
* itself out.
*/
- rt = ip_route_output_ports(net, fl4, sk,
- daddr, inet->inet_saddr,
- inet->inet_dport,
- inet->inet_sport,
- sk->sk_protocol,
- RT_TOS(tos),
- sk->sk_bound_dev_if);
+ rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
sk_setup_caps(sk, &rt->dst);
@@ -764,7 +756,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
{
struct iphdr *iph;
struct sk_buff *skb2;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
struct rtable *rt = skb_rtable(skb);
unsigned int mtu, hlen, ll_rs;
struct ip_fraglist_iter iter;
@@ -856,7 +848,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
}
}
- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, skb);
if (!err)
@@ -912,7 +904,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb2, tstamp, tstamp_type);
err = output(net, sk, skb2);
if (err)
goto fail;
@@ -971,8 +963,8 @@ static int __ip_append_data(struct sock *sk,
bool zc = false;
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
- struct rtable *rt = (struct rtable *)cork->dst;
- bool paged, hold_tskey, extra_uref = false;
+ struct rtable *rt = dst_rtable(cork->dst);
+ bool paged, hold_tskey = false, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
@@ -1022,7 +1014,8 @@ static int __ip_append_data(struct sock *sk,
uarg = msg->msg_ubuf;
}
} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
- uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb),
+ false);
if (!uarg)
return -ENOBUFS;
extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
@@ -1048,10 +1041,15 @@ static int __ip_append_data(struct sock *sk,
cork->length += length;
- hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
- if (hold_tskey)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+ if (cork->flags & IPCORK_TS_OPT_ID) {
+ tskey = cork->ts_opt_id;
+ } else {
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ hold_tskey = true;
+ }
+ }
/* So, what's going on in the loop below?
*
@@ -1163,7 +1161,10 @@ alloc_new_skb:
/* [!] NOTE: copy will be negative if pagedlen>0
* because then the equation reduces to -fraggap.
*/
- if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+ if (copy > 0 &&
+ INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, data + transhdrlen, offset,
+ copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
@@ -1207,8 +1208,9 @@ alloc_new_skb:
unsigned int off;
off = skb->len;
- if (getfrag(from, skb_put(skb, copy),
- offset, copy, off, skb) < 0) {
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, skb_put(skb, copy),
+ offset, copy, off, skb) < 0) {
__skb_trim(skb, off);
err = -EFAULT;
goto error;
@@ -1246,7 +1248,8 @@ alloc_new_skb:
get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- if (getfrag(from,
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from,
page_address(pfrag->page) + pfrag->offset,
offset, copy, skb->len, skb) < 0)
goto error_efault;
@@ -1322,10 +1325,14 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->ttl = ipc->ttl;
cork->tos = ipc->tos;
cork->mark = ipc->sockc.mark;
- cork->priority = ipc->priority;
+ cork->priority = ipc->sockc.priority;
cork->transmit_time = ipc->sockc.transmit_time;
cork->tx_flags = 0;
- sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
+ sock_tx_timestamp(sk, &ipc->sockc, &cork->tx_flags);
+ if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
+ cork->flags |= IPCORK_TS_OPT_ID;
+ cork->ts_opt_id = ipc->sockc.ts_opt_id;
+ }
return 0;
}
@@ -1390,7 +1397,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
struct ip_options *opt = NULL;
- struct rtable *rt = (struct rtable *)cork->dst;
+ struct rtable *rt = dst_rtable(cork->dst);
struct iphdr *iph;
u8 pmtudisc, ttl;
__be16 df = 0;
@@ -1455,9 +1462,12 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
ip_options_build(skb, opt, cork->addr, rt);
}
- skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
+ skb->priority = cork->priority;
skb->mark = cork->mark;
- skb->tstamp = cork->transmit_time;
+ if (sk_is_tcp(sk))
+ skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC);
+ else
+ skb_set_delivery_type_by_clockid(skb, cork->transmit_time, sk->sk_clockid);
/*
* Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
* on dst refcount
@@ -1473,7 +1483,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
* by icmp_hdr(skb)->type.
*/
if (sk->sk_type == SOCK_RAW &&
- !inet_test_bit(HDRINCL, sk))
+ !(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH))
icmp_type = fl4->fl4_icmp_type;
else
icmp_type = icmp_hdr(skb)->type;
@@ -1583,7 +1593,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
* Generic function to send a packet as reply to another packet.
* Used to send some TCP resets/acks so far.
*/
-void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk,
+ struct sk_buff *skb,
const struct ip_options *sopt,
__be32 daddr, __be32 saddr,
const struct ip_reply_arg *arg,
@@ -1618,7 +1629,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark,
- RT_TOS(arg->tos),
+ arg->tos & INET_DSCP_MASK,
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
daddr, saddr,
@@ -1629,7 +1640,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
if (IS_ERR(rt))
return;
- inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK;
+ inet_sk(sk)->tos = arg->tos;
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
@@ -1649,7 +1660,10 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
- nskb->mono_delivery_time = !!transmit_time;
+ if (orig_sk)
+ skb_set_owner_edemux(nskb, (struct sock *)orig_sk);
+ if (transmit_time)
+ nskb->tstamp_type = SKB_CLOCK_MONOTONIC;
if (txhash)
skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4);
ip_push_pending_frames(sk, &fl4);