summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/arp.c48
-rw-r--r--net/ipv4/esp4.c5
-rw-r--r--net/ipv4/fib_frontend.c15
-rw-r--r--net/ipv4/fib_semantics.c17
-rw-r--r--net/ipv4/fib_trie.c26
-rw-r--r--net/ipv4/icmp.c8
-rw-r--r--net/ipv4/igmp.c22
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ip_tunnel.c6
-rw-r--r--net/ipv4/ipmr.c52
-rw-r--r--net/ipv4/route.c10
-rw-r--r--net/ipv4/tcp.c19
-rw-r--r--net/ipv4/tcp_cong.c1
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/udp_impl.h1
17 files changed, 172 insertions, 78 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f3dad1661343..58925b6597de 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1043,7 +1043,7 @@ static struct inet_protosw inetsw_array[] =
.type = SOCK_DGRAM,
.protocol = IPPROTO_ICMP,
.prot = &ping_prot,
- .ops = &inet_dgram_ops,
+ .ops = &inet_sockraw_ops,
.flags = INET_PROTOSW_REUSE,
},
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 0937b34c27ca..e9f3386a528b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -641,6 +641,32 @@ void arp_xmit(struct sk_buff *skb)
}
EXPORT_SYMBOL(arp_xmit);
+static bool arp_is_garp(struct net *net, struct net_device *dev,
+ int *addr_type, __be16 ar_op,
+ __be32 sip, __be32 tip,
+ unsigned char *sha, unsigned char *tha)
+{
+ bool is_garp = tip == sip;
+
+ /* Gratuitous ARP _replies_ also require target hwaddr to be
+ * the same as source.
+ */
+ if (is_garp && ar_op == htons(ARPOP_REPLY))
+ is_garp =
+ /* IPv4 over IEEE 1394 doesn't provide target
+ * hardware address field in its ARP payload.
+ */
+ tha &&
+ !memcmp(tha, sha, dev->addr_len);
+
+ if (is_garp) {
+ *addr_type = inet_addr_type_dev_table(net, dev, sip);
+ if (*addr_type != RTN_UNICAST)
+ is_garp = false;
+ }
+ return is_garp;
+}
+
/*
* Process an arp request.
*/
@@ -653,6 +679,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
unsigned char *arp_ptr;
struct rtable *rt;
unsigned char *sha;
+ unsigned char *tha = NULL;
__be32 sip, tip;
u16 dev_type = dev->type;
int addr_type;
@@ -724,6 +751,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
break;
#endif
default:
+ tha = arp_ptr;
arp_ptr += dev->addr_len;
}
memcpy(&tip, arp_ptr, 4);
@@ -835,19 +863,25 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
- if (IN_DEV_ARP_ACCEPT(in_dev)) {
- unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip);
+ addr_type = -1;
+ if (n || IN_DEV_ARP_ACCEPT(in_dev)) {
+ is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op,
+ sip, tip, sha, tha);
+ }
+ if (IN_DEV_ARP_ACCEPT(in_dev)) {
/* Unsolicited ARP is not accepted by default.
It is possible, that this option should be enabled for some
devices (strip is candidate)
*/
- is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip &&
- addr_type == RTN_UNICAST;
-
if (!n &&
- ((arp->ar_op == htons(ARPOP_REPLY) &&
- addr_type == RTN_UNICAST) || is_garp))
+ (is_garp ||
+ (arp->ar_op == htons(ARPOP_REPLY) &&
+ (addr_type == RTN_UNICAST ||
+ (addr_type < 0 &&
+ /* postpone calculation to as late as possible */
+ inet_addr_type_dev_table(net, dev, sip) ==
+ RTN_UNICAST)))))
n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 65cc02bd82bc..93322f895eab 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -248,6 +248,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
u8 *tail;
u8 *vaddr;
int nfrags;
+ int esph_offset;
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
@@ -313,11 +314,13 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
}
cow:
+ esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb);
+
nfrags = skb_cow_data(skb, tailen, &trailer);
if (nfrags < 0)
goto out;
tail = skb_tail_pointer(trailer);
- esp->esph = ip_esp_hdr(skb);
+ esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset);
skip_cow:
esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 39bd1edee676..83e3ed258467 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -763,7 +763,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int e = 0, s_e;
struct fib_table *tb;
struct hlist_head *head;
- int dumped = 0;
+ int dumped = 0, err;
if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
@@ -783,20 +783,27 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (dumped)
memset(&cb->args[2], 0, sizeof(cb->args) -
2 * sizeof(cb->args[0]));
- if (fib_table_dump(tb, skb, cb) < 0)
- goto out;
+ err = fib_table_dump(tb, skb, cb);
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+
+ goto out_err;
+ }
dumped = 1;
next:
e++;
}
}
out:
+ err = skb->len;
+out_err:
rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
- return skb->len;
+ return err;
}
/* Prepare and feed intra-kernel routing request.
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da449ddb8cc1..ad9ad4aab5da 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -203,6 +203,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
static void free_fib_info_rcu(struct rcu_head *head)
{
struct fib_info *fi = container_of(head, struct fib_info, rcu);
+ struct dst_metrics *m;
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
@@ -213,8 +214,9 @@ static void free_fib_info_rcu(struct rcu_head *head)
rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
- if (fi->fib_metrics != (u32 *) dst_default_metrics)
- kfree(fi->fib_metrics);
+ m = fi->fib_metrics;
+ if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+ kfree(m);
kfree(fi);
}
@@ -971,11 +973,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
val = 255;
if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
return -EINVAL;
- fi->fib_metrics[type - 1] = val;
+ fi->fib_metrics->metrics[type - 1] = val;
}
if (ecn_ca)
- fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+ fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
return 0;
}
@@ -1033,11 +1035,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto failure;
fib_info_cnt++;
if (cfg->fc_mx) {
- fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+ fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
if (!fi->fib_metrics)
goto failure;
+ atomic_set(&fi->fib_metrics->refcnt, 1);
} else
- fi->fib_metrics = (u32 *) dst_default_metrics;
+ fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
@@ -1238,7 +1241,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (fi->fib_priority &&
nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
goto nla_put_failure;
- if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
+ if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
goto nla_put_failure;
if (fi->fib_prefsrc &&
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1201409ba1dc..51182ff2b441 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1983,6 +1983,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
/* rcu_read_lock is hold by caller */
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+ int err;
+
if (i < s_i) {
i++;
continue;
@@ -1993,17 +1995,14 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
continue;
}
- if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWROUTE,
- tb->tb_id,
- fa->fa_type,
- xkey,
- KEYLENGTH - fa->fa_slen,
- fa->fa_tos,
- fa->fa_info, NLM_F_MULTI) < 0) {
+ err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+ tb->tb_id, fa->fa_type,
+ xkey, KEYLENGTH - fa->fa_slen,
+ fa->fa_tos, fa->fa_info, NLM_F_MULTI);
+ if (err < 0) {
cb->args[4] = i;
- return -1;
+ return err;
}
i++;
}
@@ -2025,10 +2024,13 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
t_key key = cb->args[3];
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
- if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
+ int err;
+
+ err = fn_trie_dump_leaf(l, tb, skb, cb);
+ if (err < 0) {
cb->args[3] = key;
cb->args[2] = count;
- return -1;
+ return err;
}
++count;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 43318b5f5647..9144fa7df2ad 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -657,8 +657,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
/* Needed by both icmp_global_allow and icmp_xmit_lock */
local_bh_disable();
- /* Check global sysctl_icmp_msgs_per_sec ratelimit */
- if (!icmpv4_global_allow(net, type, code))
+ /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
+ * incoming dev is loopback. If outgoing dev change to not be
+ * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
+ */
+ if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
+ !icmpv4_global_allow(net, type, code))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 44fd86de2823..ec9a396fa466 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1112,6 +1112,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
if (!pmc)
return;
+ spin_lock_init(&pmc->lock);
spin_lock_bh(&im->lock);
pmc->interface = im->interface;
in_dev_hold(in_dev);
@@ -2071,21 +2072,26 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
static void ip_mc_clear_src(struct ip_mc_list *pmc)
{
- struct ip_sf_list *psf, *nextpsf;
+ struct ip_sf_list *psf, *nextpsf, *tomb, *sources;
- for (psf = pmc->tomb; psf; psf = nextpsf) {
+ spin_lock_bh(&pmc->lock);
+ tomb = pmc->tomb;
+ pmc->tomb = NULL;
+ sources = pmc->sources;
+ pmc->sources = NULL;
+ pmc->sfmode = MCAST_EXCLUDE;
+ pmc->sfcount[MCAST_INCLUDE] = 0;
+ pmc->sfcount[MCAST_EXCLUDE] = 1;
+ spin_unlock_bh(&pmc->lock);
+
+ for (psf = tomb; psf; psf = nextpsf) {
nextpsf = psf->sf_next;
kfree(psf);
}
- pmc->tomb = NULL;
- for (psf = pmc->sources; psf; psf = nextpsf) {
+ for (psf = sources; psf; psf = nextpsf) {
nextpsf = psf->sf_next;
kfree(psf);
}
- pmc->sources = NULL;
- pmc->sfmode = MCAST_EXCLUDE;
- pmc->sfcount[MCAST_INCLUDE] = 0;
- pmc->sfcount[MCAST_EXCLUDE] = 1;
}
/* Join a multicast group
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7a3fd25e8913..532b36e9ce2a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -964,7 +964,8 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
+ if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
+ (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
(sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index b878ecbc0608..129d1a3616f8 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -446,6 +446,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
return 0;
drop:
+ if (tun_dst)
+ dst_release((struct dst_entry *)tun_dst);
kfree_skb(skb);
return 0;
}
@@ -967,7 +969,6 @@ static void ip_tunnel_dev_free(struct net_device *dev)
gro_cells_destroy(&tunnel->gro_cells);
dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
- free_netdev(dev);
}
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
@@ -1155,7 +1156,8 @@ int ip_tunnel_init(struct net_device *dev)
struct iphdr *iph = &tunnel->parms.iph;
int err;
- dev->destructor = ip_tunnel_dev_free;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ip_tunnel_dev_free;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3a02d52ed50e..8ae425cad818 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local);
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
@@ -501,7 +501,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
dev->flags = IFF_NOARP;
dev->netdev_ops = &reg_vif_netdev_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
dev->features |= NETIF_F_NETNS_LOCAL;
}
@@ -988,7 +988,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else {
- ip_mr_forward(net, mrt, skb, c, 0);
+ ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
}
}
}
@@ -1073,7 +1073,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
/* Queue a packet for resolution. It gets locked cache entry! */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
- struct sk_buff *skb)
+ struct sk_buff *skb, struct net_device *dev)
{
const struct iphdr *iph = ip_hdr(skb);
struct mfc_cache *c;
@@ -1130,6 +1130,10 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
kfree_skb(skb);
err = -ENOBUFS;
} else {
+ if (dev) {
+ skb->dev = dev;
+ skb->skb_iif = dev->ifindex;
+ }
skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1828,10 +1832,10 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local)
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc_cache *cache, int local)
{
- int true_vifi = ipmr_find_vif(mrt, skb->dev);
+ int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
@@ -1853,13 +1857,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
}
/* Wrong interface: drop packet and (maybe) send PIM assert. */
- if (mrt->vif_table[vif].dev != skb->dev) {
- struct net_device *mdev;
-
- mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
- if (mdev == skb->dev)
- goto forward;
-
+ if (mrt->vif_table[vif].dev != dev) {
if (rt_is_output_route(skb_rtable(skb))) {
/* It is our own packet, looped back.
* Very complicated situation...
@@ -1980,6 +1978,20 @@ int ip_mr_input(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
struct mr_table *mrt;
+ struct net_device *dev;
+
+ /* skb->dev passed in is the loX master dev for vrfs.
+ * As there are no vifs associated with loopback devices,
+ * get the proper interface that does have a vif associated with it.
+ */
+ dev = skb->dev;
+ if (netif_is_l3_master(skb->dev)) {
+ dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
+ if (!dev) {
+ kfree_skb(skb);
+ return -ENODEV;
+ }
+ }
/* Packet is looped back after forward, it should not be
* forwarded second time, but still can be delivered locally.
@@ -2017,7 +2029,7 @@ int ip_mr_input(struct sk_buff *skb)
/* already under rcu_read_lock() */
cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
if (!cache) {
- int vif = ipmr_find_vif(mrt, skb->dev);
+ int vif = ipmr_find_vif(mrt, dev);
if (vif >= 0)
cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
@@ -2037,9 +2049,9 @@ int ip_mr_input(struct sk_buff *skb)
}
read_lock(&mrt_lock);
- vif = ipmr_find_vif(mrt, skb->dev);
+ vif = ipmr_find_vif(mrt, dev);
if (vif >= 0) {
- int err2 = ipmr_cache_unresolved(mrt, vif, skb);
+ int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
read_unlock(&mrt_lock);
return err2;
@@ -2050,7 +2062,7 @@ int ip_mr_input(struct sk_buff *skb)
}
read_lock(&mrt_lock);
- ip_mr_forward(net, mrt, skb, cache, local);
+ ip_mr_forward(net, mrt, dev, skb, cache, local);
read_unlock(&mrt_lock);
if (local)
@@ -2224,7 +2236,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
iph->saddr = saddr;
iph->daddr = daddr;
iph->version = 0;
- err = ipmr_cache_unresolved(mrt, vif, skb2);
+ err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 655d9eebe43e..6883b3d4ba8f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1385,8 +1385,12 @@ static void rt_add_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
+ struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rtable *rt = (struct rtable *) dst;
+ if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+ kfree(p);
+
if (!list_empty(&rt->rt_uncached)) {
struct uncached_list *ul = rt->rt_uncached_list;
@@ -1438,7 +1442,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
rt->rt_gateway = nh->nh_gw;
rt->rt_uses_gateway = 1;
}
- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+ dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
+ if (fi->fib_metrics != &dst_default_metrics) {
+ rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+ atomic_inc(&fi->fib_metrics->refcnt);
+ }
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1e4c76d2b827..40aca7803cf2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1084,9 +1084,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
+ struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
+ if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
+ uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
if (tp->fastopen_req)
return -EALREADY; /* Another Fast Open is in progress */
@@ -1108,7 +1111,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
}
}
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
- err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
+ err = __inet_stream_connect(sk->sk_socket, uaddr,
msg->msg_namelen, flags, 1);
/* fastopen_req could already be freed in __inet_stream_connect
* if the connection times out or gets rst
@@ -2320,9 +2323,15 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
+ /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
+ * issue in __tcp_select_window()
+ */
+ icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
tcp_init_send_head(sk);
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
+ dst_release(sk->sk_rx_dst);
+ sk->sk_rx_dst = NULL;
tcp_saved_syn_free(tp);
/* Clean up fastopen related fields */
@@ -2374,9 +2383,10 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l
return 0;
}
-static int tcp_repair_options_est(struct tcp_sock *tp,
+static int tcp_repair_options_est(struct sock *sk,
struct tcp_repair_opt __user *optbuf, unsigned int len)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct tcp_repair_opt opt;
while (len >= sizeof(opt)) {
@@ -2389,6 +2399,7 @@ static int tcp_repair_options_est(struct tcp_sock *tp,
switch (opt.opt_code) {
case TCPOPT_MSS:
tp->rx_opt.mss_clamp = opt.opt_val;
+ tcp_mtup_init(sk);
break;
case TCPOPT_WINDOW:
{
@@ -2548,7 +2559,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (!tp->repair)
err = -EINVAL;
else if (sk->sk_state == TCP_ESTABLISHED)
- err = tcp_repair_options_est(tp,
+ err = tcp_repair_options_est(sk,
(struct tcp_repair_opt __user *)optval,
optlen);
else
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 6e3c512054a6..324c9bcc5456 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -180,6 +180,7 @@ void tcp_init_congestion_control(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
+ tcp_sk(sk)->prior_ssthresh = 0;
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
if (tcp_ca_needs_ecn(sk))
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5a3ad09e2786..174d4376baa5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1179,13 +1179,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
*/
if (pkt_len > mss) {
unsigned int new_len = (pkt_len / mss) * mss;
- if (!in_sack && new_len < pkt_len) {
+ if (!in_sack && new_len < pkt_len)
new_len += mss;
- if (new_len >= skb->len)
- return 0;
- }
pkt_len = new_len;
}
+
+ if (pkt_len >= skb->len && !in_sack)
+ return 0;
+
err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
@@ -3189,7 +3190,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
int delta;
/* Non-retransmitted hole got filled? That's reordering */
- if (reord < prior_fackets)
+ if (reord < prior_fackets && reord <= tp->fackets_out)
tcp_update_reordering(sk, tp->fackets_out - reord, 0);
delta = tcp_is_fack(tp) ? pkts_acked :
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ea6e4cff9faf..1d6219bf2d6b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1612,7 +1612,7 @@ static void udp_v4_rehash(struct sock *sk)
udp_lib_rehash(sk, new_hash);
}
-int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int rc;
@@ -1657,7 +1657,7 @@ EXPORT_SYMBOL(udp_encap_enable);
* Note that in the success and error cases, the skb is assumed to
* have either been requeued or freed.
*/
-int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int is_udplite = IS_UDPLITE(sk);
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index feb50a16398d..a8cf8c6fb60c 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -25,7 +25,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len);
int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
-int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
void udp_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS