summaryrefslogtreecommitdiff
path: root/net/ipv4/ip_fragment.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_fragment.c')
-rw-r--r--net/ipv4/ip_fragment.c112
1 files changed, 59 insertions, 53 deletions
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cfeb8890f94e..f7012479713b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -76,21 +76,27 @@ static u8 ip4_frag_ecn(u8 tos)
static struct inet_frags ip4_frags;
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
- struct sk_buff *prev_tail, struct net_device *dev);
+ struct sk_buff *prev_tail, struct net_device *dev,
+ int *refs);
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
- struct net *net = q->fqdir->net;
-
const struct frag_v4_compare_key *key = a;
+ struct net *net = q->fqdir->net;
+ struct inet_peer *p = NULL;
q->key.v4 = *key;
qp->ecn = 0;
- qp->peer = q->fqdir->max_dist ?
- inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
- NULL;
+ if (q->fqdir->max_dist) {
+ rcu_read_lock();
+ p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif);
+ if (p && !refcount_inc_not_zero(&p->refcnt))
+ p = NULL;
+ rcu_read_unlock();
+ }
+ qp->peer = p;
}
static void ip4_frag_free(struct inet_frag_queue *q)
@@ -102,22 +108,6 @@ static void ip4_frag_free(struct inet_frag_queue *q)
inet_putpeer(qp->peer);
}
-
-/* Destruction primitives. */
-
-static void ipq_put(struct ipq *ipq)
-{
- inet_frag_put(&ipq->q);
-}
-
-/* Kill ipq entry. It is not destroyed immediately,
- * because caller (and someone more) holds reference count.
- */
-static void ipq_kill(struct ipq *ipq)
-{
- inet_frag_kill(&ipq->q);
-}
-
static bool frag_expire_skip_icmp(u32 user)
{
return user == IP_DEFRAG_AF_PACKET ||
@@ -132,19 +122,21 @@ static bool frag_expire_skip_icmp(u32 user)
*/
static void ip_expire(struct timer_list *t)
{
- struct inet_frag_queue *frag = from_timer(frag, t, timer);
+ enum skb_drop_reason reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
+ struct inet_frag_queue *frag = timer_container_of(frag, t, timer);
const struct iphdr *iph;
struct sk_buff *head = NULL;
struct net *net;
struct ipq *qp;
- int err;
+ int refs = 1;
qp = container_of(frag, struct ipq, q);
net = qp->q.fqdir->net;
rcu_read_lock();
- if (qp->q.fqdir->dead)
+ /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
+ if (READ_ONCE(qp->q.fqdir->dead))
goto out_rcu_unlock;
spin_lock(&qp->q.lock);
@@ -152,7 +144,8 @@ static void ip_expire(struct timer_list *t)
if (qp->q.flags & INET_FRAG_COMPLETE)
goto out;
- ipq_kill(qp);
+ qp->q.flags |= INET_FRAG_DROP;
+ inet_frag_kill(&qp->q, &refs);
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
@@ -173,14 +166,15 @@ static void ip_expire(struct timer_list *t)
/* skb has no dst, perform route lookup again */
iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
- iph->tos, head->dev);
- if (err)
+ reason = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), head->dev);
+ if (reason)
goto out;
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
+ reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
(skb_rtable(head)->rt_type != RTN_LOCAL))
goto out;
@@ -193,8 +187,8 @@ out:
spin_unlock(&qp->q.lock);
out_rcu_unlock:
rcu_read_unlock();
- kfree_skb(head);
- ipq_put(qp);
+ kfree_skb_reason(head, reason);
+ inet_frag_putn(&qp->q, refs);
}
/* Find the correct entry in the "incomplete datagrams" queue for
@@ -253,7 +247,8 @@ static int ip_frag_reinit(struct ipq *qp)
return -ETIMEDOUT;
}
- sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
+ sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments,
+ SKB_DROP_REASON_FRAG_TOO_FAR);
sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
qp->q.flags = 0;
@@ -269,7 +264,7 @@ static int ip_frag_reinit(struct ipq *qp)
}
/* Add new segment to existing queue. */
-static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb, int *refs)
{
struct net *net = qp->q.fqdir->net;
int ihl, end, flags, offset;
@@ -277,15 +272,19 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
struct net_device *dev;
unsigned int fragsize;
int err = -ENOENT;
+ SKB_DR(reason);
u8 ecn;
- if (qp->q.flags & INET_FRAG_COMPLETE)
+ /* If reassembly is already done, @skb must be a duplicate frag. */
+ if (qp->q.flags & INET_FRAG_COMPLETE) {
+ SKB_DR_SET(reason, DUP_FRAG);
goto err;
+ }
if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
unlikely(ip_frag_too_far(qp)) &&
unlikely(err = ip_frag_reinit(qp))) {
- ipq_kill(qp);
+ inet_frag_kill(&qp->q, refs);
goto err;
}
@@ -348,6 +347,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->iif = dev->ifindex;
qp->q.stamp = skb->tstamp;
+ qp->q.tstamp_type = skb->tstamp_type;
qp->q.meat += skb->len;
qp->ecn |= ecn;
add_frag_mem_limit(qp->q.fqdir, skb->truesize);
@@ -368,28 +368,30 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
unsigned long orefdst = skb->_skb_refdst;
skb->_skb_refdst = 0UL;
- err = ip_frag_reasm(qp, skb, prev_tail, dev);
+ err = ip_frag_reasm(qp, skb, prev_tail, dev, refs);
skb->_skb_refdst = orefdst;
if (err)
- inet_frag_kill(&qp->q);
+ inet_frag_kill(&qp->q, refs);
return err;
}
skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;
insert_error:
if (err == IPFRAG_DUP) {
- kfree_skb(skb);
- return -EINVAL;
+ SKB_DR_SET(reason, DUP_FRAG);
+ err = -EINVAL;
+ goto err;
}
err = -EINVAL;
__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
discard_qp:
- inet_frag_kill(&qp->q);
+ inet_frag_kill(&qp->q, refs);
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
err:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return err;
}
@@ -400,7 +402,8 @@ static bool ip_frag_coalesce_ok(const struct ipq *qp)
/* Build a new IP datagram from all its fragments. */
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
- struct sk_buff *prev_tail, struct net_device *dev)
+ struct sk_buff *prev_tail, struct net_device *dev,
+ int *refs)
{
struct net *net = qp->q.fqdir->net;
struct iphdr *iph;
@@ -408,7 +411,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
int len, err;
u8 ecn;
- ipq_kill(qp);
+ inet_frag_kill(&qp->q, refs);
ecn = ip_frag_ecn_table[qp->ecn];
if (unlikely(ecn == 0xff)) {
@@ -473,26 +476,30 @@ out_fail:
/* Process an incoming IP datagram fragment. */
int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
- struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
- int vif = l3mdev_master_ifindex_rcu(dev);
+ struct net_device *dev;
struct ipq *qp;
+ int vif;
__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
- skb_orphan(skb);
/* Lookup (or create) queue header */
+ rcu_read_lock();
+ dev = skb->dev ? : skb_dst_dev_rcu(skb);
+ vif = l3mdev_master_ifindex_rcu(dev);
qp = ip_find(net, ip_hdr(skb), user, vif);
if (qp) {
- int ret;
+ int ret, refs = 0;
spin_lock(&qp->q.lock);
- ret = ip_frag_queue(qp, skb);
+ ret = ip_frag_queue(qp, skb, &refs);
spin_unlock(&qp->q.lock);
- ipq_put(qp);
+ rcu_read_unlock();
+ inet_frag_putn(&qp->q, refs);
return ret;
}
+ rcu_read_unlock();
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
@@ -571,7 +578,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &dist_min,
},
- { }
};
/* secret interval has been deprecated */
@@ -584,7 +590,6 @@ static struct ctl_table ip4_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init ip4_frags_ns_ctl_register(struct net *net)
@@ -606,7 +611,8 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
table[2].data = &net->ipv4.fqdir->timeout;
table[3].data = &net->ipv4.fqdir->max_dist;
- hdr = register_net_sysctl(net, "net/ipv4", table);
+ hdr = register_net_sysctl_sz(net, "net/ipv4", table,
+ ARRAY_SIZE(ip4_frags_ns_ctl_table));
if (!hdr)
goto err_reg;
@@ -622,7 +628,7 @@ err_alloc:
static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->ipv4.frags_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.frags_hdr);