From a9341512c372fcc628dabc619898d910a06c54bc Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 26 Mar 2013 15:48:38 -0700 Subject: openvswitch: Preallocate reply skb in ovs_vport_cmd_set(). Allocation of the Netlink notification skb can potentially fail after changing vport configuration. In general, we try to avoid this by undoing any change we made but that is difficult for existing objects. This avoids the problem by preallocating the buffer (which is fixed size). Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a4b724708a1a..6980c3e6f066 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1593,10 +1593,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, return ERR_PTR(-ENOMEM); retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } + BUG_ON(retval < 0); + return skb; } @@ -1726,24 +1724,32 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) err = -EINVAL; + reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!reply) { + err = -ENOMEM; + goto exit_unlock; + } + if (!err && a[OVS_VPORT_ATTR_OPTIONS]) err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) - goto exit_unlock; + goto exit_free; + if (a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, - ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + rtnl_unlock(); + return 0; + +exit_free: + kfree_skb(reply); exit_unlock: rtnl_unlock(); return err; -- cgit From d3e1101c9b75574e68380b5cb10c9395fd8855de Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Wed, 27 Mar 2013 20:41:17 +0800 Subject: openvswitch: correct an invalid BUG_ON table->count is uint32_t Signed-off-by: Hong Zhiguo Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index fe0e4215c73d..67a2b783fe70 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -795,9 +795,9 @@ void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { + BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[table->node_ver]); table->count--; - BUG_ON(table->count < 0); } /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ -- cgit From a58e0be6f6b3eb2079b0b8fedc9df6fa86869f1e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Mar 2013 12:52:59 -0400 Subject: SUNRPC: Remove extra xprt_put() While testing error cases where rpc_new_client() fails, I saw some oopses. If rpc_new_client() fails, it already invokes xprt_put(). Thus __rpc_clone_client() does not need to invoke it again. Introduced by commit 1b63a751 "SUNRPC: Refactor rpc_clone_client()" Fri Sep 14, 2012. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org [>=3.7] Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index dcc446e7fbf6..9df26b785dc7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -512,7 +512,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new = rpc_new_client(args, xprt); if (IS_ERR(new)) { err = PTR_ERR(new); - goto out_put; + goto out_err; } atomic_inc(&clnt->cl_count); @@ -525,8 +525,6 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_chatty = clnt->cl_chatty; return new; -out_put: - xprt_put(xprt); out_err: dprintk("RPC: %s: returned error %d\n", __func__, err); return ERR_PTR(err); -- cgit From f05c124a70a4953a66acbd6d6c601ea1eb5d0fa7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2013 14:13:21 -0400 Subject: SUNRPC: Fix a potential memory leak in rpc_new_client If the call to rpciod_up() fails, we currently leak a reference to the struct rpc_xprt. As part of the fix, we also remove the redundant check for xprt!=NULL. This is already taken care of by the callers. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9df26b785dc7..d5f35f15af98 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -304,10 +304,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru err = rpciod_up(); if (err) goto out_no_rpciod; - err = -EINVAL; - if (!xprt) - goto out_no_xprt; + err = -EINVAL; if (args->version >= program->nrvers) goto out_err; version = program->version[args->version]; @@ -382,10 +380,9 @@ out_no_principal: out_no_stats: kfree(clnt); out_err: - xprt_put(xprt); -out_no_xprt: rpciod_down(); out_no_rpciod: + xprt_put(xprt); return ERR_PTR(err); } -- cgit From 3a7b21eaf4fb3c971bdb47a98f570550ddfe4471 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 5 Apr 2013 08:13:30 +0000 Subject: netfilter: nf_ct_sip: don't drop packets with offsets pointing outside the packet Some Cisco phones create huge messages that are spread over multiple packets. After calculating the offset of the SIP body, it is validated to be within the packet and the packet is dropped otherwise. This breaks operation of these phones. Since connection tracking is supposed to be passive, just let those packets pass unmodified and untracked. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 0e7d423324c3..e0c4373b4747 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1593,10 +1593,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, end += strlen("\r\n\r\n") + clen; msglen = origlen = end - dptr; - if (msglen > datalen) { - nf_ct_helper_log(skb, ct, "incomplete/bad SIP message"); - return NF_DROP; - } + if (msglen > datalen) + return NF_ACCEPT; ret = process_sip_msg(skb, ct, protoff, dataoff, &dptr, &msglen); -- cgit From 62a40a15554d6924a58b3e9f8756e0d683dc9c0c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Apr 2013 11:52:34 +0200 Subject: mac80211: fix LED in idle handling feng xiangjun reports that my commit 382a103b2b528a3085cde4ac56fc69d92a828b72 Author: Johannes Berg Date: Fri Mar 22 22:30:09 2013 +0100 mac80211: fix idle handling sequence broke the wireless status LED. The reason is that we now call ieee80211_idle_off() when the channel context is assigned, and that doesn't recalculate the LED state. Fix this by making that function a wrapper around most of idle recalculation while forcing active. Reported-by: feng xiangjun Tested-by: feng xiangjun Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 58150f877ec3..9ed49ad0380f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -78,7 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -u32 ieee80211_idle_off(struct ieee80211_local *local) +static u32 __ieee80211_idle_off(struct ieee80211_local *local) { if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; @@ -87,7 +87,7 @@ u32 ieee80211_idle_off(struct ieee80211_local *local) return IEEE80211_CONF_CHANGE_IDLE; } -static u32 ieee80211_idle_on(struct ieee80211_local *local) +static u32 __ieee80211_idle_on(struct ieee80211_local *local) { if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; @@ -98,16 +98,18 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) return IEEE80211_CONF_CHANGE_IDLE; } -void ieee80211_recalc_idle(struct ieee80211_local *local) +static u32 __ieee80211_recalc_idle(struct ieee80211_local *local, + bool force_active) { bool working = false, scanning, active; unsigned int led_trig_start = 0, led_trig_stop = 0; struct ieee80211_roc_work *roc; - u32 change; lockdep_assert_held(&local->mtx); - active = !list_empty(&local->chanctx_list) || local->monitors; + active = force_active || + !list_empty(&local->chanctx_list) || + local->monitors; if (!local->ops->remain_on_channel) { list_for_each_entry(roc, &local->roc_list, list) { @@ -132,9 +134,18 @@ void ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); if (working || scanning || active) - change = ieee80211_idle_off(local); - else - change = ieee80211_idle_on(local); + return __ieee80211_idle_off(local); + return __ieee80211_idle_on(local); +} + +u32 ieee80211_idle_off(struct ieee80211_local *local) +{ + return __ieee80211_recalc_idle(local, true); +} + +void ieee80211_recalc_idle(struct ieee80211_local *local) +{ + u32 change = __ieee80211_recalc_idle(local, false); if (change) ieee80211_hw_config(local, change); } -- cgit From 05a324b9c50c3edbe0ce48ee3e37b210859ef1ae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Apr 2013 23:39:38 +0000 Subject: net: ipv4: reset check_lifetime_work after changing lifetime This will result in calling check_lifetime in nearest opportunity and that function will adjust next time to call check_lifetime correctly. Without this, check_lifetime is called in time computed by previous run, not affecting modified lifetime. Introduced by: commit 5c766d642bcaf "ipv4: introduce address lifetime" Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 96083b7a436b..00386e02e708 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -804,6 +804,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg return -EEXIST; ifa = ifa_existing; set_ifa_lifetime(ifa, valid_lft, prefered_lft); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } -- cgit From c988d1e8cbf722e34ee6124b8b89d47fec655b51 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Apr 2013 23:39:39 +0000 Subject: net: ipv4: fix schedule while atomic bug in check_lifetime() move might_sleep operations out of the rcu_read_lock() section. Also fix iterating over ifa_dev->ifa_list Introduced by: commit 5c766d642bcaf "ipv4: introduce address lifetime" Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 58 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 00386e02e708..c6287cd978c2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -587,13 +587,16 @@ static void check_lifetime(struct work_struct *work) { unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; + struct hlist_node *n; int i; now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - rcu_read_lock(); for (i = 0; i < IN4_ADDR_HSIZE; i++) { + bool change_needed = false; + + rcu_read_lock(); hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { unsigned long age; @@ -606,16 +609,7 @@ static void check_lifetime(struct work_struct *work) if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && age >= ifa->ifa_valid_lft) { - struct in_ifaddr **ifap ; - - rtnl_lock(); - for (ifap = &ifa->ifa_dev->ifa_list; - *ifap != NULL; ifap = &ifa->ifa_next) { - if (*ifap == ifa) - inet_del_ifa(ifa->ifa_dev, - ifap, 1); - } - rtnl_unlock(); + change_needed = true; } else if (ifa->ifa_preferred_lft == INFINITY_LIFE_TIME) { continue; @@ -625,10 +619,8 @@ static void check_lifetime(struct work_struct *work) next = ifa->ifa_tstamp + ifa->ifa_valid_lft * HZ; - if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) { - ifa->ifa_flags |= IFA_F_DEPRECATED; - rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); - } + if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) + change_needed = true; } else if (time_before(ifa->ifa_tstamp + ifa->ifa_preferred_lft * HZ, next)) { @@ -636,8 +628,42 @@ static void check_lifetime(struct work_struct *work) ifa->ifa_preferred_lft * HZ; } } + rcu_read_unlock(); + if (!change_needed) + continue; + rtnl_lock(); + hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. */ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + struct in_ifaddr **ifap; + + for (ifap = &ifa->ifa_dev->ifa_list; + *ifap != NULL; ifap = &(*ifap)->ifa_next) { + if (*ifap == ifa) { + inet_del_ifa(ifa->ifa_dev, + ifap, 1); + break; + } + } + } else if (ifa->ifa_preferred_lft != + INFINITY_LIFE_TIME && + age >= ifa->ifa_preferred_lft && + !(ifa->ifa_flags & IFA_F_DEPRECATED)) { + ifa->ifa_flags |= IFA_F_DEPRECATED; + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); + } + } + rtnl_unlock(); } - rcu_read_unlock(); next_sec = round_jiffies_up(next); next_sched = next; -- cgit From 88c5b5ce5cb57af6ca2a7cf4d5715fa320448ff9 Mon Sep 17 00:00:00 2001 From: Michael Riesch Date: Mon, 8 Apr 2013 05:45:26 +0000 Subject: rtnetlink: Call nlmsg_parse() with correct header length Signed-off-by: Michael Riesch Cc: "David S. Miller" Cc: Greg Kroah-Hartman Cc: Jiri Benc Cc: "Theodore Ts'o" Cc: linux-kernel@vger.kernel.org Acked-by: Mark Rustad Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b65441da74ab..23854b51a259 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1072,7 +1072,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; - if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) @@ -1922,7 +1922,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) u32 ext_filter_mask = 0; u16 min_ifinfo_dump_size = 0; - if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); -- cgit From f9c41a62bba3f3f7ef3541b2a025e3371bcbba97 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Sun, 7 Apr 2013 22:19:26 +0000 Subject: af_iucv: fix recvmsg by replacing skb_pull() function When receiving data messages, the "BUG_ON(skb->len < skb->data_len)" in the skb_pull() function triggers a kernel panic. Replace the skb_pull logic by a per skb offset as advised by Eric Dumazet. Signed-off-by: Ursula Braun Signed-off-by: Frank Blaschka Reviewed-by: Hendrik Brueckner Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index bf6935820001..206ce6db2c36 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -49,12 +49,6 @@ static const u8 iprm_shutdown[8] = #define TRGCLS_SIZE (sizeof(((struct iucv_message *)0)->class)) -/* macros to set/get socket control buffer at correct offset */ -#define CB_TAG(skb) ((skb)->cb) /* iucv message tag */ -#define CB_TAG_LEN (sizeof(((struct iucv_message *) 0)->tag)) -#define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ -#define CB_TRGCLS_LEN (TRGCLS_SIZE) - #define __iucv_sock_wait(sk, condition, timeo, ret) \ do { \ DEFINE_WAIT(__wait); \ @@ -1141,7 +1135,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; - memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + IUCV_SKB_CB(skb)->tag = txmsg.tag; if (iucv->transport == AF_IUCV_TRANS_HIPER) { atomic_inc(&iucv->msg_sent); @@ -1224,7 +1218,7 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) return -ENOMEM; /* copy target class to control buffer of new skb */ - memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN); + IUCV_SKB_CB(nskb)->class = IUCV_SKB_CB(skb)->class; /* copy data fragment */ memcpy(nskb->data, skb->data + copied, size); @@ -1256,7 +1250,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, /* store msg target class in the second 4 bytes of skb ctrl buffer */ /* Note: the first 4 bytes are reserved for msg tag */ - memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = msg->class; /* check for special IPRM messages (e.g. iucv_sock_shutdown) */ if ((msg->flags & IUCV_IPRMDATA) && len > 7) { @@ -1292,6 +1286,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, } } + IUCV_SKB_CB(skb)->offset = 0; if (sock_queue_rcv_skb(sk, skb)) skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); } @@ -1327,6 +1322,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned int copied, rlen; struct sk_buff *skb, *rskb, *cskb; int err = 0; + u32 offset; msg->msg_namelen = 0; @@ -1348,13 +1344,14 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - rlen = skb->len; /* real length of skb */ + offset = IUCV_SKB_CB(skb)->offset; + rlen = skb->len - offset; /* real length of skb */ copied = min_t(unsigned int, rlen, len); if (!rlen) sk->sk_shutdown = sk->sk_shutdown | RCV_SHUTDOWN; cskb = skb; - if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { + if (skb_copy_datagram_iovec(cskb, offset, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; @@ -1372,7 +1369,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, * get the trgcls from the control buffer of the skb due to * fragmentation of original iucv message. */ err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS, - CB_TRGCLS_LEN, CB_TRGCLS(skb)); + sizeof(IUCV_SKB_CB(skb)->class), + (void *)&IUCV_SKB_CB(skb)->class); if (err) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); @@ -1384,9 +1382,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, /* SOCK_STREAM: re-queue skb if it contains unreceived data */ if (sk->sk_type == SOCK_STREAM) { - skb_pull(skb, copied); - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); + if (copied < rlen) { + IUCV_SKB_CB(skb)->offset = offset + copied; goto done; } } @@ -1405,6 +1402,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, spin_lock_bh(&iucv->message_q.lock); rskb = skb_dequeue(&iucv->backlog_skb_q); while (rskb) { + IUCV_SKB_CB(rskb)->offset = 0; if (sock_queue_rcv_skb(sk, rskb)) { skb_queue_head(&iucv->backlog_skb_q, rskb); @@ -1832,7 +1830,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) { + if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } @@ -2093,6 +2091,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); skb_reset_transport_header(skb); skb_reset_network_header(skb); + IUCV_SKB_CB(skb)->offset = 0; spin_lock(&iucv->message_q.lock); if (skb_queue_empty(&iucv->backlog_skb_q)) { if (sock_queue_rcv_skb(sk, skb)) { @@ -2197,8 +2196,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, /* fall through and receive zero length data */ case 0: /* plain data frame */ - memcpy(CB_TRGCLS(skb), &trans_hdr->iucv_hdr.class, - CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = trans_hdr->iucv_hdr.class; err = afiucv_hs_callback_rx(sk, skb); break; default: -- cgit From c802d759623acbd6e1ee9fbdabae89159a513913 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 9 Apr 2013 10:07:19 +0800 Subject: netrom: fix invalid use of sizeof in nr_recvmsg() sizeof() when applied to a pointer typed expression gives the size of the pointer, not that of the pointed data. Introduced by commit 3ce5ef(netrom: fix info leak via msg_name in nr_recvmsg) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7fcb307dea47..103bd704b5fc 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1173,7 +1173,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, } if (sax != NULL) { - memset(sax, 0, sizeof(sax)); + memset(sax, 0, sizeof(*sax)); sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); -- cgit From 3480a2125923e4b7a56d79efc76743089bf273fc Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 9 Apr 2013 14:16:04 +0800 Subject: can: gw: use kmem_cache_free() instead of kfree() Memory allocated by kmem_cache_alloc() should be freed using kmem_cache_free(), not kfree(). Cc: linux-stable # >= v3.2 Signed-off-by: Wei Yongjun Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/gw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/can/gw.c b/net/can/gw.c index 2d117dc5ebea..117814a7e73c 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -466,7 +466,7 @@ static int cgw_notifier(struct notifier_block *nb, if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } } @@ -864,7 +864,7 @@ static void cgw_remove_all_jobs(void) hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } @@ -920,7 +920,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); err = 0; break; } -- cgit From ca10b9e9a8ca7342ee07065289cbe74ac128c169 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Apr 2013 17:58:11 +0000 Subject: selinux: add a skb_owned_by() hook Commit 90ba9b1986b5ac (tcp: tcp_make_synack() can use alloc_skb()) broke certain SELinux/NetLabel configurations by no longer correctly assigning the sock to the outgoing SYNACK packet. Cost of atomic operations on the LISTEN socket is quite big, and we would like it to happen only if really needed. This patch introduces a new security_ops->skb_owned_by() method, that is a void operation unless selinux is active. Reported-by: Miroslav Vadkerti Diagnosed-by: Paul Moore Signed-off-by: Eric Dumazet Cc: "David S. Miller" Cc: linux-security-module@vger.kernel.org Acked-by: James Morris Tested-by: Paul Moore Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5d0b4387cba6..b44cf81d8178 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2709,6 +2709,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); skb_dst_set(skb, dst); + security_skb_owned_by(skb, sk); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) -- cgit From 02f815cb6d3f57914228be84df9613ee5a01c2e6 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 9 Apr 2013 08:57:19 +0000 Subject: netfilter: ipset: list:set: fix reference counter update The last element can be replaced or pushed off and in both cases the reference counter must be updated. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_list_set.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 8371c2bac2e4..09c744aa8982 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -174,9 +174,13 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, { const struct set_elem *e = list_set_elem(map, i); - if (i == map->size - 1 && e->id != IPSET_INVALID_ID) - /* Last element replaced: e.g. add new,before,last */ - ip_set_put_byindex(e->id); + if (e->id != IPSET_INVALID_ID) { + const struct set_elem *x = list_set_elem(map, map->size - 1); + + /* Last element replaced or pushed off */ + if (x->id != IPSET_INVALID_ID) + ip_set_put_byindex(x->id); + } if (with_timeout(map->timeout)) list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); else -- cgit From 6eb4c7e96e19fd2c38a103472048fc0e0e0a3ec3 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 9 Apr 2013 08:57:20 +0000 Subject: netfilter: ipset: hash:*net*: nomatch flag not excluded on set resize If a resize is triggered the nomatch flag is not excluded at hashing, which leads to the element missed at lookup in the resized set. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 18 ++++++++++++++++++ net/netfilter/ipset/ip_set_hash_net.c | 22 ++++++++++++++++++++-- net/netfilter/ipset/ip_set_hash_netiface.c | 22 ++++++++++++++++++++-- net/netfilter/ipset/ip_set_hash_netport.c | 18 ++++++++++++++++++ 4 files changed, 76 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f2627226a087..10a30b4fc7db 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -104,6 +104,15 @@ hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem) { @@ -414,6 +423,15 @@ hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem) { diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 4b677cf6bf7d..d6a59154d710 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -87,7 +87,16 @@ hash_net4_data_copy(struct hash_net4_elem *dst, static inline void hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_net4_data_reset_flags(struct hash_net4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int @@ -308,7 +317,16 @@ hash_net6_data_copy(struct hash_net6_elem *dst, static inline void hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_net6_data_reset_flags(struct hash_net6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 6ba985f1c96f..f2b0a3c30130 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -198,7 +198,16 @@ hash_netiface4_data_copy(struct hash_netiface4_elem *dst, static inline void hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netiface4_data_reset_flags(struct hash_netiface4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int @@ -494,7 +503,7 @@ hash_netiface6_data_copy(struct hash_netiface6_elem *dst, static inline void hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } static inline int @@ -503,6 +512,15 @@ hash_netiface6_data_match(const struct hash_netiface6_elem *elem) return elem->nomatch ? -ENOTEMPTY : 1; } +static inline void +hash_netiface6_data_reset_flags(struct hash_netiface6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline void hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) { diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index af20c0c5ced2..349deb672a2d 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -104,6 +104,15 @@ hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_netport4_data_reset_flags(struct hash_netport4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_netport4_data_match(const struct hash_netport4_elem *elem) { @@ -375,6 +384,15 @@ hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_netport6_data_reset_flags(struct hash_netport6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_netport6_data_match(const struct hash_netport6_elem *elem) { -- cgit From 7b119dc06d871405fc7c3e9a73a6c987409ba639 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 10 Apr 2013 21:38:36 +0200 Subject: mac80211: fix cfg80211 interaction on auth/assoc request If authentication (or association with FT) is requested by userspace, mac80211 currently doesn't tell cfg80211 that it disconnected from the AP. That leaves inconsistent state: cfg80211 thinks it's connected while mac80211 thinks it's not. Typically this won't last long, as soon as mac80211 reports the new association to cfg80211 the old one goes away. If, however, the new authentication or association doesn't succeed, then cfg80211 will forever think the old one still exists and will refuse attempts to authenticate or associate with the AP it thinks it's connected to. Anders reported that this leads to it taking a very long time to reconnect to a network, or never even succeeding. I tested this with an AP hacked to never respond to auth frames, and one that works, and with just those two the system never recovers because one won't work and cfg80211 thinks it's connected to the other so refuses connections to it. To fix this, simply make mac80211 tell cfg80211 when it is no longer connected to the old AP, while authenticating or associating to a new one. Cc: stable@vger.kernel.org Reported-by: Anders Kaseorg Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 82cc30318a86..346ad4cfb013 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3964,8 +3964,16 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* prep auth_data so we don't go into idle on disassoc */ ifmgd->auth_data = auth_data; - if (ifmgd->associated) - ieee80211_set_disassoc(sdata, 0, 0, false, NULL); + if (ifmgd->associated) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, + false, frame_buf); + + __cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); + } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -4025,8 +4033,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) - ieee80211_set_disassoc(sdata, 0, 0, false, NULL); + if (ifmgd->associated) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, + false, frame_buf); + + __cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); + } if (ifmgd->auth_data && !ifmgd->auth_data->done) { err = -EBUSY; -- cgit From d66954a066158781ccf9c13c91d0316970fe57b6 Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Thu, 11 Apr 2013 08:55:07 +0000 Subject: tcp: incoming connections might use wrong route under synflood There is a bug in cookie_v4_check (net/ipv4/syncookies.c): flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest); Here we do not respect sk->sk_bound_dev_if, therefore wrong dst_entry may be taken. This dst_entry is used by new socket (get_cookie_sock -> tcp_v4_syn_recv_sock), so its packets may take the wrong path. Signed-off-by: Dmitry Popov Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ef54377fb11c..397e0f69435f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -349,8 +349,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), - RT_SCOPE_UNIVERSE, IPPROTO_TCP, + flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest); -- cgit From 50bceae9bd3569d56744882f3012734d48a1d413 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 11 Apr 2013 10:57:18 +0000 Subject: tcp: Reallocate headroom if it would overflow csum_start If a TCP retransmission gets partially ACKed and collapsed multiple times it is possible for the headroom to grow beyond 64K which will overflow the 16bit skb->csum_start which is based on the start of the headroom. It has been observed rarely in the wild with IPoIB due to the 64K MTU. Verify if the acking and collapsing resulted in a headroom exceeding what csum_start can cover and reallocate the headroom if so. A big thank you to Jim Foraker and the team at LLNL for helping out with the investigation and testing. Reported-by: Jim Foraker Signed-off-by: Thomas Graf Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b44cf81d8178..509912a5ff98 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2388,8 +2388,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ TCP_SKB_CB(skb)->when = tcp_time_stamp; - /* make sure skb->data is aligned on arches that require it */ - if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { + /* make sure skb->data is aligned on arches that require it + * and check if ack-trimming & collapsing extended the headroom + * beyond what csum_start can cover. + */ + if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || + skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -- cgit From c2d421e171868586939c328dfb91bab840fe4c49 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Apr 2013 04:22:39 +0000 Subject: netfilter: nf_nat: fix race when unloading protocol modules following oops was reported: RIP: 0010:[] [] nf_nat_cleanup_conntrack+0x42/0x70 [nf_nat] RSP: 0018:ffff880202c63d40 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8801ac7bec28 RCX: ffff8801d0eedbe0 RDX: dead000000200200 RSI: 0000000000000011 RDI: ffffffffa03265b8 [..] Call Trace: [..] [] destroy_conntrack+0xbd/0x110 [nf_conntrack] Happens when a conntrack timeout expires right after first part of the nat cleanup has completed (bysrc hash removal), but before part 2 has completed (re-initialization of nat area). [ destroy callback tries to delete bysrc again ] Patrick suggested to just remove the affected conntracks -- the connections won't work properly anyway without nat transformation. So, lets do that. Reported-by: CAI Qian Cc: Patrick McHardy Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 40 +++++++--------------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 8d5769c6d16e..ad24be070e53 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -467,33 +467,22 @@ EXPORT_SYMBOL_GPL(nf_nat_packet); struct nf_nat_proto_clean { u8 l3proto; u8 l4proto; - bool hash; }; -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int nf_nat_proto_clean(struct nf_conn *i, void *data) +/* kill conntracks with affected NAT section */ +static int nf_nat_proto_remove(struct nf_conn *i, void *data) { const struct nf_nat_proto_clean *clean = data; struct nf_conn_nat *nat = nfct_nat(i); if (!nat) return 0; - if (!(i->status & IPS_SRC_NAT_DONE)) - return 0; + if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) return 0; - if (clean->hash) { - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); - spin_unlock_bh(&nf_nat_lock); - } else { - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | - IPS_SEQ_ADJUST); - } - return 0; + return i->status & IPS_NAT_MASK ? 1 : 0; } static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) @@ -505,16 +494,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - - /* Step 2 - clean NAT section */ - clean.hash = false; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -526,16 +507,9 @@ static void nf_nat_l3proto_clean(u8 l3proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - /* Step 2 - clean NAT section */ - clean.hash = false; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -773,7 +747,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; - nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean); synchronize_rcu(); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } -- cgit From fb745e9a037895321781d066aa24757ceabf9df9 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 11 Apr 2013 13:47:15 +0000 Subject: net/802/mrp: fix possible race condition when calling mrp_pdu_queue() (Adapted from a very similar change to net/802/garp.c by Cong Wang.) mrp_pdu_queue() should ways be called with the applicant spin lock. mrp_uninit_applicant() only holds the rtnl lock which is not enough; a race is possible because mrp_rcv() is called in BH context: mrp_rcv() |->mrp_pdu_parse_msg() |->mrp_pdu_parse_vecattr() |->mrp_pdu_parse_vecattr_event() |-> mrp_attr_event() |-> mrp_pdu_append_vecattr_event() |-> mrp_pdu_queue() Cc: Cong Wang Cc: Eric Dumazet Signed-off-by: David Ward Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/802/mrp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/802/mrp.c b/net/802/mrp.c index a4cc3229952a..e085bcc754f6 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -870,8 +870,12 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) * all pending messages before the applicant is gone. */ del_timer_sync(&app->join_timer); + + spin_lock(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); mrp_pdu_queue(app); + spin_unlock(&app->lock); + mrp_queue_xmit(app); dev_mc_del(dev, appl->group_address); -- cgit From f88c91ddba958e9a5dd4a5ee8c52a0faa790f586 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 14 Apr 2013 23:18:43 +0800 Subject: ipv6: statically link register_inet6addr_notifier() Tomas reported the following build error: net/built-in.o: In function `ieee80211_unregister_hw': (.text+0x10f0e1): undefined reference to `unregister_inet6addr_notifier' net/built-in.o: In function `ieee80211_register_hw': (.text+0x10f610): undefined reference to `register_inet6addr_notifier' make: *** [vmlinux] Error 1 when built IPv6 as a module. So we have to statically link these symbols. Reported-by: Tomas Melin Cc: Tomas Melin Cc: "David S. Miller" Cc: YOSHIFUJI Hidaki Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 24 +++--------------------- net/ipv6/addrconf_core.c | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a459c4f5b769..dae802c0af7c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -168,8 +168,6 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev); -static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); - static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, @@ -837,7 +835,7 @@ out2: rcu_read_unlock_bh(); if (likely(err == 0)) - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); + inet6addr_notifier_call_chain(NETDEV_UP, ifa); else { kfree(ifa); ifa = ERR_PTR(err); @@ -927,7 +925,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ipv6_ifa_notify(RTM_DELADDR, ifp); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifp); /* * Purge or update corresponding prefix @@ -2988,7 +2986,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } in6_ifa_put(ifa); @@ -4869,22 +4867,6 @@ static struct pernet_operations addrconf_ops = { .exit = addrconf_exit_net, }; -/* - * Device notifier - */ - -int register_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(register_inet6addr_notifier); - -int unregister_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(unregister_inet6addr_notifier); - static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index d051e5f4bf34..72104562c864 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -78,3 +78,22 @@ int __ipv6_addr_type(const struct in6_addr *addr) } EXPORT_SYMBOL(__ipv6_addr_type); +static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); + +int register_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(register_inet6addr_notifier); + +int unregister_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(unregister_inet6addr_notifier); + +int inet6addr_notifier_call_chain(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&inet6addr_chain, val, v); +} +EXPORT_SYMBOL(inet6addr_notifier_call_chain); -- cgit From 8f3359bdc83f1abb1989e0817ab0e0b18667c828 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sat, 13 Apr 2013 14:06:07 +0000 Subject: bridge: make user modified path cost sticky Keep a STP port path cost value if it was set by a user. Don't replace it with the link-speed based path cost whenever the link goes down and comes back up. Reported-by: Roopa Prabhu Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 3 ++- net/bridge/br_private.h | 1 + net/bridge/br_stp_if.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ef1b91431c6b..459dab22b3f6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -67,7 +67,8 @@ void br_port_carrier_check(struct net_bridge_port *p) struct net_device *dev = p->dev; struct net_bridge *br = p->br; - if (netif_running(dev) && netif_oper_up(dev)) + if (!(p->flags & BR_ADMIN_COST) && + netif_running(dev) && netif_oper_up(dev)) p->path_cost = port_cost(dev); if (!netif_running(br->dev)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3cbf5beb3d4b..d2c043a857b6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -156,6 +156,7 @@ struct net_bridge_port #define BR_BPDU_GUARD 0x00000002 #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 +#define BR_ADMIN_COST 0x00000010 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 0bdb4ebd362b..d45e760141bb 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -288,6 +288,7 @@ int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost) path_cost > BR_MAX_PATH_COST) return -ERANGE; + p->flags |= BR_ADMIN_COST; p->path_cost = path_cost; br_configuration_update(p->br); br_port_state_selection(p->br); -- cgit From 06848c10f720cbc20e3b784c0df24930b7304b93 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 13 Apr 2013 15:49:03 +0000 Subject: esp4: fix error return code in esp_output() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 3b4f0cd2e63e..4cfe34d4cc96 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -139,8 +139,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ - err = -ENOMEM; - esp = x->data; aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -176,8 +174,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); -- cgit From 97599dc792b45b1669c3cdb9a4b365aad0232f65 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Apr 2013 12:55:41 +0000 Subject: net: drop dst before queueing fragments Commit 4a94445c9a5c (net: Use ip_route_input_noref() in input path) added a bug in IP defragmentation handling, as non refcounted dst could escape an RCU protected section. Commit 64f3b9e203bd068 (net: ip_expire() must revalidate route) fixed the case of timeouts, but not the general problem. Tom Parkin noticed crashes in UDP stack and provided a patch, but further analysis permitted us to pinpoint the root cause. Before queueing a packet into a frag list, we must drop its dst, as this dst has limited lifetime (RCU protected) When/if a packet is finally reassembled, we use the dst of the very last skb, still protected by RCU and valid, as the dst of the reassembled packet. Use same logic in IPv6, as there is no need to hold dst references. Reported-by: Tom Parkin Tested-by: Tom Parkin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 14 ++++++++++---- net/ipv6/reassembly.c | 12 ++++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a6445b843ef4..52c273ea05c3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -248,8 +248,7 @@ static void ip_expire(unsigned long arg) if (!head->dev) goto out_rcu_unlock; - /* skb dst is stale, drop it, and perform route lookup again */ - skb_dst_drop(head); + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); @@ -523,9 +522,16 @@ found: qp->q.max_size = skb->len + ihl; if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - qp->q.meat == qp->q.len) - return ip_frag_reasm(qp, prev, dev); + qp->q.meat == qp->q.len) { + unsigned long orefdst = skb->_skb_refdst; + skb->_skb_refdst = 0UL; + err = ip_frag_reasm(qp, prev, dev); + skb->_skb_refdst = orefdst; + return err; + } + + skb_dst_drop(skb); inet_frag_lru_move(&qp->q); return -EINPROGRESS; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 196ab9347ad1..0ba10e53a629 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -330,9 +330,17 @@ found: } if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) - return ip6_frag_reasm(fq, prev, dev); + fq->q.meat == fq->q.len) { + int res; + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + res = ip6_frag_reasm(fq, prev, dev); + skb->_skb_refdst = orefdst; + return res; + } + skb_dst_drop(skb); inet_frag_lru_move(&fq->q); return -1; -- cgit From fe8a93b95145c66adf196eea4a919dfe0b7c57db Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 3 Apr 2013 19:10:26 +0200 Subject: batman-adv: make is_my_mac() check for the current mesh only On a multi-mesh node (a node running more than one batman-adv virtual interface) batadv_is_my_mac() has to check MAC addresses of hard interfaces belonging to the current mesh only. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/main.c | 5 ++++- net/batman-adv/main.h | 2 +- net/batman-adv/routing.c | 38 ++++++++++++++++++++------------------ net/batman-adv/translation-table.c | 2 +- net/batman-adv/vis.c | 4 ++-- 5 files changed, 28 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 0488d70c8c35..fa563e497c48 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -169,7 +169,7 @@ void batadv_mesh_free(struct net_device *soft_iface) atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } -int batadv_is_my_mac(const uint8_t *addr) +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr) { const struct batadv_hard_iface *hard_iface; @@ -178,6 +178,9 @@ int batadv_is_my_mac(const uint8_t *addr) if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) { rcu_read_unlock(); return 1; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index ced08b936a96..d40910dfc8ea 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -162,7 +162,7 @@ extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *soft_iface); void batadv_mesh_free(struct net_device *soft_iface); -int batadv_is_my_mac(const uint8_t *addr); +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_hard_iface * batadv_seq_print_text_primary_if_get(struct seq_file *seq); int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 5ee21cebbbb0..319f2906c71a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -402,7 +402,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, goto out; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) goto out; icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; @@ -416,7 +416,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, } /* packet for me */ - if (batadv_is_my_mac(icmp_packet->dst)) + if (batadv_is_my_mac(bat_priv, icmp_packet->dst)) return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size); /* TTL exceeded */ @@ -548,7 +548,8 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, return router; } -static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) +static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) { struct ethhdr *ethhdr; @@ -567,7 +568,7 @@ static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) return -1; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return -1; return 0; @@ -582,7 +583,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) char tt_flag; size_t packet_size; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; /* I could need to modify it */ @@ -614,7 +615,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) case BATADV_TT_RESPONSE: batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX); - if (batadv_is_my_mac(tt_query->dst)) { + if (batadv_is_my_mac(bat_priv, tt_query->dst)) { /* packet needs to be linearized to access the TT * changes */ @@ -657,14 +658,15 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) struct batadv_roam_adv_packet *roam_adv_packet; struct batadv_orig_node *orig_node; - if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, + sizeof(*roam_adv_packet)) < 0) goto out; batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); roam_adv_packet = (struct batadv_roam_adv_packet *)skb->data; - if (!batadv_is_my_mac(roam_adv_packet->dst)) + if (!batadv_is_my_mac(bat_priv, roam_adv_packet->dst)) return batadv_route_unicast_packet(skb, recv_if); /* check if it is a backbone gateway. we don't accept @@ -967,7 +969,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * last time) the packet had an updated information or not */ curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); - if (!batadv_is_my_mac(unicast_packet->dest)) { + if (!batadv_is_my_mac(bat_priv, unicast_packet->dest)) { orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); /* if it is not possible to find the orig_node representing the @@ -1044,14 +1046,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) return NET_RX_DROP; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { if (is4addr) { batadv_dat_inc_counter(bat_priv, unicast_4addr_packet->subtype); @@ -1088,7 +1090,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, struct sk_buff *new_skb = NULL; int ret; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) @@ -1097,7 +1099,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_frag_packet *)skb->data; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) @@ -1151,13 +1153,13 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, goto out; /* ignore broadcasts sent by myself */ - if (batadv_is_my_mac(ethhdr->h_source)) + if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto out; bcast_packet = (struct batadv_bcast_packet *)skb->data; /* ignore broadcasts originated by myself */ - if (batadv_is_my_mac(bcast_packet->orig)) + if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto out; if (bcast_packet->header.ttl < 2) @@ -1243,14 +1245,14 @@ int batadv_recv_vis_packet(struct sk_buff *skb, ethhdr = (struct ethhdr *)skb_mac_header(skb); /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return NET_RX_DROP; /* ignore own packets */ - if (batadv_is_my_mac(vis_packet->vis_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->vis_orig)) return NET_RX_DROP; - if (batadv_is_my_mac(vis_packet->sender_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->sender_orig)) return NET_RX_DROP; switch (vis_packet->vis_type) { diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 98a66a021a60..7abee19567e9 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1953,7 +1953,7 @@ out: bool batadv_send_tt_response(struct batadv_priv *bat_priv, struct batadv_tt_query_packet *tt_request) { - if (batadv_is_my_mac(tt_request->dst)) { + if (batadv_is_my_mac(bat_priv, tt_request->dst)) { /* don't answer backbone gws! */ if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_request->src)) return true; diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index c053244b97bd..6a1e646be96d 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -477,7 +477,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, /* Are we the target for this VIS packet? */ if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && - batadv_is_my_mac(vis_packet->target_orig)) + batadv_is_my_mac(bat_priv, vis_packet->target_orig)) are_target = 1; spin_lock_bh(&bat_priv->vis.hash_lock); @@ -496,7 +496,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, batadv_send_list_add(bat_priv, info); /* ... we're not the recipient (and thus need to forward). */ - } else if (!batadv_is_my_mac(packet->target_orig)) { + } else if (!batadv_is_my_mac(bat_priv, packet->target_orig)) { batadv_send_list_add(bat_priv, info); } -- cgit From 5add189a125e6b497e31bffdaaed8145ec6d4984 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Apr 2013 14:30:54 +0200 Subject: netfilter: ipset: bitmap:ip,mac: fix listing with timeout The type when timeout support was enabled, could not list all elements, just the first ones which could fit into one netlink message: it just did not continue listing after the first message. Reported-by: Yoann JUET Signed-off-by: Jozsef Kadlecsik Tested-by: Yoann JUET Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 0f92dc24cb89..d7df6ac2c6f1 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -339,7 +339,11 @@ bitmap_ipmac_tlist(const struct ip_set *set, nla_put_failure: nla_nest_cancel(skb, nested); ipset_nest_end(skb, atd); - return -EMSGSIZE; + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; } static int -- cgit From f83a7ea2075ca896f2dbf07672bac9cf3682ff74 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 17 Apr 2013 22:45:24 +0000 Subject: netfilter: xt_rpfilter: skip locally generated broadcast/multicast, too Alex Efros reported rpfilter module doesn't match following packets: IN=br.qemu SRC=192.168.2.1 DST=192.168.2.255 [ .. ] (netfilter bugzilla #814). Problem is that network stack arranges for the locally generated broadcasts to appear on the interface they were sent out, so the IFF_LOOPBACK check doesn't trigger. As -m rpfilter is restricted to PREROUTING, we can check for existing rtable instead, it catches locally-generated broad/multicast case, too. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_rpfilter.c | 8 +++++++- net/ipv6/netfilter/ip6t_rpfilter.c | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c30130062cd6..c49dcd0284a0 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -66,6 +66,12 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, return dev_match; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + return rt && (rt->rt_flags & RTCF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info; @@ -76,7 +82,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) info = par->matchinfo; invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ip_hdr(skb); diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 5060d54199ab..e0983f3648a6 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -71,6 +71,12 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, return ret; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rt6_info *rt = (const void *) skb_dst(skb); + return rt && (rt->rt6i_flags & RTF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; @@ -78,7 +84,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph; bool invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ipv6_hdr(skb); -- cgit From 12fb3dd9dc3c64ba7d64cec977cca9b5fb7b1d4e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Apr 2013 07:19:48 +0000 Subject: tcp: call tcp_replace_ts_recent() from tcp_ack() commit bd090dfc634d (tcp: tcp_replace_ts_recent() should not be called from tcp_validate_incoming()) introduced a TS ecr bug in slow path processing. 1 A > B P. 1:10001(10000) ack 1 2 B < A . 1:1(0) ack 1 win 257 3 A > B . 1:1001(1000) ack 1 win 227 4 A > B . 1001:2001(1000) ack 1 win 227 (ecr 200 should be ecr 300 in packets 3 & 4) Problem is tcp_ack() can trigger send of new packets (retransmits), reflecting the prior TSval, instead of the TSval contained in the currently processed incoming packet. Fix this by calling tcp_replace_ts_recent() from tcp_ack() after the checks, but before the actions. Reported-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 64 +++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3bd55bad230a..13b9c08fc158 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -113,6 +113,7 @@ int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -3564,6 +3565,27 @@ static void tcp_send_challenge_ack(struct sock *sk) } } +static void tcp_store_ts_recent(struct tcp_sock *tp) +{ + tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; + tp->rx_opt.ts_recent_stamp = get_seconds(); +} + +static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) +{ + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Not only, also it occurs for expired timestamps. + */ + + if (tcp_paws_check(&tp->rx_opt, 0)) + tcp_store_ts_recent(tp); + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3607,6 +3629,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + if (flag & FLAG_UPDATE_TS_RECENT) + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. @@ -3927,27 +3955,6 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) EXPORT_SYMBOL(tcp_parse_md5sig_option); #endif -static inline void tcp_store_ts_recent(struct tcp_sock *tp) -{ - tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; - tp->rx_opt.ts_recent_stamp = get_seconds(); -} - -static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) -{ - if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { - /* PAWS bug workaround wrt. ACK frames, the PAWS discard - * extra check below makes sure this can only happen - * for pure ACK frames. -DaveM - * - * Not only, also it occurs for expired timestamps. - */ - - if (tcp_paws_check(&tp->rx_opt, 0)) - tcp_store_ts_recent(tp); - } -} - /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM * * It is not fatal. If this ACK does _not_ change critical state (seqs, window) @@ -5543,14 +5550,9 @@ slow_path: return 0; step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) + if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) goto discard; - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. */ @@ -5986,7 +5988,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* step 5: check the ACK field */ if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; switch (sk->sk_state) { case TCP_SYN_RECV: @@ -6137,11 +6140,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } } - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - /* step 6: check the URG bit */ tcp_urg(sk, skb, th); -- cgit From e15465e1808542743627f13d1c0cbb7eacc82b83 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 16 Apr 2013 21:10:38 +0000 Subject: irda: small read past the end of array in debug code The "reason" can come from skb->data[] and it hasn't been capped so it can be from 0-255 instead of just 0-6. For example in irlmp_state_dtr() the code does: reason = skb->data[3]; ... irlmp_disconnect_indication(self, reason, skb); Also LMREASON has a couple other values which don't have entries in the irlmp_reasons[] array. And 0xff is a valid reason as well which means "unknown". So far as I can see we don't actually care about "reason" except for in the debug code. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/irda/iriap.c | 3 ++- net/irda/irlmp.c | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 29340a9a6fb9..e1b37f5a2691 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -303,7 +303,8 @@ static void iriap_disconnect_indication(void *instance, void *sap, { struct iriap_cb *self; - IRDA_DEBUG(4, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(4, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); self = instance; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 6115a44c0a24..1064621da6f6 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -66,8 +66,15 @@ const char *irlmp_reasons[] = { "LM_LAP_RESET", "LM_INIT_DISCONNECT", "ERROR, NOT USED", + "UNKNOWN", }; +const char *irlmp_reason_str(LM_REASON reason) +{ + reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1); + return irlmp_reasons[reason]; +} + /* * Function irlmp_init (void) * @@ -747,7 +754,8 @@ void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, { struct lsap_cb *lsap; - IRDA_DEBUG(1, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(1, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); -- cgit From cb95ec6261a205bde6451b972fbd6e1581608cae Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 17 Apr 2013 16:49:10 +0000 Subject: pkt_sched: fix error return code in fw_change_attrs() Fix to return -EINVAL when tb[TCA_FW_MASK] is set and head->mask != 0xFFFFFFFF instead of 0 (ifdef CONFIG_NET_CLS_IND and tb[TCA_FW_INDEV]), as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 1135d8227f9b..9b97172db84a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -204,7 +204,6 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, if (err < 0) return err; - err = -EINVAL; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); tcf_bind_filter(tp, &f->res, base); @@ -218,6 +217,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, } #endif /* CONFIG_NET_CLS_IND */ + err = -EINVAL; if (tb[TCA_FW_MASK]) { mask = nla_get_u32(tb[TCA_FW_MASK]); if (mask != head->mask) -- cgit From c846ad9b880ece01bb4d8d07ba917734edf0324f Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 19 Apr 2013 10:45:52 +0000 Subject: net: rate-limit warn-bad-offload splats. If one does do something unfortunate and allow a bad offload bug into the kernel, this the skb_warn_bad_offload can effectively live-lock the system, filling the logs with the same error over and over. Add rate limitation to this so that box remains otherwise functional in this case. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e7d68ed8aafe..b24ab0e98eb4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2148,6 +2148,9 @@ static void skb_warn_bad_offload(const struct sk_buff *skb) struct net_device *dev = skb->dev; const char *driver = ""; + if (!net_ratelimit()) + return; + if (dev && dev->dev.parent) driver = dev_driver_string(dev->dev.parent); -- cgit