From f09eca8db0184aeb6b9718a987cfb3653ad7c4ae Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Jul 2013 20:16:39 +0200 Subject: netfilter: ctnetlink: fix incorrect NAT expectation dumping nf_ct_expect_alloc leaves unset the expectation NAT fields. However, ctnetlink_exp_dump_expect expects them to be zeroed in case they are not used, which may not be the case. This results in dumping the NAT tuple of the expectation when it should not. Fix it by zeroing the NAT fields of the expectation. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index c63b618cd619..4fd1ca94fd4a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -293,6 +293,11 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, sizeof(exp->tuple.dst.u3) - len); exp->tuple.dst.u.all = *dst; + +#ifdef CONFIG_NF_NAT_NEEDED + memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); + memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); +#endif } EXPORT_SYMBOL_GPL(nf_ct_expect_init); -- cgit From baf60efa585c78b269f0097288868a51ccc61f55 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jul 2013 19:22:19 -0700 Subject: netfilter: xt_socket: fix broken v0 support commit 681f130f39e10 ("netfilter: xt_socket: add XT_SOCKET_NOWILDCARD flag") added a potential NULL dereference if an old iptables package uses v0 of the match. Fix this by removing the test on @info in fast path. IPv6 can remove the test as well, as it uses v1 or v2. 
Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index f8b71911037a..20b15916f403 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -172,7 +172,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ - if (info && info->flags & XT_SOCKET_TRANSPARENT) + if (info->flags & XT_SOCKET_TRANSPARENT) transparent = ((sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->transparent) || (sk->sk_state == TCP_TIME_WAIT && @@ -196,7 +196,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, static bool socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) { - return socket_match(skb, par, NULL); + static struct xt_socket_mtinfo1 xt_info_v0 = { + .flags = 0, + }; + + return socket_match(skb, par, &xt_info_v0); } static bool @@ -314,7 +318,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ - if (info && info->flags & XT_SOCKET_TRANSPARENT) + if (info->flags & XT_SOCKET_TRANSPARENT) transparent = ((sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->transparent) || (sk->sk_state == TCP_TIME_WAIT && -- cgit From a0ec570f4f69c4cb700d743a915096c2c8f56a99 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Tue, 25 Jun 2013 09:17:17 +0200 Subject: nl80211: fix mgmt tx status and testmode reporting for netns These two events were sent to the default network namespace. This caused AP mode in a non-default netns to not work correctly. Mgmt tx status was multicasted to a different (default) netns instead of the one the AP was in. 
Cc: stable@vger.kernel.org Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1cc47aca7f05..9fb8820b75c5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6613,12 +6613,14 @@ EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) { + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; nla_nest_end(skb, data); genlmsg_end(skb, hdr); - genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0, + nl80211_testmode_mcgrp.id, gfp); } EXPORT_SYMBOL(cfg80211_testmode_event); #endif @@ -10064,7 +10066,8 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, genlmsg_end(msg, hdr); - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: -- cgit From 923a0e7dee8c436108279568cf34444749ac796f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Jun 2013 11:38:54 +0200 Subject: cfg80211: fix bugs in new SME implementation When splitting the SME implementation from the MLME code, I introduced a few bugs: * association failures no longer sent a connect-failure event * getting disassociated from the AP caused deauth to be sent but state wasn't cleaned up, leading to warnings * authentication failures weren't cleaned up properly, causing new connection attempts to warn and fail Fix these bugs. 
Signed-off-by: Johannes Berg --- net/wireless/sme.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 1d3cfb1a3f28..81c8a10d743c 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -34,8 +34,10 @@ struct cfg80211_conn { CFG80211_CONN_SCAN_AGAIN, CFG80211_CONN_AUTHENTICATE_NEXT, CFG80211_CONN_AUTHENTICATING, + CFG80211_CONN_AUTH_FAILED, CFG80211_CONN_ASSOCIATE_NEXT, CFG80211_CONN_ASSOCIATING, + CFG80211_CONN_ASSOC_FAILED, CFG80211_CONN_DEAUTH, CFG80211_CONN_CONNECTED, } state; @@ -164,6 +166,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) NULL, 0, params->key, params->key_len, params->key_idx, NULL, 0); + case CFG80211_CONN_AUTH_FAILED: + return -ENOTCONN; case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -188,10 +192,17 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) WLAN_REASON_DEAUTH_LEAVING, false); return err; + case CFG80211_CONN_ASSOC_FAILED: + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); + return -ENOTCONN; case CFG80211_CONN_DEAUTH: cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); + /* free directly, disconnected event already sent */ + cfg80211_sme_free(wdev); return 0; default: return 0; @@ -371,7 +382,7 @@ bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status) return true; } - wdev->conn->state = CFG80211_CONN_DEAUTH; + wdev->conn->state = CFG80211_CONN_ASSOC_FAILED; schedule_work(&rdev->conn_work); return false; } @@ -383,7 +394,13 @@ void cfg80211_sme_deauth(struct wireless_dev *wdev) void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) { - cfg80211_sme_free(wdev); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = 
CFG80211_CONN_AUTH_FAILED; + schedule_work(&rdev->conn_work); } void cfg80211_sme_disassoc(struct wireless_dev *wdev) @@ -399,7 +416,13 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev) void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) { - cfg80211_sme_disassoc(wdev); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_ASSOC_FAILED; + schedule_work(&rdev->conn_work); } static int cfg80211_sme_connect(struct wireless_dev *wdev, -- cgit From f77b86d7d3acf9dfcb5ee834628d12207584b2cb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Jun 2013 15:43:38 +0200 Subject: regulatory: add missing rtnl locking restore_regulatory_settings() requires the RTNL to be held, add the missing locking in reg_timeout_work(). Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5a24c986f34b..5a950f36bae4 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2279,7 +2279,9 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) static void reg_timeout_work(struct work_struct *work) { REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); + rtnl_lock(); restore_regulatory_settings(true); + rtnl_unlock(); } int __init regulatory_init(void) -- cgit From 1cd158573951f737fbc878a35cb5eb47bf9af3d5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 28 Jun 2013 21:04:35 +0200 Subject: mac80211/minstrel_ht: fix cck rate sampling The CCK group needs special treatment to set the right flags and rate index. Add this missing check to prevent setting broken rates for tx packets. 
Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5b2d3012b983..f5aed963b22e 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -804,10 +804,18 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + rate->count = 1; + + if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); + rate->idx = mp->cck_rates[idx]; + rate->flags = 0; + return; + } + rate->idx = sample_idx % MCS_GROUP_RATES + (sample_group->streams - 1) * MCS_GROUP_RATES; rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; - rate->count = 1; } static void -- cgit From e13bae4f807401729b3f27c7e882a96b8b292809 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Jul 2013 10:43:31 +0200 Subject: mac80211: fix ethtool stats for non-station interfaces As reported in https://bugzilla.kernel.org/show_bug.cgi?id=60514, the station loop never initialises 'sinfo' and therefore adds up stack values, leaking stack information (the number of times it adds values is easily obtained another way.) Fix this by initialising the sinfo for each station to add. 
Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8184d121ff09..43dd7525bfcb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -666,6 +666,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, if (sta->sdata->dev != dev) continue; + sinfo.filled = 0; + sta_set_sinfo(sta, &sinfo); i = 0; ADD_STA_STATS(sta); } -- cgit From 83374fe9de455e37c2a039603d2538eb77e8ec4d Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Thu, 11 Jul 2013 18:24:03 +0800 Subject: nl80211: fix the setting of RSSI threshold value for mesh RSSI threshold value used for mesh peering should be in negative value. After range checks to mesh parameters is introduced, this is not allowed. Fix this. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9fb8820b75c5..25d217d90807 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4770,9 +4770,9 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1, mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, 1, 255, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0, mask, NL80211_MESHCONF_RSSI_THRESHOLD, - nla_get_u32); + nla_get_s32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); -- cgit From 6b0f32745dcfba01d7be33acd1b40306c7a914c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jul 2013 22:33:26 +0200 Subject: mac80211: fix duplicate retransmission detection The duplicate retransmission detection code in mac80211 erroneously attempts to do the check for every frame, even frames that don't have a sequence control field or that don't use it (QoS-Null frames.) 
This is problematic because it causes the code to access data beyond the end of the SKB and depending on the data there will drop packets erroneously. Correct the code to not do duplicate detection for such frames. I found this error while testing AP powersave; it led to retransmitted PS-Poll frames being dropped entirely as the data beyond the end of the SKB was always zero. Cc: stable@vger.kernel.org [all versions] Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 23dbcfc69b3b..2c5a79bd3777 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -936,8 +936,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ - if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { + /* + * Drop duplicate 802.11 retransmissions + * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery") + */ + if (rx->skb->len >= 24 && rx->sta && + !ieee80211_is_ctl(hdr->frame_control) && + !ieee80211_is_qos_nullfunc(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1)) { if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { -- cgit From 5c9fc93bc9bc417418fc1b6366833ae6a07b804d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 15 Jul 2013 14:35:06 +0200 Subject: mac80211/minstrel: fix NULL pointer dereference issue When priv_sta == NULL, mi->prev_sample is dereferenced too early. Move the assignment further down, after the rate_control_send_low call. 
Reported-by: Krzysztof Mazur Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index ac7ef5414bde..e6512e2ffd20 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -290,7 +290,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, struct minstrel_rate *msr, *mr; unsigned int ndx; bool mrr_capable; - bool prev_sample = mi->prev_sample; + bool prev_sample; int delta; int sampling_ratio; @@ -314,6 +314,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, (mi->sample_count + mi->sample_deferred / 2); /* delta < 0: no sampling required */ + prev_sample = mi->prev_sample; mi->prev_sample = false; if (delta < 0 || (!mrr_capable && prev_sample)) return; -- cgit From 7427b370e0aa6226c763af94fc5c4e3433383543 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Thu, 20 Jun 2013 11:11:04 +0200 Subject: NFC: Fix NCI over SPI build kbuild test robot found following error: net/built-in.o: In function `nci_spi_send': >> spi.c:(.text+0x19a76f): undefined reference to `crc_ccitt' Add CRC_CCITT module to Kconfig to fix it Reported-by: kbuild test robot. 
Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig index 2a2416080b4f..a4f1e42e3481 100644 --- a/net/nfc/nci/Kconfig +++ b/net/nfc/nci/Kconfig @@ -11,6 +11,7 @@ config NFC_NCI config NFC_NCI_SPI depends on NFC_NCI && SPI + select CRC_CCITT bool "NCI over SPI protocol support" default n help -- cgit From 651e92716aaae60fc41b9652f54cb6803896e0da Mon Sep 17 00:00:00 2001 From: Michal Tesar Date: Fri, 19 Jul 2013 14:09:01 +0200 Subject: sysctl net: Keep tcp_syn_retries inside the boundary Limit the min/max value passed to the /proc/sys/net/ipv4/tcp_syn_retries. Signed-off-by: Michal Tesar Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b2c123c44d69..610e324348d1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -36,6 +36,8 @@ static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; static int ip_ttl_min = 1; static int ip_ttl_max = 255; +static int tcp_syn_retries_min = 1; +static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; @@ -332,7 +334,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_syn_retries_min, + .extra2 = &tcp_syn_retries_max }, { .procname = "tcp_synack_retries", -- cgit From 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Jul 2013 20:07:16 -0700 Subject: bridge: do not call setup_timer() multiple times commit 9f00b2e7cf24 ("bridge: only expire the mdb entry when query is received") added a nasty 
bug as an active timer can be reinitialized. setup_timer() must be done once, no matter how many times mod_timer() is called. br_multicast_new_group() is the right place to do this. Reported-by: Srivatsa S. Bhat Diagnosed-by: Thomas Gleixner Signed-off-by: Eric Dumazet Tested-by: Srivatsa S. Bhat Cc: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 69af490cce44..4b99c9a27044 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -619,6 +619,9 @@ rehash: mp->br = br; mp->addr = *group; + setup_timer(&mp->timer, br_multicast_group_expired, + (unsigned long)mp); + hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); mdb->size++; @@ -1126,7 +1129,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!mp) goto out; - setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); mod_timer(&mp->timer, now + br->multicast_membership_interval); mp->timer_armed = true; @@ -1204,7 +1206,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!mp) goto out; - setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); mod_timer(&mp->timer, now + br->multicast_membership_interval); mp->timer_armed = true; -- cgit From 40d18ff959fe8b847be4f7b03f84644a7c18211e Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Fri, 19 Jul 2013 17:37:39 +0800 Subject: mac80211: prevent the buffering or frame transmission to non-assoc mesh STA This patch is intended to avoid the buffering to non-assoc mesh STA and also to avoid the triggering of frame to non-assoc mesh STA which could cause kernel panic in specific hw. One of the examples, is kernel panic happens to ath9k if user space inserts the mesh STA and not proceed with the SAE and AMPE, and later the same mesh STA is detected again. 
The sta_state of the mesh STA remains at IEEE80211_STA_NONE and if the ieee80211_sta_ps_deliver_wakeup is called and subsequently the ath_tx_aggr_wakeup, the kernel panic due to ath_tx_node_init is not called before to initialize the require data structures. This issue is reported by Cedric Voncken before. http://www.spinics.net/lists/linux-wireless/msg106342.html [<831ea6b4>] ath_tx_aggr_wakeup+0x44/0xcc [ath9k] [<83084214>] ieee80211_sta_ps_deliver_wakeup+0xb8/0x208 [mac80211] [<830b9824>] ieee80211_mps_sta_status_update+0x94/0x108 [mac80211] [<83099398>] ieee80211_sta_ps_transition+0xc94/0x34d8 [mac80211] [<8022399c>] nf_iterate+0x98/0x104 [<8309bb60>] ieee80211_sta_ps_transition+0x345c/0x34d8 [mac80211] Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/mesh_ps.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 3b7bfc01ee36..22290a929b94 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -229,6 +229,10 @@ void ieee80211_mps_sta_status_update(struct sta_info *sta) enum nl80211_mesh_power_mode pm; bool do_buffer; + /* For non-assoc STA, prevent buffering or frame transmission */ + if (sta->sta_state < IEEE80211_STA_ASSOC) + return; + /* * use peer-specific power mode if peering is established and the * peer's power mode is known -- cgit From cd34f647a78e7f2296fcb72392b9e5c832793e65 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 23 Jul 2013 13:56:50 +0200 Subject: mac80211: fix monitor interface suspend crash regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My commit: commit 12e7f517029dad819c45eca9ca01fdb9ba57616b Author: Stanislaw Gruszka Date: Thu Feb 28 10:55:26 2013 +0100 mac80211: cleanup generic suspend/resume procedures removed check for deleting MONITOR and AP_VLAN when suspend. That can cause a crash (i.e. 
in iwlagn_mac_remove_interface()) since we remove interface in the driver that we did not add before. Reference: http://marc.info/?l=linux-kernel&m=137391815113860&w=2 Bisected-by: Ortwin Glück Reported-and-tested-by: Ortwin Glück Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- net/mac80211/pm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 7fc5d0d8149a..340126204343 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -99,10 +99,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } mutex_unlock(&local->sta_mtx); - /* remove all interfaces */ + /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) + if (!ieee80211_sdata_running(sdata) || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; + drv_remove_interface(local, sdata); } -- cgit From f585a991e1d1612265f0d4e812f77e40dd54975b Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Mon, 22 Jul 2013 12:01:58 -0700 Subject: fib_trie: potential out of bounds access in trie_show_stats() With the <= max condition in the for loop, it will always go 1 element further than needed. If the condition for the while loop is never met, then max is MAX_STAT_DEPTH, and the for loop will walk off the end of nodesizes[]. Signed-off-by: Jerry Snitselaar Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 49616fed9340..108a1e9c9eac 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2133,7 +2133,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) max--; pointers = 0; - for (i = 1; i <= max; i++) + for (i = 1; i < max; i++) if (stat->nodesizes[i] != 0) { seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); pointers += (1<<i) * stat->nodesizes[i]; -- cgit From 905a6f96a1b18e490a75f810d733ced93c39b0e5 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 22 Jul 2013 23:45:53 +0200 Subject: ipv6: take rtnl_lock and mark mrt6 table as freed on namespace cleanup Otherwise we end up dereferencing the already freed net->ipv6.mrt pointer which leads to a panic (from Srivatsa S. Bhat): BUG: unable to handle kernel paging request at ffff882018552020 IP: [] ip6mr_sk_done+0x32/0xb0 [ipv6] PGD 290a067 PUD 207ffe0067 PMD 207ff1d067 PTE 8000002018552060 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: ebtable_nat ebtables nfs fscache nf_conntrack_ipv4 nf_defrag_ipv4 ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables nfsd lockd nfs_acl exportfs auth_rpcgss autofs4 sunrpc 8021q garp bridge stp llc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter +ip6_tables ipv6 vfat fat vhost_net macvtap macvlan vhost tun kvm_intel kvm uinput iTCO_wdt iTCO_vendor_support cdc_ether usbnet mii microcode i2c_i801 i2c_core lpc_ich mfd_core shpchp ioatdma dca mlx4_core be2net wmi acpi_cpufreq mperf ext4 jbd2 mbcache dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 7 Comm: kworker/u33:0 Not tainted 3.11.0-rc1-ea45e-a #4 Hardware name: IBM -[8737R2A]-/00Y2738, BIOS -[B2E120RUS-1.20]- 11/30/2012 Workqueue: netns cleanup_net task: ffff8810393641c0 ti: ffff881039366000 task.ti: ffff881039366000 RIP: 0010:[] [] ip6mr_sk_done+0x32/0xb0 [ipv6] RSP: 
0018:ffff881039367bd8 EFLAGS: 00010286 RAX: ffff881039367fd8 RBX: ffff882018552000 RCX: dead000000200200 RDX: 0000000000000000 RSI: ffff881039367b68 RDI: ffff881039367b68 RBP: ffff881039367bf8 R08: ffff881039367b68 R09: 2222222222222222 R10: 2222222222222222 R11: 2222222222222222 R12: ffff882015a7a040 R13: ffff882014eb89c0 R14: ffff8820289e2800 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88103fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff882018552020 CR3: 0000000001c0b000 CR4: 00000000000407f0 Stack: ffff881039367c18 ffff882014eb89c0 ffff882015e28c00 0000000000000000 ffff881039367c18 ffffffffa034d9d1 ffff8820289e2800 ffff882014eb89c0 ffff881039367c58 ffffffff815bdecb ffffffff815bddf2 ffff882014eb89c0 Call Trace: [] rawv6_close+0x21/0x40 [ipv6] [] inet_release+0xfb/0x220 [] ? inet_release+0x22/0x220 [] inet6_release+0x3f/0x50 [ipv6] [] sock_release+0x29/0xa0 [] sk_release_kernel+0x30/0x70 [] icmpv6_sk_exit+0x3b/0x80 [ipv6] [] ops_exit_list+0x39/0x60 [] cleanup_net+0xfb/0x1a0 [] process_one_work+0x1da/0x610 [] ? process_one_work+0x169/0x610 [] worker_thread+0x120/0x3a0 [] ? process_one_work+0x610/0x610 [] kthread+0xee/0x100 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? __init_kthread_worker+0x70/0x70 Code: 20 48 89 5d e8 4c 89 65 f0 4c 89 6d f8 66 66 66 66 90 4c 8b 67 30 49 89 fd e8 db 3c 1e e1 49 8b 9c 24 90 08 00 00 48 85 db 74 06 <4c> 39 6b 20 74 20 bb f3 ff ff ff e8 8e 3c 1e e1 89 d8 4c 8b 65 RIP [] ip6mr_sk_done+0x32/0xb0 [ipv6] RSP CR2: ffff882018552020 Reported-by: Srivatsa S. Bhat Tested-by: Srivatsa S. Bhat Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/ip6mr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 583e8d435f9a..03986d31fa41 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -259,10 +259,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr6_table *mrt, *next; + rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { list_del(&mrt->list); ip6mr_free_table(mrt); } + rtnl_unlock(); fib_rules_unregister(net->ipv6.mr6_rules_ops); } #else @@ -289,7 +291,10 @@ static int __net_init ip6mr_rules_init(struct net *net) static void __net_exit ip6mr_rules_exit(struct net *net) { + rtnl_lock(); ip6mr_free_table(net->ipv6.mrt6); + net->ipv6.mrt6 = NULL; + rtnl_unlock(); } #endif -- cgit From deceb4c062a8dd63fe554c3be2b4bf9151a5cedf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 23 Jul 2013 20:22:39 +0100 Subject: net: fix comment above build_skb() build_skb() specifies that the data parameter must come from a kmalloc'd area, this is only true if frag_size equals 0, because then build_skb() will use ksize(data) to figure out the actual data size. Update the comment to reflect that special condition. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 20e02d2605ec..3df4d4ccf440 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -309,7 +309,8 @@ EXPORT_SYMBOL(__alloc_skb); * @frag_size: size of fragment, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and - * skb_shared_info. @data must have been allocated by kmalloc() + * skb_shared_info. @data must have been allocated by kmalloc() only if + * @frag_size is 0, otherwise data should come from the page allocator. * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. 
* Notes : -- cgit From 23df0b731954502a9391e739b92927cee4360343 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 21 Jul 2013 16:36:48 +0300 Subject: regulatory: use correct regulatory initiator on wiphy register The current regdomain was not always set by the core. This causes cards with a custom regulatory domain to ignore user initiated changes if done before the card was registered. Signed-off-by: Arik Nemtsov Acked-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- net/wireless/reg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5a950f36bae4..de06d5d1287f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2247,10 +2247,13 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) void wiphy_regulatory_register(struct wiphy *wiphy) { + struct regulatory_request *lr; + if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint++; - wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); + lr = get_last_request(); + wiphy_update_regulatory(wiphy, lr->initiator); } void wiphy_regulatory_deregister(struct wiphy *wiphy) -- cgit From da9910ac4a816b4340944c78d94c02a35527db46 Mon Sep 17 00:00:00 2001 From: Jaganath Kanakkassery Date: Fri, 21 Jun 2013 19:55:11 +0530 Subject: Bluetooth: Fix invalid length check in l2cap_information_rsp() The length check is invalid since the length varies with type of info response. This was introduced by the commit cb3b3152b2f5939d67005cff841a1ca748b19888 Because of this, l2cap info rsp is not handled and command reject is sent. 
> ACL data: handle 11 flags 0x02 dlen 16 L2CAP(s): Info rsp: type 2 result 0 Extended feature mask 0x00b8 Enhanced Retransmission mode Streaming mode FCS Option Fixed Channels < ACL data: handle 11 flags 0x00 dlen 10 L2CAP(s): Command rej: reason 0 Command not understood Cc: stable@vger.kernel.org Signed-off-by: Jaganath Kanakkassery Signed-off-by: Chan-Yeol Park Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4be6a264b475..68843a28a7af 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4333,7 +4333,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data; u16 type, result; - if (cmd_len != sizeof(*rsp)) + if (cmd_len < sizeof(*rsp)) return -EPROTO; type = __le16_to_cpu(rsp->type); -- cgit From fcee337704d76446e0d4714cc5eff53e896f7c6f Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Thu, 11 Jul 2013 11:34:28 +0100 Subject: Bluetooth: Fix race between hci_register_dev() and hci_dev_open() If hci_dev_open() is called after hci_register_dev() added the device to the hci_dev_list but before the workqueue are created we could run into a NULL pointer dereference (see below). This bug is very unlikely to happen, systems using bluetoothd to manage their bluetooth devices will never see this happen. BUG: unable to handle kernel NULL pointer dereference 0100 IP: [] __queue_work+0x32/0x3d0 (...) Call Trace: [] queue_work_on+0x45/0x50 [] hci_req_run+0xbf/0xf0 [bluetooth] [] ? hci_init2_req+0x720/0x720 [bluetooth] [] __hci_req_sync+0xd6/0x1c0 [bluetooth] [] ? try_to_wake_up+0x2b0/0x2b0 [] ? 
usb_autopm_put_interface+0x30/0x40 [] hci_dev_open+0x275/0x2e0 [bluetooth] [] hci_sock_ioctl+0x1f2/0x3f0 [bluetooth] [] sock_do_ioctl+0x30/0x70 [] sock_ioctl+0x79/0x2f0 [] do_vfs_ioctl+0x96/0x560 [] SyS_ioctl+0x91/0xb0 [] system_call_fastpath+0x1a/0x1f Reported-by: Sedat Dilek Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ace5e55fe5a3..64d33d1e14c8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2207,10 +2207,6 @@ int hci_register_dev(struct hci_dev *hdev) BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); - write_lock(&hci_dev_list_lock); - list_add(&hdev->list, &hci_dev_list); - write_unlock(&hci_dev_list_lock); - hdev->workqueue = alloc_workqueue(hdev->name, WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 1); if (!hdev->workqueue) { @@ -2246,6 +2242,10 @@ int hci_register_dev(struct hci_dev *hdev) if (hdev->dev_type != HCI_AMP) set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + write_lock(&hci_dev_list_lock); + list_add(&hdev->list, &hci_dev_list); + write_unlock(&hci_dev_list_lock); + hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); @@ -2258,9 +2258,6 @@ err_wqueue: destroy_workqueue(hdev->req_workqueue); err: ida_simple_remove(&hci_index_ida, hdev->id); - write_lock(&hci_dev_list_lock); - list_del(&hdev->list); - write_unlock(&hci_dev_list_lock); return error; } -- cgit From 555445cd11803c6bc93b2be31968f3949ef7708b Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Wed, 24 Jul 2013 10:39:06 +0200 Subject: neigh: prevent overflowing params in /proc/sys/net/ipv4/neigh/ Without this patch, the fields app_solicit, gc_thresh1, gc_thresh2, gc_thresh3, proxy_qlen, ucast_solicit, mcast_solicit could have assumed negative values when setting large numbers. Signed-off-by: Francesco Fusco Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b7de821f98df..9232c68941ab 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2767,6 +2767,7 @@ EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL static int zero; +static int int_max = INT_MAX; static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); static int proc_unres_qlen(struct ctl_table *ctl, int write, @@ -2819,19 +2820,25 @@ static struct neigh_sysctl_table { .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_UCAST_PROBE] = { .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_APP_PROBE] = { .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_RETRANS_TIME] = { .procname = "retrans_time", @@ -2874,7 +2881,9 @@ static struct neigh_sysctl_table { .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_ANYCAST_DELAY] = { .procname = "anycast_delay", @@ -2916,19 +2925,25 @@ static struct neigh_sysctl_table { .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH2] = { .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, 
[NEIGH_VAR_GC_THRESH3] = { .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, {}, }, -- cgit From c74f2b2678f40b80265dd53556f1f778c8e1823f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 26 Jul 2013 11:00:10 +0200 Subject: genetlink: release cb_lock before requesting additional module Requesting external module with cb_lock taken can result in the deadlock like showed below: [ 2458.111347] Showing all locks held in the system: [ 2458.111347] 1 lock held by NetworkManager/582: [ 2458.111347] #0: (cb_lock){++++++}, at: [] genl_rcv+0x19/0x40 [ 2458.111347] 1 lock held by modprobe/603: [ 2458.111347] #0: (cb_lock){++++++}, at: [] genl_lock_all+0x15/0x30 [ 2461.579457] SysRq : Show Blocked State [ 2461.580103] task PC stack pid father [ 2461.580103] NetworkManager D ffff880034b84500 4040 582 1 0x00000080 [ 2461.580103] ffff8800197ff720 0000000000000046 00000000001d5340 ffff8800197fffd8 [ 2461.580103] ffff8800197fffd8 00000000001d5340 ffff880019631700 7fffffffffffffff [ 2461.580103] ffff8800197ff880 ffff8800197ff878 ffff880019631700 ffff880019631700 [ 2461.580103] Call Trace: [ 2461.580103] [] schedule+0x29/0x70 [ 2461.580103] [] schedule_timeout+0x1c1/0x360 [ 2461.580103] [] ? mark_held_locks+0xbb/0x140 [ 2461.580103] [] ? _raw_spin_unlock_irq+0x2c/0x50 [ 2461.580103] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 2461.580103] [] wait_for_completion_killable+0xe8/0x170 [ 2461.580103] [] ? wake_up_state+0x20/0x20 [ 2461.580103] [] call_usermodehelper_exec+0x1a5/0x210 [ 2461.580103] [] ? wait_for_completion_killable+0x3d/0x170 [ 2461.580103] [] __request_module+0x1b3/0x370 [ 2461.580103] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 2461.580103] [] ctrl_getfamily+0x159/0x190 [ 2461.580103] [] genl_family_rcv_msg+0x1f4/0x2e0 [ 2461.580103] [] ? 
genl_family_rcv_msg+0x2e0/0x2e0 [ 2461.580103] [] genl_rcv_msg+0x8e/0xd0 [ 2461.580103] [] netlink_rcv_skb+0xa9/0xc0 [ 2461.580103] [] genl_rcv+0x28/0x40 [ 2461.580103] [] netlink_unicast+0xdd/0x190 [ 2461.580103] [] netlink_sendmsg+0x329/0x750 [ 2461.580103] [] sock_sendmsg+0x99/0xd0 [ 2461.580103] [] ? local_clock+0x5f/0x70 [ 2461.580103] [] ? lock_release_non_nested+0x308/0x350 [ 2461.580103] [] ___sys_sendmsg+0x39e/0x3b0 [ 2461.580103] [] ? kvm_clock_read+0x2f/0x50 [ 2461.580103] [] ? sched_clock+0x9/0x10 [ 2461.580103] [] ? sched_clock_local+0x1d/0x80 [ 2461.580103] [] ? sched_clock_cpu+0xa8/0x100 [ 2461.580103] [] ? trace_hardirqs_off+0xd/0x10 [ 2461.580103] [] ? local_clock+0x5f/0x70 [ 2461.580103] [] ? lock_release_holdtime.part.28+0xf/0x1a0 [ 2461.580103] [] ? fget_light+0xf9/0x510 [ 2461.580103] [] ? fget_light+0x3c/0x510 [ 2461.580103] [] __sys_sendmsg+0x42/0x80 [ 2461.580103] [] SyS_sendmsg+0x12/0x20 [ 2461.580103] [] system_call_fastpath+0x16/0x1b [ 2461.580103] modprobe D ffff88000f2c8000 4632 603 602 0x00000080 [ 2461.580103] ffff88000f04fba8 0000000000000046 00000000001d5340 ffff88000f04ffd8 [ 2461.580103] ffff88000f04ffd8 00000000001d5340 ffff8800377d4500 ffff8800377d4500 [ 2461.580103] ffffffff81d0b260 ffffffff81d0b268 ffffffff00000000 ffffffff81d0b2b0 [ 2461.580103] Call Trace: [ 2461.580103] [] schedule+0x29/0x70 [ 2461.580103] [] rwsem_down_write_failed+0xed/0x1a0 [ 2461.580103] [] ? update_cpu_load_active+0x10/0xb0 [ 2461.580103] [] call_rwsem_down_write_failed+0x13/0x20 [ 2461.580103] [] ? down_write+0x9d/0xb2 [ 2461.580103] [] ? genl_lock_all+0x15/0x30 [ 2461.580103] [] genl_lock_all+0x15/0x30 [ 2461.580103] [] genl_register_family+0x53/0x1f0 [ 2461.580103] [] ? 0xffffffffa01dbfff [ 2461.580103] [] genl_register_family_with_ops+0x20/0x80 [ 2461.580103] [] ? 0xffffffffa01dbfff [ 2461.580103] [] nl80211_init+0x24/0xf0 [cfg80211] [ 2461.580103] [] ? 
0xffffffffa01dbfff [ 2461.580103] [] cfg80211_init+0x43/0xdb [cfg80211] [ 2461.580103] [] do_one_initcall+0xfa/0x1b0 [ 2461.580103] [] ? set_memory_nx+0x43/0x50 [ 2461.580103] [] load_module+0x1c6f/0x27f0 [ 2461.580103] [] ? store_uevent+0x40/0x40 [ 2461.580103] [] SyS_finit_module+0x86/0xb0 [ 2461.580103] [] system_call_fastpath+0x16/0x1b [ 2461.580103] Sched Debug Version: v0.10, 3.11.0-0.rc1.git4.1.fc20.x86_64 #1 The problem started to happen after the net-pf-16-proto-16-family-nl80211 alias name was added to the cfg80211 module by the commit below (though that commit itself is perfectly fine): commit fb4e156886ce6e8309e912d8b370d192330d19d3 Author: Marcel Holtmann Date: Sun Apr 28 16:22:06 2013 -0700 nl80211: Add generic netlink module alias for cfg80211/nl80211 Reported-and-tested-by: Jeff Layton Reported-by: Richard W.M. Jones Signed-off-by: Stanislaw Gruszka Reviewed-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2fd6dbea327a..1076fe16b122 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -877,8 +877,10 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_MODULES if (res == NULL) { genl_unlock(); + up_read(&cb_lock); request_module("net-pf-%d-proto-%d-family-%s", PF_NETLINK, NETLINK_GENERIC, name); + down_read(&cb_lock); genl_lock(); res = genl_family_find_byname(name); } -- cgit From 3f8e2d75c14660abc8b69206f30190ab93304379 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 24 Jul 2013 02:32:46 +0300 Subject: Bluetooth: Fix HCI init for BlueFRITZ! devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit None of the BlueFRITZ! devices with manufacturer ID 31 (AVM Berlin) support HCI_Read_Local_Supported_Commands. It is safe to use the manufacturer ID (instead of e.g.
a USB ID specific quirk) because the company never created any newer controllers. < HCI Command: Read Local Supported Comm.. (0x04|0x0002) plen 0 [hci0] 0.210014 > HCI Event: Command Status (0x0f) plen 4 [hci0] 0.217361 Read Local Supported Commands (0x04|0x0002) ncmd 1 Status: Unknown HCI Command (0x01) Reported-by: Jörg Esser Signed-off-by: Johan Hedberg Tested-by: Jörg Esser Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 64d33d1e14c8..0176f200ccb0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -513,7 +513,10 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) hci_setup_event_mask(req); - if (hdev->hci_ver > BLUETOOTH_VER_1_1) + /* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read + * local supported commands HCI command. + */ + if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (lmp_ssp_capable(hdev)) { -- cgit From 53e21fbc288218a423959f878c86471a0e323a9a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 27 Jul 2013 14:11:14 -0500 Subject: Bluetooth: Fix calling request callback more than once In certain circumstances, such as an HCI driver using __hci_cmd_sync_ev with HCI_EV_CMD_COMPLETE as the expected completion event there is the chance that hci_event_packet will call hci_req_cmd_complete twice (once for the explicitly looked after event and another time in the actual handler of cmd_complete). In the case of __hci_cmd_sync_ev this introduces a race where the first call wakes up the blocking __hci_cmd_sync_ev and lets it complete. 
However, by the time that a second __hci_cmd_sync_ev call is already in progress the second hci_req_cmd_complete call (from the previous operation) will wake up the blocking function prematurely and cause it to fail, as witnessed by the following log: [ 639.232195] hci_rx_work: hci0 Event packet [ 639.232201] hci_req_cmd_complete: opcode 0xfc8e status 0x00 [ 639.232205] hci_sent_cmd_data: hci0 opcode 0xfc8e [ 639.232210] hci_req_sync_complete: hci0 result 0x00 [ 639.232220] hci_cmd_complete_evt: hci0 opcode 0xfc8e [ 639.232225] hci_req_cmd_complete: opcode 0xfc8e status 0x00 [ 639.232228] __hci_cmd_sync_ev: hci0 end: err 0 [ 639.232234] __hci_cmd_sync_ev: hci0 [ 639.232238] hci_req_add_ev: hci0 opcode 0xfc8e plen 250 [ 639.232242] hci_prepare_cmd: skb len 253 [ 639.232246] hci_req_run: length 1 [ 639.232250] hci_sent_cmd_data: hci0 opcode 0xfc8e [ 639.232255] hci_req_sync_complete: hci0 result 0x00 [ 639.232266] hci_cmd_work: hci0 cmd_cnt 1 cmd queued 1 [ 639.232271] __hci_cmd_sync_ev: hci0 end: err 0 [ 639.232276] Bluetooth: hci0 sending Intel patch command (0xfc8e) failed (-61) Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0176f200ccb0..48e1e0438f3a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3442,8 +3442,16 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) */ if (hdev->sent_cmd) { req_complete = bt_cb(hdev->sent_cmd)->req.complete; - if (req_complete) + + if (req_complete) { + /* We must set the complete callback to NULL to + * avoid calling the callback more than once if + * this function gets called again. 
+ */ + bt_cb(hdev->sent_cmd)->req.complete = NULL; + goto call_complete; + } } /* Remove all pending commands belonging to this request */ -- cgit From a0db856a95a29efb1c23db55c02d9f0ff4f0db48 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 30 Jul 2013 00:16:21 -0700 Subject: net_sched: Fix stack info leak in cbq_dump_wrr(). Make sure the reserved fields, and padding (if any), are fully initialized. Based upon a patch by Dan Carpenter and feedback from Joe Perches. Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 71a568862557..7a42c81a19eb 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1465,6 +1465,7 @@ static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_wrropt opt; + memset(&opt, 0, sizeof(opt)); opt.flags = 0; opt.allot = cl->allot; opt.priority = cl->priority + 1; -- cgit From c319d50bfcf678c2857038276d9fab3c6646f3bf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Jul 2013 22:34:28 +0200 Subject: nl80211: fix another nl80211_fam.attrbuf race This is similar to the race Linus had reported, but in this case it's an older bug: nl80211_prepare_wdev_dump() uses the wiphy index in cb->args[0] as it is and thus parses the message over and over again instead of just once because 0 is the first valid wiphy index. Similar code in nl80211_testmode_dump() correctly offsets the wiphy_index by 1, do that here as well. 
Cc: stable@vger.kernel.org Reported-by: Ben Hutchings Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 25d217d90807..3fcba69817e5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -441,10 +441,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, goto out_unlock; } *rdev = wiphy_to_dev((*wdev)->wiphy); - cb->args[0] = (*rdev)->wiphy_idx; + /* 0 is the first index - add 1 to parse only once */ + cb->args[0] = (*rdev)->wiphy_idx + 1; cb->args[1] = (*wdev)->identifier; } else { - struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]); + /* subtract the 1 again here */ + struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; if (!wiphy) { -- cgit From 9ea7187c53f63e31f2d1b2b1e474e31808565009 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 31 Jul 2013 01:19:43 +0200 Subject: NFC: netlink: Rename CMD_FW_UPLOAD to CMD_FW_DOWNLOAD Loading a firmware into a target is typically called firmware download, not firmware upload. So we rename the netlink API to NFC_CMD_FW_DOWNLOAD in order to avoid any terminology confusion from userspace. 
Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 20 ++++++++++---------- net/nfc/hci/core.c | 8 ++++---- net/nfc/netlink.c | 12 ++++++------ net/nfc/nfc.h | 6 +++--- 4 files changed, 23 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index dc96a83aa6ab..1d074dd1650f 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -44,7 +44,7 @@ DEFINE_MUTEX(nfc_devlist_mutex); /* NFC device ID bitmap */ static DEFINE_IDA(nfc_index_ida); -int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) +int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) { int rc = 0; @@ -62,28 +62,28 @@ int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) goto error; } - if (!dev->ops->fw_upload) { + if (!dev->ops->fw_download) { rc = -EOPNOTSUPP; goto error; } - dev->fw_upload_in_progress = true; - rc = dev->ops->fw_upload(dev, firmware_name); + dev->fw_download_in_progress = true; + rc = dev->ops->fw_download(dev, firmware_name); if (rc) - dev->fw_upload_in_progress = false; + dev->fw_download_in_progress = false; error: device_unlock(&dev->dev); return rc; } -int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name) { - dev->fw_upload_in_progress = false; + dev->fw_download_in_progress = false; - return nfc_genl_fw_upload_done(dev, firmware_name); + return nfc_genl_fw_download_done(dev, firmware_name); } -EXPORT_SYMBOL(nfc_fw_upload_done); +EXPORT_SYMBOL(nfc_fw_download_done); /** * nfc_dev_up - turn on the NFC device @@ -110,7 +110,7 @@ int nfc_dev_up(struct nfc_dev *dev) goto error; } - if (dev->fw_upload_in_progress) { + if (dev->fw_download_in_progress) { rc = -EBUSY; goto error; } diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 7b1c186736eb..fe66908401f5 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -809,14 +809,14 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) } } 
-static int hci_fw_upload(struct nfc_dev *nfc_dev, const char *firmware_name) +static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (!hdev->ops->fw_upload) + if (!hdev->ops->fw_download) return -ENOTSUPP; - return hdev->ops->fw_upload(hdev, firmware_name); + return hdev->ops->fw_download(hdev, firmware_name); } static struct nfc_ops hci_nfc_ops = { @@ -831,7 +831,7 @@ static struct nfc_ops hci_nfc_ops = { .im_transceive = hci_transceive, .tm_send = hci_tm_send, .check_presence = hci_check_presence, - .fw_upload = hci_fw_upload, + .fw_download = hci_fw_download, .discover_se = hci_discover_se, .enable_se = hci_enable_se, .disable_se = hci_disable_se, diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index b05ad909778f..f16fd59d4160 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1089,7 +1089,7 @@ exit: return rc; } -static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) +static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; @@ -1108,13 +1108,13 @@ static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], sizeof(firmware_name)); - rc = nfc_fw_upload(dev, firmware_name); + rc = nfc_fw_download(dev, firmware_name); nfc_put_device(dev); return rc; } -int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name) { struct sk_buff *msg; void *hdr; @@ -1124,7 +1124,7 @@ int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, - NFC_CMD_FW_UPLOAD); + NFC_CMD_FW_DOWNLOAD); if (!hdr) goto free_msg; @@ -1251,8 +1251,8 @@ static struct genl_ops nfc_genl_ops[] = { .policy = nfc_genl_policy, }, { - .cmd = NFC_CMD_FW_UPLOAD, - .doit = nfc_genl_fw_upload, + 
.cmd = NFC_CMD_FW_DOWNLOAD, + .doit = nfc_genl_fw_download, .policy = nfc_genl_policy, }, { diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index ee85a1fc1b24..820a7850c36a 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -123,10 +123,10 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter) class_dev_iter_exit(iter); } -int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name); -int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); +int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name); +int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name); -int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); +int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name); int nfc_dev_up(struct nfc_dev *dev); -- cgit From ff862a4668dd6dba962b1d2d8bd344afa6375683 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 28 Jul 2013 23:04:45 +0300 Subject: af_key: more info leaks in pfkey messages This is inspired by a5cc68f3d6 "af_key: fix info leaks in notify messages". There are some struct members which don't get initialized and could disclose small amounts of private information. Acked-by: Mathias Krause Signed-off-by: Dan Carpenter Acked-by: Steffen Klassert Signed-off-by: David S. 
Miller --- net/key/af_key.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 9da862070dd8..ab8bd2cabfa0 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2081,6 +2081,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * pol->sadb_x_policy_type = IPSEC_POLICY_NONE; } pol->sadb_x_policy_dir = dir+1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; @@ -3137,7 +3138,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; + pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ if (x->id.proto == IPPROTO_AH) @@ -3525,6 +3528,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = dir + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = 0; pol->sadb_x_policy_priority = 0; -- cgit From e1ee3673a83cc02b6b5e43c9e647d8dd5e1c4e26 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Mon, 29 Jul 2013 12:30:04 +0200 Subject: genetlink: fix usage of NLM_F_EXCL or NLM_F_REPLACE Currently, it is not possible to use either NLM_F_EXCL or NLM_F_REPLACE from genetlink. This is due to this check in genl_family_rcv_msg: if (nlh->nlmsg_flags & NLM_F_DUMP) NLM_F_DUMP is NLM_F_MATCH|NLM_F_ROOT. Thus, if the NLM_F_EXCL or NLM_F_REPLACE flag is set, genetlink believes that you're requesting a dump and it calls the .dumpit callback.
The solution that I propose is to refine this checking to make it stricter: if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) And given the combination NLM_F_REPLACE and NLM_F_EXCL does not make sense to me, it removes the ambiguity. There was a patch that tried to fix this some time ago (0ab03c2 netlink: test for all flags of the NLM_F_DUMP composite) but it tried to resolve this ambiguity in *all* existing netlink subsystems, not only genetlink. That patch was reverted since it broke iproute2, which is using NLM_F_ROOT to request the dump of the routing cache. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1076fe16b122..512718adb0d5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -571,7 +571,7 @@ static int genl_family_rcv_msg(struct genl_family *family, !capable(CAP_NET_ADMIN)) return -EPERM; - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ops->dumpit, .done = ops->done, -- cgit From b56e4b857c5210e848bfb80e074e5756a36cd523 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 31 Jul 2013 12:12:24 -0700 Subject: mac80211: fix infinite loop in ieee80211_determine_chantype Commit "3d9646d mac80211: fix channel selection bug" introduced a possible infinite loop by moving the out target above the chandef_downgrade while loop. When we downgrade to NL80211_CHAN_WIDTH_20_NOHT, we jump back up to re-run the while loop...indefinitely. Replace goto with break and carry on. This may not be sufficient to connect to the AP, but will at least keep the cpu from livelocking. Thanks to Derek Atkins as an extra pair of debugging eyes. 
Cc: stable@kernel.org Signed-off-by: Chris Wright Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ae31968d42d3..e3e7d2be9e41 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -338,7 +338,7 @@ out: if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - goto out; + break; } ret |= chandef_downgrade(chandef); -- cgit From cb236d2d713cff83d024a82b836757d9e2b50715 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jul 2013 23:07:43 +0200 Subject: mac80211: don't wait for TX status forever TX status notification can get lost, or the frames could get stuck on the queue, so don't wait for the callback from the driver forever and instead time out after half a second. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e3e7d2be9e41..e5c3cf405060 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -31,10 +31,12 @@ #include "led.h" #define IEEE80211_AUTH_TIMEOUT (HZ / 5) +#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2) #define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) +#define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2) #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_ASSOC_MAX_TRIES 3 @@ -3394,10 +3396,13 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; - ifmgd->auth_data->timeout_started = true; + auth_data->timeout_started = true; run_again(sdata, auth_data->timeout); } else { - auth_data->timeout_started = false; + auth_data->timeout = + 
round_jiffies_up(jiffies + IEEE80211_AUTH_TIMEOUT_LONG); + auth_data->timeout_started = true; + run_again(sdata, auth_data->timeout); } return 0; @@ -3434,7 +3439,11 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) assoc_data->timeout_started = true; run_again(sdata, assoc_data->timeout); } else { - assoc_data->timeout_started = false; + assoc_data->timeout = + round_jiffies_up(jiffies + + IEEE80211_ASSOC_TIMEOUT_LONG); + assoc_data->timeout_started = true; + run_again(sdata, assoc_data->timeout); } return 0; -- cgit From 5cdaed1e878d723d56d04ae0be1738124acf9f46 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jul 2013 11:23:06 +0200 Subject: mac80211: ignore HT primary channel while connected While we're connected, the AP shouldn't change the primary channel in the HT information. We checked this, and dropped the connection if it did change it. Unfortunately, this is causing problems on some APs, e.g. on the Netgear WRT610NL: the beacons seem to always contain a bad channel and if we made a connection using a probe response (correct data) we drop the connection immediately and can basically not connect properly at all. Work around this by ignoring the HT primary channel information in beacons if we're already connected. Also print out more verbose messages in the other situations to help diagnose similar bugs quicker in the future. 
Cc: stable@vger.kernel.org [3.10] Acked-by: Andy Isaacson Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e5c3cf405060..077a95360830 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -211,8 +211,9 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, const struct ieee80211_ht_operation *ht_oper, const struct ieee80211_vht_operation *vht_oper, - struct cfg80211_chan_def *chandef, bool verbose) + struct cfg80211_chan_def *chandef, bool tracking) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_chan_def vht_chandef; u32 ht_cfreq, ret; @@ -231,7 +232,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, channel->band); /* check that channel matches the right operating channel */ - if (channel->center_freq != ht_cfreq) { + if (!tracking && channel->center_freq != ht_cfreq) { /* * It's possible that some APs are confused here; * Netgear WNDR3700 sometimes reports 4 higher than @@ -239,11 +240,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, * since we look at probe response/beacon data here * it should be OK. 
*/ - if (verbose) - sdata_info(sdata, - "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", - channel->center_freq, ht_cfreq, - ht_oper->primary_chan, channel->band); + sdata_info(sdata, + "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", + channel->center_freq, ht_cfreq, + ht_oper->primary_chan, channel->band); ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; goto out; } @@ -297,7 +297,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, channel->band); break; default: - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT operation IE has invalid channel width (%d), disable VHT\n", vht_oper->chan_width); @@ -306,7 +306,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } if (!cfg80211_chandef_valid(&vht_chandef)) { - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT information is invalid, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; @@ -319,7 +319,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT information doesn't match HT, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; @@ -346,7 +346,7 @@ out: ret |= chandef_downgrade(chandef); } - if (chandef->width != vht_chandef.width && verbose) + if (chandef->width != vht_chandef.width && !tracking) sdata_info(sdata, "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); @@ -386,7 +386,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, /* calculate new channel (type) based on HT/VHT operation IEs */ flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper, - vht_oper, &chandef, false); + vht_oper, &chandef, true); /* * Downgrade the new channel if we associated 
with restricted @@ -3838,7 +3838,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, ht_oper, vht_oper, - &chandef, true); + &chandef, false); sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), local->rx_chains); -- cgit From 74418edec915d0f446debebde08d170c7b8ba0ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Jul 2013 10:11:25 +0200 Subject: cfg80211: fix P2P GO interface teardown When a P2P GO interface goes down, cfg80211 doesn't properly tear it down, leading to warnings later. Add the GO interface type to the enumeration to tear it down like AP interfaces. Otherwise, we leave it pending and mac80211's state can get very confused, leading to warnings later. Cc: stable@vger.kernel.org Reported-by: Ilan Peer Tested-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/wireless/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 4f9f216665e9..a8c29fa4f1b3 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -765,6 +765,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: cfg80211_stop_ap(rdev, dev); break; default: -- cgit From ddfe49b42d8ad4bfdf92d63d4a74f162660d878d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jul 2013 20:52:03 +0200 Subject: mac80211: continue using disabled channels while connected In case the AP has different regulatory information than we do, it can happen that we connect to an AP based on e.g. the world roaming regulatory data, and then update our database with the AP's country information disables the channel the AP is using. If this happens on an HT AP, the bandwidth tracking code will hit the WARN_ON() and disconnect. 
Since that's not very useful, ignore the channel-disable flag in bandwidth tracking. Cc: stable@vger.kernel.org Reported-by: Chris Wright Tested-by: Chris Wright Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 077a95360830..cc9e02d79b55 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -335,8 +335,17 @@ out: if (ret & IEEE80211_STA_DISABLE_VHT) vht_chandef = *chandef; + /* + * Ignore the DISABLED flag when we're already connected and only + * tracking the APs beacon for bandwidth changes - otherwise we + * might get disconnected here if we connect to an AP, update our + * regulatory information based on the AP's country IE and the + * information we have is wrong/outdated and disables the channel + * that we're actually using for the connection to the AP. + */ while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, - IEEE80211_CHAN_DISABLED)) { + tracking ? 0 : + IEEE80211_CHAN_DISABLED)) { if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; -- cgit From 8cb3b9c3642c0263d48f31d525bcee7170eedc20 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Jul 2013 13:23:39 +0300 Subject: net_sched: info leak in atm_tc_dump_class() The "pvc" struct has a hole after pvc.sap_family which is not cleared. Signed-off-by: Dan Carpenter Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ca8e0a57d945..1f9c31411f19 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -605,6 +605,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, struct sockaddr_atmpvc pvc; int state; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = flow->vcc->dev ? 
flow->vcc->dev->number : -1; pvc.sap_addr.vpi = flow->vcc->vpi; -- cgit From b00589af3b04736376f24625ab0b394642e89e29 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Thu, 1 Aug 2013 01:06:20 +0200 Subject: bridge: disable snooping if there is no querier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there is no querier on a link then we won't get periodic reports and therefore won't be able to learn about multicast listeners behind ports, potentially leading to lost multicast packets, especially for multicast listeners that joined before the creation of the bridge. These lost multicast packets can appear since c5c23260594 ("bridge: Add multicast_querier toggle and disable queries by default") in particular. With this patch we are flooding multicast packets if our querier is disabled and if we didn't detect any other querier. A grace period of the Maximum Response Delay of the querier is added to give multicast responses enough time to arrive and to be learned from before disabling the flooding behaviour again. Signed-off-by: Linus Lüssing Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 3 ++- net/bridge/br_input.c | 3 ++- net/bridge/br_multicast.c | 39 ++++++++++++++++++++++++++++++--------- net/bridge/br_private.h | 12 ++++++++++++ 4 files changed, 46 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 2ef66781fedb..69363bd37f64 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -70,7 +70,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) } mdst = br_mdb_get(br, skb, vid); - if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br)) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb, false); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 1b8b8b824cd7..8c561c0aa636 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -101,7 +101,8 @@ int br_handle_frame_finish(struct sk_buff *skb) unicast = false; } else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb, vid); - if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br)) { if ((mdst && mdst->mglist) || br_multicast_is_router(br)) skb2 = skb; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 4b99c9a27044..61c5e819380e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1014,6 +1014,16 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } #endif +static void br_multicast_update_querier_timer(struct net_bridge *br, + unsigned long max_delay) +{ + if (!timer_pending(&br->multicast_querier_timer)) + br->multicast_querier_delay_time = jiffies + max_delay; + + mod_timer(&br->multicast_querier_timer, + jiffies + br->multicast_querier_interval); +} + /* * Add port to router_list * list is maintained ordered by pointer value @@ -1064,11 +1074,11 @@ timer: static void br_multicast_query_received(struct 
net_bridge *br, struct net_bridge_port *port, - int saddr) + int saddr, + unsigned long max_delay) { if (saddr) - mod_timer(&br->multicast_querier_timer, - jiffies + br->multicast_querier_interval); + br_multicast_update_querier_timer(br, max_delay); else if (timer_pending(&br->multicast_querier_timer)) return; @@ -1096,8 +1106,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - br_multicast_query_received(br, port, !!iph->saddr); - group = ih->group; if (skb->len == sizeof(*ih)) { @@ -1121,6 +1129,8 @@ static int br_ip4_multicast_query(struct net_bridge *br, IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } + br_multicast_query_received(br, port, !!iph->saddr, max_delay); + if (!group) goto out; @@ -1176,8 +1186,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr)); - if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; @@ -1198,6 +1206,9 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = mld2q->mld2q_mrc ? 
MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; } + br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr), + max_delay); + if (!group) goto out; @@ -1643,6 +1654,8 @@ void br_multicast_init(struct net_bridge *br) br->multicast_querier_interval = 255 * HZ; br->multicast_membership_interval = 260 * HZ; + br->multicast_querier_delay_time = 0; + spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, br_multicast_local_router_expired, 0); @@ -1831,6 +1844,8 @@ unlock: int br_multicast_set_querier(struct net_bridge *br, unsigned long val) { + unsigned long max_delay; + val = !!val; spin_lock_bh(&br->multicast_lock); @@ -1838,8 +1853,14 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) goto unlock; br->multicast_querier = val; - if (val) - br_multicast_start_querier(br); + if (!val) + goto unlock; + + max_delay = br->multicast_query_response_interval; + if (!timer_pending(&br->multicast_querier_timer)) + br->multicast_querier_delay_time = jiffies + max_delay; + + br_multicast_start_querier(br); unlock: spin_unlock_bh(&br->multicast_lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3be89b3ce17b..2f7da41851bf 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -267,6 +267,7 @@ struct net_bridge unsigned long multicast_query_interval; unsigned long multicast_query_response_interval; unsigned long multicast_startup_query_interval; + unsigned long multicast_querier_delay_time; spinlock_t multicast_lock; struct net_bridge_mdb_htable __rcu *mdb; @@ -501,6 +502,13 @@ static inline bool br_multicast_is_router(struct net_bridge *br) (br->multicast_router == 1 && timer_pending(&br->multicast_router_timer)); } + +static inline bool br_multicast_querier_exists(struct net_bridge *br) +{ + return time_is_before_jiffies(br->multicast_querier_delay_time) && + (br->multicast_querier || + timer_pending(&br->multicast_querier_timer)); +} #else static inline int br_multicast_rcv(struct net_bridge 
*br, struct net_bridge_port *port, @@ -557,6 +565,10 @@ static inline bool br_multicast_is_router(struct net_bridge *br) { return 0; } +static inline bool br_multicast_querier_exists(struct net_bridge *br) +{ + return false; +} static inline void br_mdb_init(void) { } -- cgit From 71ffe9c77dd7a2b62207953091efa8dafec958dd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 25 Jul 2013 10:37:49 +0200 Subject: netfilter: xt_TCPMSS: fix handling of malformed TCP header and options Make sure the packet has enough room for the TCP header and that it is not malformed. While at it, store tcph->doff*4 in a variable, as it is used several times. This patch also fixes a possible off by one in case of malformed TCP options. Reported-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TCPMSS.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 7011c71646f0..6113cc7efffc 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -52,7 +52,8 @@ tcpmss_mangle_packet(struct sk_buff *skb, { const struct xt_tcpmss_info *info = par->targinfo; struct tcphdr *tcph; - unsigned int tcplen, i; + int len, tcp_hdrlen; + unsigned int i; __be16 oldval; u16 newmss; u8 *opt; @@ -64,11 +65,14 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (!skb_make_writable(skb, skb->len)) return -1; - tcplen = skb->len - tcphoff; + len = skb->len - tcphoff; + if (len < (int)sizeof(struct tcphdr)) + return -1; + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); + tcp_hdrlen = tcph->doff * 4; - /* Header cannot be larger than the packet */ - if (tcplen < tcph->doff*4) + if (len < tcp_hdrlen) return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { @@ -87,9 +91,8 @@ tcpmss_mangle_packet(struct sk_buff *skb, newmss = info->mss; opt = (u_int8_t *)tcph; - for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { - if (opt[i] == 
TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && - opt[i+1] == TCPOLEN_MSS) { + for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) { + if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) { u_int16_t oldmss; oldmss = (opt[i+2] << 8) | opt[i+3]; @@ -112,9 +115,10 @@ tcpmss_mangle_packet(struct sk_buff *skb, } /* There is data after the header so the option can't be added - without moving it, and doing so may make the SYN packet - itself too large. Accept the packet unmodified instead. */ - if (tcplen > tcph->doff*4) + * without moving it, and doing so may make the SYN packet + * itself too large. Accept the packet unmodified instead. + */ + if (len > tcp_hdrlen) return 0; /* @@ -143,10 +147,10 @@ tcpmss_mangle_packet(struct sk_buff *skb, newmss = min(newmss, (u16)1220); opt = (u_int8_t *)tcph + sizeof(struct tcphdr); - memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); + memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr)); inet_proto_csum_replace2(&tcph->check, skb, - htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); + htons(len), htons(len + TCPOLEN_MSS), 1); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; -- cgit From a206bcb3b02025b23137f3228109d72e0f835c05 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 25 Jul 2013 10:46:46 +0200 Subject: netfilter: xt_TCPOPTSTRIP: fix possible off by one access Fix a possible off by one access since optlen() touches opt[offset+1] unsafely when i == tcp_hdrlen(skb) - 1. This patch replaces tcp_hdrlen() by the local variable tcp_hdrlen that stores the TCP header length, to save some cycles. 
Reported-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TCPOPTSTRIP.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index b68fa191710f..625fa1d636a0 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -38,7 +38,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, struct tcphdr *tcph; u_int16_t n, o; u_int8_t *opt; - int len; + int len, tcp_hdrlen; /* This is a fragment, no TCP header is available */ if (par->fragoff != 0) @@ -52,7 +52,9 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, return NF_DROP; tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); - if (tcph->doff * 4 > len) + tcp_hdrlen = tcph->doff * 4; + + if (len < tcp_hdrlen) return NF_DROP; opt = (u_int8_t *)tcph; @@ -61,10 +63,10 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, * Walk through all TCP options - if we find some option to remove, * set all octets to %TCPOPT_NOP and adjust checksum. */ - for (i = sizeof(struct tcphdr); i < tcp_hdrlen(skb); i += optl) { + for (i = sizeof(struct tcphdr); i < tcp_hdrlen - 1; i += optl) { optl = optlen(opt, i); - if (i + optl > tcp_hdrlen(skb)) + if (i + optl > tcp_hdrlen) break; if (!tcpoptstrip_test_bit(info->strip_bmap, opt[i])) -- cgit From 447383d2ba6061bb069da45f95f223a01bba61dd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Jul 2013 11:30:23 +1000 Subject: NFSD/sunrpc: avoid deadlock on TCP connection due to memory pressure. Since we enabled auto-tuning for sunrpc TCP connections we do not guarantee that there is enough write-space on each connection to queue a reply. If memory pressure causes the window to shrink too small, the request throttling in sunrpc/svc will not accept any requests so no more requests will be handled. Even when pressure decreases the window will not grow again until data is sent on the connection. 
This means we get a deadlock: no requests will be handled until there is more space, and no space will be allocated until a request is handled. This can be simulated by modifying svc_tcp_has_wspace to inflate the number of bytes required and removing the 'svc_sock_setbufsize' calls in svc_setup_socket. I found that multiplying by 16 was enough to make the requirement exceed the default allocation. With this modification in place: mount -o vers=3,proto=tcp 127.0.0.1:/home /mnt would block and eventually time out because the nfs server could not accept any requests. This patch relaxes the request throttling to always allow at least one request through per connection. It does this by checking that both sk_stream_min_wspace() and xprt->xpt_reserved are zero. The first is zero when the TCP transmit queue is empty. The second is zero when there are no RPC requests being processed. When both of these are zero the socket is idle and so one more request can safely be allowed through. Applying this patch allows the above mount command to succeed cleanly. Tracing shows that the allocated write buffer space quickly grows and after a few requests are handled, the extra tests are no longer needed to permit further requests to be processed. The main purpose of request throttling is to handle the case when one client is slow at collecting replies and the send queue gets full of replies that the client hasn't acknowledged (at the TCP level) yet. As we only change behaviour when the send queue is empty this main purpose is still preserved. Reported-by: Ben Myers Signed-off-by: NeilBrown Signed-off-by: J.
Bruce Fields --- net/sunrpc/svcsock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 305374d4fb98..7762b9f8a8b7 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1193,7 +1193,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) return 1; required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; - if (sk_stream_wspace(svsk->sk_sk) >= required) + if (sk_stream_wspace(svsk->sk_sk) >= required || + (sk_stream_min_wspace(svsk->sk_sk) == 0 && + atomic_read(&xprt->xpt_reserved) == 0)) return 1; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); return 0; -- cgit From 9f96392b0ae6aefc02a9b900c3f4889dfafc8402 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 10 Jun 2013 16:06:44 -0400 Subject: svcrpc: fix gss_rpc_upcall create error Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index d304f41260f2..1e1ccf539fac 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -120,7 +120,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) if (IS_ERR(clnt)) { dprintk("RPC: failed to create AF_LOCAL gssproxy " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); *_clnt = NULL; goto out; } -- cgit From dc43376c26cef74226174a2394f37f2a3f8a8639 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 7 Jun 2013 10:11:19 -0400 Subject: svcrpc: fix gss-proxy xdr decoding oops Uninitialized stack data was being used as the destination for memcpy's. Longer term we'll just delete some of this code; all we're doing is skipping over xdr that we don't care about. Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_xdr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 357f613df7ff..3c85d1c8a028 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -430,7 +430,7 @@ static int dummy_enc_nameattr_array(struct xdr_stream *xdr, static int dummy_dec_nameattr_array(struct xdr_stream *xdr, struct gssx_name_attr_array *naa) { - struct gssx_name_attr dummy; + struct gssx_name_attr dummy = { .attr = {.len = 0} }; u32 count, i; __be32 *p; @@ -493,12 +493,13 @@ static int gssx_enc_name(struct xdr_stream *xdr, return err; } + static int gssx_dec_name(struct xdr_stream *xdr, struct gssx_name *name) { - struct xdr_netobj dummy_netobj; - struct gssx_name_attr_array dummy_name_attr_array; - struct gssx_option_array dummy_option_array; + struct xdr_netobj dummy_netobj = { .len = 0 }; + struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 }; + struct gssx_option_array dummy_option_array = { .count = 0 }; int err; /* name->display_name */ -- cgit From 743e217129f69aab074abe520a464fd0c6b1cca1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 31 Jul 2013 14:11:14 -0400 Subject: svcrpc: fix kfree oops in gss-proxy code mech_oid.data is an array, not kmalloc()'d memory. Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 1e1ccf539fac..af7ffd447fee 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -328,7 +328,6 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data) kfree(data->in_handle.data); kfree(data->out_handle.data); kfree(data->out_token.data); - kfree(data->mech_oid.data); free_svc_cred(&data->creds); } -- cgit From 7193bd17ea92c4c89016c304362c9be93ce50050 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 31 Jul 2013 17:51:42 -0400 Subject: svcrpc: set cr_gss_mech from gss-proxy as well as legacy upcall The change made to rsc_parse() in 0dc1531aca7fd1440918bd55844a054e9c29acad "svcrpc: store gss mech in svc_cred" should also have been propagated to the gss-proxy codepath. This fixes a crash in the gss-proxy case. Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d0347d148b34..09fb638bcaa4 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1180,6 +1180,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, gm = gss_mech_get_by_OID(&ud->mech_oid); if (!gm) goto out; + rsci.cred.cr_gss_mech = gm; status = -EINVAL; /* mech-specific data: */ @@ -1195,7 +1196,6 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, rscp = rsc_update(cd, &rsci, rscp); status = 0; out: - gss_mech_put(gm); rsc_free(&rsci); if (rscp) cache_put(&rscp->h, cd); -- cgit From 2ac3ac8f86f2fe065d746d9a9abaca867adec577 Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Thu, 1 Aug 2013 10:04:14 +0200 Subject: ipv6: prevent fib6_run_gc() contention On a high-traffic router with many processors and many IPv6 dst entries, soft 
lockup in fib6_run_gc() can occur when number of entries reaches gc_thresh. This happens because fib6_run_gc() uses fib6_gc_lock to allow only one thread to run the garbage collector but ip6_dst_gc() doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc() returns. On a system with many entries, this can take some time so that in the meantime, other threads pass the tests in ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for the lock. They then have to run the garbage collector one after another which blocks them for quite long. Resolve this by replacing special value ~0UL of expire parameter to fib6_run_gc() by explicit "force" parameter to choose between spin_lock_bh() and spin_trylock_bh() and call fib6_run_gc() with force=false if gc_thresh is reached but not max_size. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 19 ++++++++----------- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 4 ++-- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5fc9c7a68d8d..d872553ca933 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1632,19 +1632,16 @@ static int fib6_age(struct rt6_info *rt, void *arg) static DEFINE_SPINLOCK(fib6_gc_lock); -void fib6_run_gc(unsigned long expires, struct net *net) +void fib6_run_gc(unsigned long expires, struct net *net, bool force) { - if (expires != ~0UL) { + if (force) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = expires ? (int)expires : - net->ipv6.sysctl.ip6_rt_gc_interval; - } else { - if (!spin_trylock_bh(&fib6_gc_lock)) { - mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); - return; - } - gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; + } else if (!spin_trylock_bh(&fib6_gc_lock)) { + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); + return; } + gc_args.timeout = expires ? 
(int)expires : + net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = icmp6_dst_gc(); @@ -1661,7 +1658,7 @@ void fib6_run_gc(unsigned long expires, struct net *net) static void fib6_gc_timer_cb(unsigned long arg) { - fib6_run_gc(0, (struct net *)arg); + fib6_run_gc(0, (struct net *)arg, true); } static int __net_init fib6_net_init(struct net *net) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 24c03396e008..79aa9652ed86 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1576,7 +1576,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); idev = in6_dev_get(dev); if (!idev) break; @@ -1586,7 +1586,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); break; case NETDEV_NOTIFY_PEERS: ndisc_send_unsol_na(dev); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a8c891aa2464..824c424f9648 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1326,7 +1326,7 @@ static int ip6_dst_gc(struct dst_ops *ops) goto out; net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); net->ipv6.ip6_rt_last_gc = now; entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) @@ -2827,7 +2827,7 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, net = (struct net *)ctl->extra1; delay = net->ipv6.sysctl.flush_delay; proc_dointvec(ctl, write, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); + fib6_run_gc(delay <= 0 ? 
0 : (unsigned long)delay, net, delay > 0); return 0; } -- cgit From 49a18d86f66d33a20144ecb5a34bba0d1856b260 Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Thu, 1 Aug 2013 10:04:24 +0200 Subject: ipv6: update ip6_rt_last_gc every time GC is run As pointed out by Eric Dumazet, net->ipv6.ip6_rt_last_gc should hold the last time garbage collector was run so that we should update it whenever fib6_run_gc() calls fib6_clean_all(), not only if we got there from ip6_dst_gc(). Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 6 +++++- net/ipv6/route.c | 4 +--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d872553ca933..bff3d821c7eb 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1634,6 +1634,8 @@ static DEFINE_SPINLOCK(fib6_gc_lock); void fib6_run_gc(unsigned long expires, struct net *net, bool force) { + unsigned long now; + if (force) { spin_lock_bh(&fib6_gc_lock); } else if (!spin_trylock_bh(&fib6_gc_lock)) { @@ -1646,10 +1648,12 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) gc_args.more = icmp6_dst_gc(); fib6_clean_all(net, fib6_age, 0, NULL); + now = jiffies; + net->ipv6.ip6_rt_last_gc = now; if (gc_args.more) mod_timer(&net->ipv6.ip6_fib_timer, - round_jiffies(jiffies + round_jiffies(now + net->ipv6.sysctl.ip6_rt_gc_interval)); else del_timer(&net->ipv6.ip6_fib_timer); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 824c424f9648..b70f8979003b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1311,7 +1311,6 @@ static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), static int ip6_dst_gc(struct dst_ops *ops) { - unsigned long now = jiffies; struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; @@ -1321,13 +1320,12 @@ static int ip6_dst_gc(struct dst_ops 
*ops) int entries; entries = dst_entries_get_fast(ops); - if (time_after(rt_last_gc + rt_min_interval, now) && + if (time_after(rt_last_gc + rt_min_interval, jiffies) && entries <= rt_max_size) goto out; net->ipv6.ip6_rt_gc_expire++; fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); - net->ipv6.ip6_rt_last_gc = now; entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; -- cgit From 3f8f52982ad020f0704548c46de66bf464d3b967 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Aug 2013 10:41:27 +0200 Subject: ipv6: move peer_addr init into ipv6_add_addr() Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cfdcf7b2daf6..a0ce957fb671 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -813,7 +813,8 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) /* On success it returns ifp with increased reference count */ static struct inet6_ifaddr * -ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, +ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, + const struct in6_addr *peer_addr, int pfxlen, int scope, u32 flags) { struct inet6_ifaddr *ifa = NULL; @@ -863,6 +864,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, } ifa->addr = *addr; + if (peer_addr) + ifa->peer_addr = *peer_addr; spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); @@ -1123,8 +1126,8 @@ retry: ift = !max_addresses || ipv6_count_addresses(idev) < max_addresses ? 
- ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_scope(&addr), - addr_flags) : NULL; + ipv6_add_addr(idev, &addr, NULL, tmp_plen, + ipv6_addr_scope(&addr), addr_flags) : NULL; if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -2179,7 +2182,8 @@ ok: */ if (!max_addresses || ipv6_count_addresses(in6_dev) < max_addresses) - ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len, + ifp = ipv6_add_addr(in6_dev, &addr, NULL, + pinfo->prefix_len, addr_type&IPV6_ADDR_SCOPE_MASK, addr_flags); @@ -2455,15 +2459,13 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p prefered_lft = timeout; } - ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); + ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = jiffies; - if (peer_pfx) - ifp->peer_addr = *peer_pfx; spin_unlock_bh(&ifp->lock); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, @@ -2557,7 +2559,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT); + ifp = ipv6_add_addr(idev, addr, NULL, plen, scope, IFA_F_PERMANENT); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; @@ -2683,7 +2685,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr #endif - ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags); + ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); -- cgit From 8a226b2cfa776db6011fc84b71578513161cd3d3 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 1 Aug 2013 10:41:28 +0200 Subject: ipv6: prevent race between address creation and removal There's a race in IPv6 automatic addess assignment. 
The address is created with zero lifetime when it's added to various address lists. Before it gets assigned the correct lifetime, there's a window where a new address may be configured. This causes the semi-initiated address to be deleted in addrconf_verify. This was discovered as a reference leak caused by concurrent run of __ipv6_ifa_notify for both RTM_NEWADDR and RTM_DELADDR with the same address. Fix this by setting the lifetime before the address is added to inet6_addr_lst. A few notes: 1. In addrconf_prefix_rcv, by setting update_lft to zero, the if (update_lft) { ... } condition is no longer executed for newly created addresses. This is okay, as the ifp fields are set in ipv6_add_addr now and ipv6_ifa_notify is called (and has been called) through addrconf_dad_start. 2. The removal of the whole block under ifp->lock in inet6_addr_add is okay, too, as tstamp is initialized to jiffies in ipv6_add_addr. Signed-off-by: Jiri Benc Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a0ce957fb671..da4241c8c7da 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -815,7 +815,7 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) static struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, const struct in6_addr *peer_addr, int pfxlen, - int scope, u32 flags) + int scope, u32 flags, u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; @@ -875,6 +875,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->scope = scope; ifa->prefix_len = pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; + ifa->valid_lft = valid_lft; + ifa->prefered_lft = prefered_lft; ifa->cstamp = ifa->tstamp = jiffies; ifa->tokenized = false; @@ -1127,7 +1129,8 @@ retry: ift = !max_addresses || 
ipv6_count_addresses(idev) < max_addresses ? ipv6_add_addr(idev, &addr, NULL, tmp_plen, - ipv6_addr_scope(&addr), addr_flags) : NULL; + ipv6_addr_scope(&addr), addr_flags, + tmp_valid_lft, tmp_prefered_lft) : NULL; if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -1139,8 +1142,6 @@ retry: spin_lock_bh(&ift->lock); ift->ifpub = ifp; - ift->valid_lft = tmp_valid_lft; - ift->prefered_lft = tmp_prefered_lft; ift->cstamp = now; ift->tstamp = tmp_tstamp; spin_unlock_bh(&ift->lock); @@ -2185,14 +2186,16 @@ ok: ifp = ipv6_add_addr(in6_dev, &addr, NULL, pinfo->prefix_len, addr_type&IPV6_ADDR_SCOPE_MASK, - addr_flags); + addr_flags, valid_lft, + prefered_lft); if (IS_ERR_OR_NULL(ifp)) { in6_dev_put(in6_dev); return; } - update_lft = create = 1; + update_lft = 0; + create = 1; ifp->cstamp = jiffies; ifp->tokenized = tokenized; addrconf_dad_start(ifp); @@ -2213,7 +2216,7 @@ ok: stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!update_lft && stored_lft) { + if (!update_lft && !create && stored_lft) { if (valid_lft > MIN_VALID_LIFETIME || valid_lft > stored_lft) update_lft = 1; @@ -2459,15 +2462,10 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p prefered_lft = timeout; } - ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags); + ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags, + valid_lft, prefered_lft); if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->valid_lft = valid_lft; - ifp->prefered_lft = prefered_lft; - ifp->tstamp = jiffies; - spin_unlock_bh(&ifp->lock); - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, expires, flags); /* @@ -2559,7 +2557,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - ifp = ipv6_add_addr(idev, addr, NULL, plen, scope, IFA_F_PERMANENT); + ifp = ipv6_add_addr(idev, addr, NULL, plen, + scope, IFA_F_PERMANENT, 0, 0); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); 
ifp->flags &= ~IFA_F_TENTATIVE; @@ -2685,7 +2684,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr #endif - ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags); + ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); -- cgit From e0d1095ae3405404d247afb00233ef837d58da83 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Aug 2013 11:10:25 +0800 Subject: net: rename CONFIG_NET_LL_RX_POLL to CONFIG_NET_RX_BUSY_POLL Eliezer renames several *ll_poll to *busy_poll, but forgets CONFIG_NET_LL_RX_POLL, so in case of confusion, rename it too. Cc: Eliezer Tamir Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/Kconfig | 2 +- net/core/skbuff.c | 2 +- net/core/sock.c | 6 +++--- net/core/sysctl_net_core.c | 2 +- net/socket.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 37702491abe9..2b406608a1a4 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -244,7 +244,7 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis -config NET_LL_RX_POLL +config NET_RX_BUSY_POLL boolean default y diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3df4d4ccf440..2c3d0f53d198 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -740,7 +740,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_copy_secmark(new, old); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL new->napi_id = old->napi_id; #endif } diff --git a/net/core/sock.c b/net/core/sock.c index 548d716c5f62..2c097c5a35dd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -900,7 +900,7 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL case 
SO_BUSY_POLL: /* allow unprivileged users to decrease the value */ if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) @@ -1170,7 +1170,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: v.val = sk->sk_ll_usec; break; @@ -2292,7 +2292,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; sk->sk_ll_usec = sysctl_net_busy_read; #endif diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 660968616637..b59b6804fd98 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -298,7 +298,7 @@ static struct ctl_table net_core_table[] = { .proc_handler = flow_limit_table_len_sysctl }, #endif /* CONFIG_NET_FLOW_LIMIT */ -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL { .procname = "busy_poll", .data = &sysctl_net_busy_poll, diff --git a/net/socket.c b/net/socket.c index 829b460acb87..b2d7c629eeb9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -106,7 +106,7 @@ #include #include -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; #endif -- cgit From c756891a4e1c08c43780e17aca1d2b849ef31d1a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 1 Aug 2013 08:29:18 -0400 Subject: tipc: fix oops when creating server socket fails When creation of TIPC internal server socket fails, we get an oops with the following dump: BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] tipc_close_conn+0x59/0xb0 [tipc] PGD 13719067 PUD 12008067 PMD 0 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: tipc(+) CPU: 4 PID: 4340 Comm: insmod Not tainted 3.10.0+ #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 task: ffff880014360000 
ti: ffff88001374c000 task.ti: ffff88001374c000 RIP: 0010:[] [] tipc_close_conn+0x59/0xb0 [tipc] RSP: 0018:ffff88001374dc98 EFLAGS: 00010292 RAX: 0000000000000000 RBX: ffff880012ac09d8 RCX: 0000000000000000 RDX: 0000000000000046 RSI: 0000000000000001 RDI: ffff880014360000 RBP: ffff88001374dcb8 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa0016fa0 R13: ffffffffa0017010 R14: ffffffffa0017010 R15: ffff880012ac09d8 FS: 0000000000000000(0000) GS:ffff880016600000(0063) knlGS:00000000f76668d0 CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b CR2: 0000000000000020 CR3: 0000000012227000 CR4: 00000000000006e0 Stack: ffff88001374dcb8 ffffffffa0016fa0 0000000000000000 0000000000000001 ffff88001374dcf8 ffffffffa0012922 ffff88001374dce8 00000000ffffffea ffffffffa0017100 0000000000000000 ffff8800134241a8 ffffffffa0017150 Call Trace: [] tipc_server_stop+0xa2/0x1b0 [tipc] [] tipc_subscr_stop+0x15/0x20 [tipc] [] tipc_core_stop+0x1d/0x33 [tipc] [] tipc_init+0xd4/0xf8 [tipc] [] ? 0xffffffffa001efff [] do_one_initcall+0x3f/0x150 [] ? __blocking_notifier_call_chain+0x7d/0xd0 [] load_module+0x11aa/0x19c0 [] ? show_initstate+0x50/0x50 [] ? retint_restore_args+0xe/0xe [] SyS_init_module+0xd9/0x110 [] sysenter_dispatch+0x7/0x1f Code: 6c 24 70 4c 89 ef e8 b7 04 8f e1 8b 73 04 4c 89 e7 e8 7c 9e 32 e1 41 83 ac 24 b8 00 00 00 01 4c 89 ef e8 eb 0a 8f e1 48 8b 43 08 <4c> 8b 68 20 4d 8d a5 48 03 00 00 4c 89 e7 e8 04 05 8f e1 4c 89 RIP [] tipc_close_conn+0x59/0xb0 [tipc] RSP CR2: 0000000000000020 ---[ end trace b02321f40e4269a3 ]--- We have the following call chain: tipc_core_start() ret = tipc_subscr_start() ret = tipc_server_start(){ server->enabled = 1; ret = tipc_open_listening_sock() } I.e., the server->enabled flag is unconditionally set to 1, whatever the return value of tipc_open_listening_sock(). 
This causes a crash when tipc_core_start() tries to clean up resources after a failed initialization: if (ret == failed) tipc_subscr_stop() tipc_server_stop(){ if (server->enabled) tipc_close_conn(){ NULL reference of con->sock->sk OOPS! } } To avoid this, tipc_server_start() should only set server->enabled to 1 in case of a successful socket creation. In case of failure, it should release all allocated resources before returning. Problem introduced in commit c5fa7b3cf3cb22e4ac60485fc2dc187fe012910f ("tipc: introduce new TIPC server infrastructure") in v3.11-rc1. Note that it won't be seen often; it takes a module load under memory constrained conditions in order to trigger the failure condition. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/server.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/server.c b/net/tipc/server.c index 19da5abe0fa6..fd3fa57a410e 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -355,8 +355,12 @@ static int tipc_open_listening_sock(struct tipc_server *s) return PTR_ERR(con); sock = tipc_create_listen_sock(con); - if (!sock) + if (!sock) { + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + kfree(con); return -EINVAL; + } tipc_register_callbacks(sock, con); return 0; @@ -563,9 +567,14 @@ int tipc_server_start(struct tipc_server *s) kmem_cache_destroy(s->rcvbuf_cache); return ret; } + ret = tipc_open_listening_sock(s); + if (ret < 0) { + tipc_work_stop(s); + kmem_cache_destroy(s->rcvbuf_cache); + return ret; + } s->enabled = 1; - - return tipc_open_listening_sock(s); + return ret; } void tipc_server_stop(struct tipc_server *s) -- cgit From cbd375567f7e4811b1c721f75ec519828ac6583f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 1 Aug 2013 22:32:07 -0700 Subject: htb: fix sign extension bug When userspace passes a large priority value the assignment of the unsigned value 
hopt->prio to signed int cl->prio causes cl->prio to become negative and the comparison with TC_HTB_NUMPRIO is always false. The result is that HTB crashes by referencing outside the array when processing packets. With this patch the large value wraps around like other values outside the normal range. See: https://bugzilla.kernel.org/show_bug.cgi?id=60669 Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c2124ea29f45..45e751527dfc 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -100,7 +100,7 @@ struct htb_class { struct psched_ratecfg ceil; s64 buffer, cbuffer;/* token bucket depth/rate */ s64 mbuffer; /* max wait time */ - int prio; /* these two are used only by leaves... */ + u32 prio; /* these two are used only by leaves... */ int quantum; /* but stored for parent-to-leaf return */ struct tcf_proto *filter_list; /* class attached filters */ -- cgit From 446266b0c742a2c9ee8f0dce759a0117bce58a86 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Aug 2013 11:32:43 +0200 Subject: net: rtm_to_ifaddr: free ifa if ifa_cacheinfo processing fails Commit 5c766d642 ("ipv4: introduce address lifetime") leaves the ifa resource that was allocated via inet_alloc_ifa() unfreed when returning from the function with -EINVAL. Thus, free it first via inet_free_ifa(). Signed-off-by: Daniel Borkmann Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/ipv4/devinet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 8d48c392adcc..34ca6d5a3a4b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -772,7 +772,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { err = -EINVAL; - goto errout; + goto errout_free; } *pvalid_lft = ci->ifa_valid; *pprefered_lft = ci->ifa_prefered; @@ -780,6 +780,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, return ifa; +errout_free: + inet_free_ifa(ifa); errout: return ERR_PTR(err); } -- cgit From 5f671d6b4ec3e6d66c2a868738af2cdea09e7509 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 2 Aug 2013 18:36:40 +0400 Subject: net: check net.core.somaxconn sysctl values It's possible to assign an invalid value to the net.core.somaxconn sysctl variable, because there are no checks at all. The sk_max_ack_backlog field of the sock structure is defined as unsigned short. Therefore, the backlog argument in inet_listen() shouldn't exceed USHRT_MAX. The backlog argument in the listen() syscall is truncated to the somaxconn value. So, the somaxconn value shouldn't exceed 65535 (USHRT_MAX). Also, negative values of somaxconn are meaningless. before: $ sysctl -w net.core.somaxconn=256 net.core.somaxconn = 256 $ sysctl -w net.core.somaxconn=65536 net.core.somaxconn = 65536 $ sysctl -w net.core.somaxconn=-100 net.core.somaxconn = -100 after: $ sysctl -w net.core.somaxconn=256 net.core.somaxconn = 256 $ sysctl -w net.core.somaxconn=65536 error: "Invalid argument" setting key "net.core.somaxconn" $ sysctl -w net.core.somaxconn=-100 error: "Invalid argument" setting key "net.core.somaxconn" Based on a prior patch from Changli Gao. 
Signed-off-by: Roman Gushchin Reported-by: Changli Gao Suggested-by: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sysctl_net_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b59b6804fd98..31107abd2783 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -21,7 +21,9 @@ #include #include +static int zero = 0; static int one = 1; +static int ushort_max = USHRT_MAX; #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, @@ -339,7 +341,9 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .extra1 = &zero, + .extra2 = &ushort_max, + .proc_handler = proc_dointvec_minmax }, { } }; -- cgit From 6a8b7f0c85f1f42eb8b6e68ef3d5ba8020d8e272 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 2 Aug 2013 14:45:08 -0400 Subject: netlabel: use domain based selectors when address based selectors are not available NetLabel has the ability to selectively assign network security labels to outbound traffic based on either the LSM's "domain" (different for each LSM), the network destination, or a combination of both. Depending on the type of traffic, local or forwarded, and the type of traffic selector, domain or address based, different hooks are used to label the traffic; the goal being minimal overhead. Unfortunately, there is a bug such that a system using NetLabel domain based traffic selectors does not correctly label outbound local traffic that is not assigned to a socket. The issue is that in these cases the associated NetLabel hook only looks at the address based selectors and not the domain based selectors. This patch corrects this by checking both the domain and address based selectors so that the correct labeling is applied, regardless of the configuration type. 
In order to accomplish this fix, this patch also simplifies some of the NetLabel domainhash structures to use a more common outbound traffic mapping type: struct netlbl_dommap_def. This simplifies some of the code in this patch and paves the way for further simplifications in the future. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_cipso_v4.c | 4 +- net/netlabel/netlabel_domainhash.c | 104 +++++++++++++++++-------------------- net/netlabel/netlabel_domainhash.h | 46 ++++++++-------- net/netlabel/netlabel_kapi.c | 88 +++++++++++++------------------ net/netlabel/netlabel_mgmt.c | 44 ++++++++-------- net/netlabel/netlabel_unlabeled.c | 2 +- 6 files changed, 130 insertions(+), 158 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c15042f987bd..a1100640495d 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -691,8 +691,8 @@ static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg) { struct netlbl_domhsh_walk_arg *cb_arg = arg; - if (entry->type == NETLBL_NLTYPE_CIPSOV4 && - entry->type_def.cipsov4->doi == cb_arg->doi) + if (entry->def.type == NETLBL_NLTYPE_CIPSOV4 && + entry->def.cipso->doi == cb_arg->doi) return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info); return 0; diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 6bb1d42f0fac..85d842e6e431 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -84,15 +84,15 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) #endif /* IPv6 */ ptr = container_of(entry, struct netlbl_dom_map, rcu); - if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) { + if (ptr->def.type == NETLBL_NLTYPE_ADDRSELECT) { netlbl_af4list_foreach_safe(iter4, tmp4, - &ptr->type_def.addrsel->list4) { + &ptr->def.addrsel->list4) { netlbl_af4list_remove_entry(iter4); kfree(netlbl_domhsh_addr4_entry(iter4)); } 
#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, - &ptr->type_def.addrsel->list6) { + &ptr->def.addrsel->list6) { netlbl_af6list_remove_entry(iter6); kfree(netlbl_domhsh_addr6_entry(iter6)); } @@ -213,21 +213,21 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry, if (addr4 != NULL) { struct netlbl_domaddr4_map *map4; map4 = netlbl_domhsh_addr4_entry(addr4); - type = map4->type; - cipsov4 = map4->type_def.cipsov4; + type = map4->def.type; + cipsov4 = map4->def.cipso; netlbl_af4list_audit_addr(audit_buf, 0, NULL, addr4->addr, addr4->mask); #if IS_ENABLED(CONFIG_IPV6) } else if (addr6 != NULL) { struct netlbl_domaddr6_map *map6; map6 = netlbl_domhsh_addr6_entry(addr6); - type = map6->type; + type = map6->def.type; netlbl_af6list_audit_addr(audit_buf, 0, NULL, &addr6->addr, &addr6->mask); #endif /* IPv6 */ } else { - type = entry->type; - cipsov4 = entry->type_def.cipsov4; + type = entry->def.type; + cipsov4 = entry->def.cipso; } switch (type) { case NETLBL_NLTYPE_UNLABELED: @@ -265,26 +265,25 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) if (entry == NULL) return -EINVAL; - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_UNLABELED: - if (entry->type_def.cipsov4 != NULL || - entry->type_def.addrsel != NULL) + if (entry->def.cipso != NULL || entry->def.addrsel != NULL) return -EINVAL; break; case NETLBL_NLTYPE_CIPSOV4: - if (entry->type_def.cipsov4 == NULL) + if (entry->def.cipso == NULL) return -EINVAL; break; case NETLBL_NLTYPE_ADDRSELECT: - netlbl_af4list_foreach(iter4, &entry->type_def.addrsel->list4) { + netlbl_af4list_foreach(iter4, &entry->def.addrsel->list4) { map4 = netlbl_domhsh_addr4_entry(iter4); - switch (map4->type) { + switch (map4->def.type) { case NETLBL_NLTYPE_UNLABELED: - if (map4->type_def.cipsov4 != NULL) + if (map4->def.cipso != NULL) return -EINVAL; break; case NETLBL_NLTYPE_CIPSOV4: - if (map4->type_def.cipsov4 == NULL) + if (map4->def.cipso == NULL) return 
-EINVAL; break; default: @@ -292,9 +291,9 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) } } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) { + netlbl_af6list_foreach(iter6, &entry->def.addrsel->list6) { map6 = netlbl_domhsh_addr6_entry(iter6); - switch (map6->type) { + switch (map6->def.type) { case NETLBL_NLTYPE_UNLABELED: break; default: @@ -402,32 +401,31 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, rcu_assign_pointer(netlbl_domhsh_def, entry); } - if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { + if (entry->def.type == NETLBL_NLTYPE_ADDRSELECT) { netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) + &entry->def.addrsel->list4) netlbl_domhsh_audit_add(entry, iter4, NULL, ret_val, audit_info); #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) + &entry->def.addrsel->list6) netlbl_domhsh_audit_add(entry, NULL, iter6, ret_val, audit_info); #endif /* IPv6 */ } else netlbl_domhsh_audit_add(entry, NULL, NULL, ret_val, audit_info); - } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT && - entry->type == NETLBL_NLTYPE_ADDRSELECT) { + } else if (entry_old->def.type == NETLBL_NLTYPE_ADDRSELECT && + entry->def.type == NETLBL_NLTYPE_ADDRSELECT) { struct list_head *old_list4; struct list_head *old_list6; - old_list4 = &entry_old->type_def.addrsel->list4; - old_list6 = &entry_old->type_def.addrsel->list6; + old_list4 = &entry_old->def.addrsel->list4; + old_list6 = &entry_old->def.addrsel->list6; /* we only allow the addition of address selectors if all of * the selectors do not exist in the existing domain map */ - netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) + netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) if (netlbl_af4list_search_exact(iter4->addr, iter4->mask, old_list4)) { @@ -435,8 +433,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, goto add_return; } #if 
IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) + netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) if (netlbl_af6list_search_exact(&iter6->addr, &iter6->mask, old_list6)) { @@ -446,7 +443,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, #endif /* IPv6 */ netlbl_af4list_foreach_safe(iter4, tmp4, - &entry->type_def.addrsel->list4) { + &entry->def.addrsel->list4) { netlbl_af4list_remove_entry(iter4); iter4->valid = 1; ret_val = netlbl_af4list_add(iter4, old_list4); @@ -457,7 +454,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, - &entry->type_def.addrsel->list6) { + &entry->def.addrsel->list6) { netlbl_af6list_remove_entry(iter6); iter6->valid = 1; ret_val = netlbl_af6list_add(iter6, old_list6); @@ -538,18 +535,18 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, struct netlbl_af4list *iter4; struct netlbl_domaddr4_map *map4; - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) { + &entry->def.addrsel->list4) { map4 = netlbl_domhsh_addr4_entry(iter4); - cipso_v4_doi_putdef(map4->type_def.cipsov4); + cipso_v4_doi_putdef(map4->def.cipso); } /* no need to check the IPv6 list since we currently * support only unlabeled protocols for IPv6 */ break; case NETLBL_NLTYPE_CIPSOV4: - cipso_v4_doi_putdef(entry->type_def.cipsov4); + cipso_v4_doi_putdef(entry->def.cipso); break; } call_rcu(&entry->rcu, netlbl_domhsh_free_entry); @@ -590,20 +587,21 @@ int netlbl_domhsh_remove_af4(const char *domain, entry_map = netlbl_domhsh_search(domain); else entry_map = netlbl_domhsh_search_def(domain); - if (entry_map == NULL || entry_map->type != NETLBL_NLTYPE_ADDRSELECT) + if (entry_map == NULL || + entry_map->def.type != NETLBL_NLTYPE_ADDRSELECT) goto remove_af4_failure; spin_lock(&netlbl_domhsh_lock); entry_addr = 
netlbl_af4list_remove(addr->s_addr, mask->s_addr, - &entry_map->type_def.addrsel->list4); + &entry_map->def.addrsel->list4); spin_unlock(&netlbl_domhsh_lock); if (entry_addr == NULL) goto remove_af4_failure; - netlbl_af4list_foreach_rcu(iter4, &entry_map->type_def.addrsel->list4) + netlbl_af4list_foreach_rcu(iter4, &entry_map->def.addrsel->list4) goto remove_af4_single_addr; #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, &entry_map->type_def.addrsel->list6) + netlbl_af6list_foreach_rcu(iter6, &entry_map->def.addrsel->list6) goto remove_af4_single_addr; #endif /* IPv6 */ /* the domain mapping is empty so remove it from the mapping table */ @@ -616,7 +614,7 @@ remove_af4_single_addr: * shouldn't be a problem */ synchronize_rcu(); entry = netlbl_domhsh_addr4_entry(entry_addr); - cipso_v4_doi_putdef(entry->type_def.cipsov4); + cipso_v4_doi_putdef(entry->def.cipso); kfree(entry); return 0; @@ -693,8 +691,8 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) * responsible for ensuring that rcu_read_[un]lock() is called. 
* */ -struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, - __be32 addr) +struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr) { struct netlbl_dom_map *dom_iter; struct netlbl_af4list *addr_iter; @@ -702,15 +700,13 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, dom_iter = netlbl_domhsh_search_def(domain); if (dom_iter == NULL) return NULL; - if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) - return NULL; - addr_iter = netlbl_af4list_search(addr, - &dom_iter->type_def.addrsel->list4); + if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT) + return &dom_iter->def; + addr_iter = netlbl_af4list_search(addr, &dom_iter->def.addrsel->list4); if (addr_iter == NULL) return NULL; - - return netlbl_domhsh_addr4_entry(addr_iter); + return &(netlbl_domhsh_addr4_entry(addr_iter)->def); } #if IS_ENABLED(CONFIG_IPV6) @@ -725,7 +721,7 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, * responsible for ensuring that rcu_read_[un]lock() is called. 
* */ -struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, +struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain, const struct in6_addr *addr) { struct netlbl_dom_map *dom_iter; @@ -734,15 +730,13 @@ struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, dom_iter = netlbl_domhsh_search_def(domain); if (dom_iter == NULL) return NULL; - if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) - return NULL; - addr_iter = netlbl_af6list_search(addr, - &dom_iter->type_def.addrsel->list6); + if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT) + return &dom_iter->def; + addr_iter = netlbl_af6list_search(addr, &dom_iter->def.addrsel->list6); if (addr_iter == NULL) return NULL; - - return netlbl_domhsh_addr6_entry(addr_iter); + return &(netlbl_domhsh_addr6_entry(addr_iter)->def); } #endif /* IPv6 */ diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 90872c4ca30f..b9be0eed8980 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -43,37 +43,35 @@ #define NETLBL_DOMHSH_BITSIZE 7 /* Domain mapping definition structures */ +struct netlbl_domaddr_map { + struct list_head list4; + struct list_head list6; +}; +struct netlbl_dommap_def { + u32 type; + union { + struct netlbl_domaddr_map *addrsel; + struct cipso_v4_doi *cipso; + }; +}; #define netlbl_domhsh_addr4_entry(iter) \ container_of(iter, struct netlbl_domaddr4_map, list) struct netlbl_domaddr4_map { - u32 type; - union { - struct cipso_v4_doi *cipsov4; - } type_def; + struct netlbl_dommap_def def; struct netlbl_af4list list; }; #define netlbl_domhsh_addr6_entry(iter) \ container_of(iter, struct netlbl_domaddr6_map, list) struct netlbl_domaddr6_map { - u32 type; - - /* NOTE: no 'type_def' union needed at present since we don't currently - * support any IPv6 labeling protocols */ + struct netlbl_dommap_def def; struct netlbl_af6list list; }; -struct netlbl_domaddr_map { - struct list_head list4; - 
struct list_head list6; -}; + struct netlbl_dom_map { char *domain; - u32 type; - union { - struct cipso_v4_doi *cipsov4; - struct netlbl_domaddr_map *addrsel; - } type_def; + struct netlbl_dommap_def def; u32 valid; struct list_head list; @@ -97,16 +95,16 @@ int netlbl_domhsh_remove_af4(const char *domain, int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); -struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, - __be32 addr); +struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr); +#if IS_ENABLED(CONFIG_IPV6) +struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain, + const struct in6_addr *addr); +#endif /* IPv6 */ + int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, int (*callback) (struct netlbl_dom_map *entry, void *arg), void *cb_arg); -#if IS_ENABLED(CONFIG_IPV6) -struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, - const struct in6_addr *addr); -#endif /* IPv6 */ - #endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 7c94aedd0912..96a458e12f60 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -122,7 +122,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, } if (addr == NULL && mask == NULL) - entry->type = NETLBL_NLTYPE_UNLABELED; + entry->def.type = NETLBL_NLTYPE_UNLABELED; else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) @@ -137,7 +137,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, map4 = kzalloc(sizeof(*map4), GFP_ATOMIC); if (map4 == NULL) goto cfg_unlbl_map_add_failure; - map4->type = NETLBL_NLTYPE_UNLABELED; + map4->def.type = NETLBL_NLTYPE_UNLABELED; map4->list.addr = addr4->s_addr & mask4->s_addr; map4->list.mask = mask4->s_addr; map4->list.valid = 1; @@ -154,7 
+154,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, map6 = kzalloc(sizeof(*map6), GFP_ATOMIC); if (map6 == NULL) goto cfg_unlbl_map_add_failure; - map6->type = NETLBL_NLTYPE_UNLABELED; + map6->def.type = NETLBL_NLTYPE_UNLABELED; map6->list.addr = *addr6; map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0]; map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1]; @@ -174,8 +174,8 @@ int netlbl_cfg_unlbl_map_add(const char *domain, break; } - entry->type_def.addrsel = addrmap; - entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; goto cfg_unlbl_map_add_failure; @@ -355,8 +355,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, } if (addr == NULL && mask == NULL) { - entry->type_def.cipsov4 = doi_def; - entry->type = NETLBL_NLTYPE_CIPSOV4; + entry->def.cipso = doi_def; + entry->def.type = NETLBL_NLTYPE_CIPSOV4; } else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) @@ -367,8 +367,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC); if (addrinfo == NULL) goto out_addrinfo; - addrinfo->type_def.cipsov4 = doi_def; - addrinfo->type = NETLBL_NLTYPE_CIPSOV4; + addrinfo->def.cipso = doi_def; + addrinfo->def.type = NETLBL_NLTYPE_CIPSOV4; addrinfo->list.addr = addr->s_addr & mask->s_addr; addrinfo->list.mask = mask->s_addr; addrinfo->list.valid = 1; @@ -376,8 +376,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, if (ret_val != 0) goto cfg_cipsov4_map_add_failure; - entry->type_def.addrsel = addrmap; - entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; goto out_addrmap; @@ -657,14 +657,14 @@ int netlbl_sock_setattr(struct sock *sk, } switch (family) { case AF_INET: - switch (dom_entry->type) { + switch (dom_entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: ret_val = -EDESTADDRREQ; break; case 
NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, - dom_entry->type_def.cipsov4, - secattr); + dom_entry->def.cipso, + secattr); break; case NETLBL_NLTYPE_UNLABELED: ret_val = 0; @@ -754,23 +754,22 @@ int netlbl_conn_setattr(struct sock *sk, { int ret_val; struct sockaddr_in *addr4; - struct netlbl_domaddr4_map *af4_entry; + struct netlbl_dommap_def *entry; rcu_read_lock(); switch (addr->sa_family) { case AF_INET: addr4 = (struct sockaddr_in *)addr; - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - addr4->sin_addr.s_addr); - if (af4_entry == NULL) { + entry = netlbl_domhsh_getentry_af4(secattr->domain, + addr4->sin_addr.s_addr); + if (entry == NULL) { ret_val = -ENOENT; goto conn_setattr_return; } - switch (af4_entry->type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, - af4_entry->type_def.cipsov4, - secattr); + entry->cipso, secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now @@ -812,36 +811,21 @@ int netlbl_req_setattr(struct request_sock *req, const struct netlbl_lsm_secattr *secattr) { int ret_val; - struct netlbl_dom_map *dom_entry; - struct netlbl_domaddr4_map *af4_entry; - u32 proto_type; - struct cipso_v4_doi *proto_cv4; + struct netlbl_dommap_def *entry; rcu_read_lock(); - dom_entry = netlbl_domhsh_getentry(secattr->domain); - if (dom_entry == NULL) { - ret_val = -ENOENT; - goto req_setattr_return; - } switch (req->rsk_ops->family) { case AF_INET: - if (dom_entry->type == NETLBL_NLTYPE_ADDRSELECT) { - struct inet_request_sock *req_inet = inet_rsk(req); - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - req_inet->rmt_addr); - if (af4_entry == NULL) { - ret_val = -ENOENT; - goto req_setattr_return; - } - proto_type = af4_entry->type; - proto_cv4 = af4_entry->type_def.cipsov4; - } else { - proto_type = dom_entry->type; - proto_cv4 = dom_entry->type_def.cipsov4; + entry = netlbl_domhsh_getentry_af4(secattr->domain, + 
inet_rsk(req)->rmt_addr); + if (entry == NULL) { + ret_val = -ENOENT; + goto req_setattr_return; } - switch (proto_type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_req_setattr(req, proto_cv4, secattr); + ret_val = cipso_v4_req_setattr(req, + entry->cipso, secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now @@ -899,23 +883,21 @@ int netlbl_skbuff_setattr(struct sk_buff *skb, { int ret_val; struct iphdr *hdr4; - struct netlbl_domaddr4_map *af4_entry; + struct netlbl_dommap_def *entry; rcu_read_lock(); switch (family) { case AF_INET: hdr4 = ip_hdr(skb); - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - hdr4->daddr); - if (af4_entry == NULL) { + entry = netlbl_domhsh_getentry_af4(secattr->domain,hdr4->daddr); + if (entry == NULL) { ret_val = -ENOENT; goto skbuff_setattr_return; } - switch (af4_entry->type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_skbuff_setattr(skb, - af4_entry->type_def.cipsov4, - secattr); + ret_val = cipso_v4_skbuff_setattr(skb, entry->cipso, + secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index c5384ffc6146..dd1c37d7acbc 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -104,7 +104,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = -ENOMEM; goto add_failure; } - entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); + entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); if (info->attrs[NLBL_MGMT_A_DOMAIN]) { size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); entry->domain = kmalloc(tmp_size, GFP_KERNEL); @@ -116,12 +116,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); } - /* NOTE: internally we allow/use a entry->type value of + /* NOTE: internally we allow/use 
a entry->def.type value of * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users * to pass that as a protocol value because we need to know the * "real" protocol */ - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_UNLABELED: break; case NETLBL_NLTYPE_CIPSOV4: @@ -132,7 +132,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, cipsov4 = cipso_v4_doi_getdef(tmp_val); if (cipsov4 == NULL) goto add_failure; - entry->type_def.cipsov4 = cipsov4; + entry->def.cipso = cipsov4; break; default: goto add_failure; @@ -172,9 +172,9 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map->list.addr = addr->s_addr & mask->s_addr; map->list.mask = mask->s_addr; map->list.valid = 1; - map->type = entry->type; + map->def.type = entry->def.type; if (cipsov4) - map->type_def.cipsov4 = cipsov4; + map->def.cipso = cipsov4; ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); if (ret_val != 0) { @@ -182,8 +182,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info, goto add_failure; } - entry->type = NETLBL_NLTYPE_ADDRSELECT; - entry->type_def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; #if IS_ENABLED(CONFIG_IPV6) } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { struct in6_addr *addr; @@ -223,7 +223,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; map->list.mask = *mask; map->list.valid = 1; - map->type = entry->type; + map->def.type = entry->def.type; ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); if (ret_val != 0) { @@ -231,8 +231,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info, goto add_failure; } - entry->type = NETLBL_NLTYPE_ADDRSELECT; - entry->type_def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; #endif /* IPv6 */ } @@ -281,14 +281,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; } - switch 
(entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); if (nla_a == NULL) return -ENOMEM; - netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) { + netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) { struct netlbl_domaddr4_map *map4; struct in_addr addr_struct; @@ -310,13 +309,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; map4 = netlbl_domhsh_addr4_entry(iter4); ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, - map4->type); + map4->def.type); if (ret_val != 0) return ret_val; - switch (map4->type) { + switch (map4->def.type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, - map4->type_def.cipsov4->doi); + map4->def.cipso->doi); if (ret_val != 0) return ret_val; break; @@ -325,8 +324,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, nla_nest_end(skb, nla_b); } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) { + netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) { struct netlbl_domaddr6_map *map6; nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); @@ -345,7 +343,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; map6 = netlbl_domhsh_addr6_entry(iter6); ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, - map6->type); + map6->def.type); if (ret_val != 0) return ret_val; @@ -356,14 +354,14 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, nla_nest_end(skb, nla_a); break; case NETLBL_NLTYPE_UNLABELED: - ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type); break; case NETLBL_NLTYPE_CIPSOV4: - ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type); if (ret_val != 0) return ret_val; ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, - entry->type_def.cipsov4->doi); 
+ entry->def.cipso->doi); break; } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index af3531926ee0..8f0897407a2c 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1541,7 +1541,7 @@ int __init netlbl_unlabel_defconf(void) entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; - entry->type = NETLBL_NLTYPE_UNLABELED; + entry->def.type = NETLBL_NLTYPE_UNLABELED; ret_val = netlbl_domhsh_add_default(entry, &audit_info); if (ret_val != 0) return ret_val; -- cgit From e4d091d7bf787cd303383725b8071d0bae76f981 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 1 Aug 2013 12:36:57 +0300 Subject: netfilter: nfnetlink_{log,queue}: fix information leaks in netlink message These structs have a "_pad" member. Also the "phw" structs have an 8 byte "hw_addr[]" array but sometimes only the first 6 bytes are initialized. Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 6 +++++- net/netfilter/nfnetlink_queue_core.c | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 962e9792e317..d92cc317bf8b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -419,6 +419,7 @@ __build_packet_message(struct nfnl_log_net *log, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(inst->group_num); + memset(&pmsg, 0, sizeof(pmsg)); pmsg.hw_protocol = skb->protocol; pmsg.hook = hooknum; @@ -498,7 +499,10 @@ __build_packet_message(struct nfnl_log_net *log, if (indev && skb->dev && skb->mac_header != skb->network_header) { struct nfulnl_msg_packet_hw phw; - int len = dev_parse_header(skb, phw.hw_addr); + int len; + + memset(&phw, 0, sizeof(phw)); + len = dev_parse_header(skb, phw.hw_addr); if (len > 0) { phw.hw_addrlen = htons(len); if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw)) diff --git 
a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 971ea145ab3e..8a703c3dd318 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -463,7 +463,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (indev && entskb->dev && entskb->mac_header != entskb->network_header) { struct nfqnl_msg_packet_hw phw; - int len = dev_parse_header(entskb, phw.hw_addr); + int len; + + memset(&phw, 0, sizeof(phw)); + len = dev_parse_header(entskb, phw.hw_addr); if (len) { phw.hw_addrlen = htons(len); if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw)) -- cgit From d9af2d67e490b48f0d36f448d34e7bab9425f142 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 5 Aug 2013 16:47:38 +0200 Subject: net/vmw_vsock/af_vsock.c: drop unneeded semicolon Drop the semicolon at the end of the list_for_each_entry loop header. Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 593071dabd1c..4d9334683f84 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -347,7 +347,7 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, &vsock_connected_table[i], - connected_table); + connected_table) fn(sk_vsock(vsk)); } -- cgit From 0369722f024cd374f74eac6d261014403aa27ea2 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Sat, 3 Aug 2013 22:07:46 +0200 Subject: vlan: make vlan_dev_real_dev work over stacked vlans Sometimes we might have stacked vlans on top of each other, and we're interested in the first non-vlan real device on the path, so transform vlan_dev_real_dev to go over the stacked vlans and extract the first non-vlan device. 
Signed-off-by: Nikolay Aleksandrov Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 4a78c4de9f20..6ee48aac776f 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -91,7 +91,12 @@ EXPORT_SYMBOL(__vlan_find_dev_deep); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { - return vlan_dev_priv(dev)->real_dev; + struct net_device *ret = vlan_dev_priv(dev)->real_dev; + + while (is_vlan_dev(ret)) + ret = vlan_dev_priv(ret)->real_dev; + + return ret; } EXPORT_SYMBOL(vlan_dev_real_dev); -- cgit From 07ce76aa9bcf8bc106a53c67548c5602f1598595 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Sat, 3 Aug 2013 22:07:47 +0200 Subject: net_sched: make dev_trans_start return vlan's real dev trans_start Vlan devices are LLTX and don't update their own trans_start, so if dev_trans_start has to be called with a vlan device then 0 or a stale value will be returned. Currently the bonding is the only such user, and it's needed for proper arp monitoring when the slaves are vlans. Fix this by extracting the vlan's real device trans_start. Suggested-by: David Miller Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Signed-off-by: David S. 
Miller --- net/sched/sch_generic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4626cef4b76e..eeb8276d7a89 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -207,15 +208,19 @@ void __qdisc_run(struct Qdisc *q) unsigned long dev_trans_start(struct net_device *dev) { - unsigned long val, res = dev->trans_start; + unsigned long val, res; unsigned int i; + if (is_vlan_dev(dev)) + dev = vlan_dev_real_dev(dev); + res = dev->trans_start; for (i = 0; i < dev->num_tx_queues; i++) { val = netdev_get_tx_queue(dev, i)->trans_start; if (val && time_after(val, res)) res = val; } dev->trans_start = res; + return res; } EXPORT_SYMBOL(dev_trans_start); -- cgit From 7921895a5e852fc99de347bc0600659997de9298 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 5 Aug 2013 12:49:35 +0200 Subject: net: esp{4,6}: fix potential MTU calculation overflows Commit 91657eafb ("xfrm: take net hdr len into account for esp payload size calculation") introduced a possible integer overflow in esp{4,6}_get_mtu() handlers in case of x->props.mode equals XFRM_MODE_TUNNEL. Thus, the following expression will overflow unsigned int net_adj; ... net_adj = 0; ... return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - net_adj) & ~(align - 1)) + (net_adj - 2); where (net_adj - 2) would be evaluated as + (0 - 2) in an unsigned context. Fix it by simply removing brackets as those operations here do not need to have special precedence. Signed-off-by: Daniel Borkmann Cc: Benjamin Poirier Cc: Steffen Klassert Acked-by: Benjamin Poirier Signed-off-by: David S.
Miller --- net/ipv4/esp4.c | 2 +- net/ipv6/esp6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index ab3d814bc80a..109ee89f123e 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -477,7 +477,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) } return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - - net_adj) & ~(align - 1)) + (net_adj - 2); + net_adj) & ~(align - 1)) + net_adj - 2; } static void esp4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 40ffd72243a4..aeac0dc3635d 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -425,7 +425,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) net_adj = 0; return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - - net_adj) & ~(align - 1)) + (net_adj - 2); + net_adj) & ~(align - 1)) + net_adj - 2; } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -- cgit From fc7f8f5c53fdb82d4689e24df3da1a88bc3859f7 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 2 Aug 2013 19:07:38 +0200 Subject: neighbour: populate neigh_parms on alloc before calling ndo_neigh_setup dev->ndo_neigh_setup() might need some of the values of neigh_parms, so populate them before calling it. Signed-off-by: Veaceslav Falico Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9232c68941ab..60533db8b72d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1441,16 +1441,18 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, atomic_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); + dev_hold(dev); + p->dev = dev; + write_pnet(&p->net, hold_net(net)); + p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { + release_net(net); + dev_put(dev); kfree(p); return NULL; } - dev_hold(dev); - p->dev = dev; - write_pnet(&p->net, hold_net(net)); - p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; tbl->parms.next = p; -- cgit From aab515d7c32a34300312416c50314e755ea6f765 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 11:18:49 -0700 Subject: fib_trie: remove potential out of bound access AddressSanitizer [1] dynamic checker pointed a potential out of bound access in leaf_walk_rcu() We could allocate one more slot in tnode_new() to leave the prefetch() in-place but it looks not worth the pain. Bug added in commit 82cfbb008572b ("[IPV4] fib_trie: iterator recode") [1] : https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel Reported-by: Andrey Konovalov Signed-off-by: Eric Dumazet Cc: Dmitry Vyukov Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 108a1e9c9eac..3df6d3edb2a1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -71,7 +71,6 @@ #include #include #include -#include #include #include #include @@ -1761,10 +1760,8 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) if (!c) continue; - if (IS_LEAF(c)) { - prefetch(rcu_dereference_rtnl(p->child[idx])); + if (IS_LEAF(c)) return (struct leaf *) c; - } /* Rescan start scanning in new node */ p = (struct tnode *) c; -- cgit From 248ba8ec05a2c3b118c2224e57eb10c128176ab1 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 6 Aug 2013 00:32:05 +0200 Subject: bridge: don't try to update timers in case of broken MLD queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we are reading an uninitialized value for the max_delay variable when snooping an MLD query message of invalid length and would update our timers with that. Fixing this by simply ignoring such broken MLD queries (just like we do for IGMP already). This is a regression introduced by: "bridge: disable snooping if there is no querier" (b00589af3b04) Reported-by: Paul Bolle Signed-off-by: Linus Lüssing Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 61c5e819380e..08e576ada0b2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1195,7 +1195,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); if (max_delay) group = &mld->mld_mca; - } else if (skb->len >= sizeof(*mld2q)) { + } else { if (!pskb_may_pull(skb, sizeof(*mld2q))) { err = -EINVAL; goto out; -- cgit From 00326ed6442c66021cd4b5e19e80f3e2027d5d42 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 14:10:43 -0400 Subject: SUNRPC: Don't auto-disconnect from the local rpcbind socket There is no need for the kernel to time out the AF_LOCAL connection to the rpcbind socket, and doing so is problematic because when it is time to reconnect, our process may no longer be using the same mount namespace. Reported-by: Nix Signed-off-by: Trond Myklebust Cc: Jeff Layton Cc: stable@vger.kernel.org # 3.9.x --- net/sunrpc/rpcb_clnt.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 3df764dc330c..b0f723227157 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -238,6 +238,14 @@ static int rpcb_create_local_unix(struct net *net) .program = &rpcb_program, .version = RPCBVERS_2, .authflavor = RPC_AUTH_NULL, + /* + * We turn off the idle timeout to prevent the kernel + * from automatically disconnecting the socket. + * Otherwise, we'd have to cache the mount namespace + * of the caller and somehow pass that to the socket + * reconnect code. 
+ */ .flags = RPC_CLNT_CREATE_NO_IDLE_TIMEOUT, }; struct rpc_clnt *clnt, *clnt4; int result = 0; -- cgit From 2ed0edf9090bf4afa2c6fc4f38575a85a80d4b20 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 17:10:15 -0700 Subject: tcp: cubic: fix overflow error in bictcp_update() commit 17a6e9f1aa9 ("tcp_cubic: fix clock dependency") added an overflow error in bictcp_update() in following code : /* change the unit from HZ to bictcp_HZ */ t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - ca->epoch_start) << BICTCP_HZ) / HZ; Because msecs_to_jiffies() being unsigned long, compiler does implicit type promotion. We really want to constrain (tcp_time_stamp - ca->epoch_start) to a signed 32bit value, or else 't' has unexpected high values. This bug triggers an increase of retransmit rates ~24 days after boot [1], as the high order bit of tcp_time_stamp flips. [1] for hosts with HZ=1000 Big thanks to Van Jacobson for spotting this problem. Diagnosed-by: Van Jacobson Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Cc: Stephen Hemminger Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index a9077f441cb2..b6b591f0a788 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -206,8 +206,8 @@ static u32 cubic_root(u64 a) */ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { - u64 offs; - u32 delta, t, bic_target, max_cnt; + u32 delta, bic_target, max_cnt; + u64 offs, t; ca->ack_cnt++; /* count the number of ACKs */ @@ -250,9 +250,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) * if the cwnd < 1 million packets !!!
*/ + t = (s32)(tcp_time_stamp - ca->epoch_start); + t += msecs_to_jiffies(ca->delay_min >> 3); /* change the unit from HZ to bictcp_HZ */ - t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - - ca->epoch_start) << BICTCP_HZ) / HZ; + t <<= BICTCP_HZ; + do_div(t, HZ); if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; -- cgit From 15401946f9b720efdd20bda3ae79725e9c586897 Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Tue, 6 Aug 2013 08:44:46 +0800 Subject: bridge: correct the comment for file br_sysfs_br.c br_sysfs_if.c is for sysfs attributes of bridge ports, while br_sysfs_br.c is for sysfs attributes of bridge itself. Correct the comment here. Signed-off-by: Wang Sheng-Hui Signed-off-by: David S. Miller --- net/bridge/br_sysfs_br.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 394bb96b6087..3b9637fb7939 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -1,5 +1,5 @@ /* - * Sysfs attributes of bridge ports + * Sysfs attributes of bridge * Linux ethernet bridge * * Authors: -- cgit From cd6b423afd3c08b27e1fed52db828ade0addbc6b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 20:05:12 -0700 Subject: tcp: cubic: fix bug in bictcp_acked() While investigating about strange increase of retransmit rates on hosts ~24 days after boot, Van found hystart was disabled if ca->epoch_start was 0, as following condition is true when tcp_time_stamp high order bit is set. (s32)(tcp_time_stamp - ca->epoch_start) < HZ Quoting Van : At initialization & after every loss ca->epoch_start is set to zero so I believe that the above line will turn off hystart as soon as the 2^31 bit is set in tcp_time_stamp & hystart will stay off for 24 days. I think we've observed that cubic's restart is too aggressive without hystart so this might account for the higher drop rate we observe. 
Diagnosed-by: Van Jacobson Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index b6b591f0a788..b6ae92a51f58 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -416,7 +416,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) return; /* Discard delay samples right after fast recovery */ - if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) + if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; delay = (rtt_us << 3) / USEC_PER_MSEC; -- cgit From 786615bc1ce84150ded80daea6bd9f6297f48e73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 16:04:47 -0400 Subject: SUNRPC: If the rpcbind channel is disconnected, fail the call to unregister If rpcbind causes our connection to the AF_LOCAL socket to close after we've registered a service, then we want to be careful about reconnecting since the mount namespace may have changed. By simply refusing to reconnect the AF_LOCAL socket in the case of unregister, we avoid the need to somehow save the mount namespace. While this may lead to some services not unregistering properly, it should be safe. 
Signed-off-by: Trond Myklebust Cc: Nix Cc: Jeff Layton Cc: stable@vger.kernel.org # 3.9.x --- net/sunrpc/clnt.c | 4 ++++ net/sunrpc/netns.h | 1 + net/sunrpc/rpcb_clnt.c | 40 +++++++++++++++++++++++++++------------- 3 files changed, 32 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 74f6a704e374..ecbc4e3d83ad 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1660,6 +1660,10 @@ call_connect(struct rpc_task *task) task->tk_action = call_connect_status; if (task->tk_status < 0) return; + if (task->tk_flags & RPC_TASK_NOCONNECT) { + rpc_exit(task, -ENOTCONN); + return; + } xprt_connect(task); } } diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 74d948f5d5a1..779742cfc1ff 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -23,6 +23,7 @@ struct sunrpc_net { struct rpc_clnt *rpcb_local_clnt4; spinlock_t rpcb_clnt_lock; unsigned int rpcb_users; + unsigned int rpcb_is_af_local : 1; struct mutex gssp_lock; wait_queue_head_t gssp_wq; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index b0f723227157..1891a1022c17 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net) } static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, - struct rpc_clnt *clnt4) + struct rpc_clnt *clnt4, + bool is_af_local) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); /* Protected by rpcb_create_local_mutex */ sn->rpcb_local_clnt = clnt; sn->rpcb_local_clnt4 = clnt4; + sn->rpcb_is_af_local = is_af_local ? 
1 : 0; smp_wmb(); sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " @@ -271,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, true); out: return result; @@ -323,7 +325,7 @@ static int rpcb_create_local_net(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, false); out: return result; @@ -384,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, return rpc_create(&args); } -static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) +static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set) { - int result, error = 0; + int flags = RPC_TASK_NOCONNECT; + int error, result = 0; + if (is_set || !sn->rpcb_is_af_local) + flags = RPC_TASK_SOFTCONN; msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); + error = rpc_call_sync(clnt, msg, flags); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -447,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short .rpc_argp = &map, }; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + bool is_set = false; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? 
"" : "un"), prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; + is_set = true; + } - return rpcb_register_call(sn->rpcb_local_clnt, &msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set); } /* @@ -469,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -479,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -497,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -507,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -527,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(sn->rpcb_local_clnt4, msg); + 
return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false); } /** -- cgit From 3e3be275851bc6fc90bfdcd732cd95563acd982b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 7 Aug 2013 02:34:31 +0200 Subject: ipv6: don't stop backtracking in fib6_lookup_1 if subtree does not match In case a subtree did not match we currently stop backtracking and return NULL (root table from fib_lookup). This could yield in invalid routing table lookups when using subtrees. Instead continue to backtrack until a valid subtree or node is found and return this match. Also remove unneeded NULL check. Reported-by: Teco Boot Cc: YOSHIFUJI Hideaki Cc: David Lamparter Cc: Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bff3d821c7eb..c4ff5bbb45c4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -993,14 +993,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { #ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) - fn = fib6_lookup_1(fn->subtree, args + 1); + if (fn->subtree) { + struct fib6_node *sfn; + sfn = fib6_lookup_1(fn->subtree, + args + 1); + if (!sfn) + goto backtrack; + fn = sfn; + } #endif - if (!fn || fn->fn_flags & RTN_RTINFO) + if (fn->fn_flags & RTN_RTINFO) return fn; } } - +#ifdef CONFIG_IPV6_SUBTREES +backtrack: +#endif if (fn->fn_flags & RTN_ROOT) break; -- cgit From 77a482bdb2e68d13fae87541b341905ba70d572b Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Tue, 6 Aug 2013 13:45:43 +0300 Subject: ip_gre: fix ipgre_header to return correct offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix ipgre_header() (header_ops->create) to return the correct amount of bytes pushed. 
Most callers of dev_hard_header() seem to care only whether it succeeded, but af_packet.c uses it as the offset into the skb to copy from userspace only once. In practice this fixes packet socket sendto()/sendmsg() to gre tunnels. Regression introduced in c54419321455631079c7d6e60bc732dd0c5914c5 ("GRE: Refactor GRE tunneling code.") Cc: Pravin B Shelar Signed-off-by: Timo Teräs Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 1f6eab66f7ce..8d6939eeb492 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -383,7 +383,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (daddr) memcpy(&iph->daddr, daddr, 4); if (iph->daddr) - return t->hlen; + return t->hlen + sizeof(*iph); return -(t->hlen + sizeof(*iph)); } -- cgit From e11aada32b39a060e26fa4091cb968bd42e3bcbf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Aug 2013 04:35:06 -0700 Subject: net: flow_dissector: add 802.1ad support Same behavior as 802.1q: find the encapsulated protocol and skip the 32-bit header. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/flow_dissector.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 00ee068efc1c..b84a1b155bc1 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -65,6 +65,7 @@ ipv6: nhoff += sizeof(struct ipv6hdr); break; } + case __constant_htons(ETH_P_8021AD): case __constant_htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; struct vlan_hdr _vlan; -- cgit From 288a9376371d425edeeea41a0310922c5fb2092d Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 7 Aug 2013 11:33:25 +0300 Subject: net: rename busy poll MIB counter Rename mib counter from "low latency" to "busy poll" v1 also moved the counter to the ip MIB (suggested by Shawn Bohrer) Eric Dumazet suggested that the current location is better. So v2 just renames the counter to fit the new naming convention. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/ipv4/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6577a1149a47..463bd1273346 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -273,7 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), - SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS), + SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), SNMP_MIB_SENTINEL }; -- cgit From 645359930231d5e78fd3296a38b98c1a658a7ade Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 8 Aug 2013 15:19:48 -0700 Subject: rtnetlink: Fix inverted check in ndo_dflt_fdb_del() Fix inverted check when deleting an fdb entry. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3de740834d1f..82d968527121 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2156,7 +2156,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, /* If aging addresses are supported device will need to * implement its own handler for this. */ - if (ndm->ndm_state & NUD_PERMANENT) { + if (!(ndm->ndm_state & NUD_PERMANENT)) { pr_info("%s: FDB only supports static addresses\n", dev->name); return -EINVAL; } -- cgit From 356d7d88e088687b6578ca64601b0a2c9d145296 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 9 Aug 2013 17:21:27 -0700 Subject: netfilter: nf_conntrack: fix tcp_in_window for Fast Open Currently the conntrack checks if the ending sequence of a packet falls within the observed receive window. However it does so even if it has not observed any packet from the remote yet and uses an uninitialized receive window (td_maxwin). If a connection uses Fast Open to send a SYN-data packet which is dropped afterward in the network, the subsequent SYN retransmits will all fail this check and be discarded, leading to a connection timeout. This is because the SYN retransmit does not contain data payload so end == initial sequence number (isn) + 1 sender->td_end == isn + syn_data_len receiver->td_maxwin == 0 The fix is to only apply this check after td_maxwin is initialized. 
Reported-by: Michael Chan Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 7dcc376eea5f..2f8010707d01 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -526,7 +526,7 @@ static bool tcp_in_window(const struct nf_conn *ct, const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; s16 receiver_offset; - bool res; + bool res, in_recv_win; /* * Get the required data from the packet. @@ -649,14 +649,18 @@ static bool tcp_in_window(const struct nf_conn *ct, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); + /* Is the ending sequence in the receive window (if available)? */ + in_recv_win = !receiver->td_maxwin || + after(end, sender->td_end - receiver->td_maxwin - 1); + pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", before(seq, sender->td_maxend + 1), - after(end, sender->td_end - receiver->td_maxwin - 1), + (in_recv_win ? 1 : 0), before(sack, receiver->td_end + 1), after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && - after(end, sender->td_end - receiver->td_maxwin - 1) && + in_recv_win && before(sack, receiver->td_end + 1) && after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { /* @@ -725,7 +729,7 @@ static bool tcp_in_window(const struct nf_conn *ct, nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? - after(end, sender->td_end - receiver->td_maxwin - 1) ? + in_recv_win ? before(sack, receiver->td_end + 1) ? after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? 
"BUG" : "ACK is under the lower bound (possible overly delayed ACK)" -- cgit From 9d2c9488cedb666bc8206fbdcdc1575e0fbc5929 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 6 Aug 2013 20:21:15 +0200 Subject: batman-adv: fix potential kernel paging errors for unicast transmissions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are several functions which might reallocate skb data. Currently some places keep reusing their old ethhdr pointer regardless of whether they became invalid after such a reallocation or not. This potentially leads to kernel paging errors. This patch fixes these by refetching the ethdr pointer after the potential reallocations. Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 2 ++ net/batman-adv/gateway_client.c | 13 ++++++++++++- net/batman-adv/gateway_client.h | 3 +-- net/batman-adv/soft-interface.c | 9 ++++++++- net/batman-adv/unicast.c | 13 ++++++++++--- 5 files changed, 33 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index e14531f1ce1c..264de88db320 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1529,6 +1529,8 @@ out: * in these cases, the skb is further handled by this function and * returns 1, otherwise it returns 0 and the caller shall further * process the skb. + * + * This call might reallocate skb data. 
*/ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index f105219f4a4b..7614af31daff 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -508,6 +508,7 @@ out: return 0; } +/* this call might reallocate skb data */ static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len) { int ret = false; @@ -568,6 +569,7 @@ out: return ret; } +/* this call might reallocate skb data */ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) { struct ethhdr *ethhdr; @@ -619,6 +621,12 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr))) return false; + + /* skb->data might have been reallocated by pskb_may_pull() */ + ethhdr = (struct ethhdr *)skb->data; + if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) + ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN); + udphdr = (struct udphdr *)(skb->data + *header_len); *header_len += sizeof(*udphdr); @@ -634,12 +642,14 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) return true; } +/* this call might reallocate skb data */ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, - struct sk_buff *skb, struct ethhdr *ethhdr) + struct sk_buff *skb) { struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL; struct batadv_orig_node *orig_dst_node = NULL; struct batadv_gw_node *curr_gw = NULL; + struct ethhdr *ethhdr; bool ret, out_of_range = false; unsigned int header_len = 0; uint8_t curr_tq_avg; @@ -648,6 +658,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!ret) goto out; + ethhdr = (struct ethhdr *)skb->data; orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source, ethhdr->h_dest); if (!orig_dst_node) diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 
039902dca4a6..1037d75da51f 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -34,7 +34,6 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, void batadv_gw_node_purge(struct batadv_priv *bat_priv); int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len); -bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, - struct sk_buff *skb, struct ethhdr *ethhdr); +bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 700d0b49742d..0f04e1c302b4 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -180,6 +180,9 @@ static int batadv_interface_tx(struct sk_buff *skb, if (batadv_bla_tx(bat_priv, skb, vid)) goto dropped; + /* skb->data might have been reallocated by batadv_bla_tx() */ + ethhdr = (struct ethhdr *)skb->data; + /* Register the client MAC in the transtable */ if (!is_multicast_ether_addr(ethhdr->h_source)) batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); @@ -220,6 +223,10 @@ static int batadv_interface_tx(struct sk_buff *skb, default: break; } + + /* reminder: ethhdr might have become unusable from here on + * (batadv_gw_is_dhcp_target() might have reallocated skb data) + */ } /* ethernet packet should be broadcasted */ @@ -266,7 +273,7 @@ static int batadv_interface_tx(struct sk_buff *skb, /* unicast packet */ } else { if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_OFF) { - ret = batadv_gw_out_of_range(bat_priv, skb, ethhdr); + ret = batadv_gw_out_of_range(bat_priv, skb); if (ret) goto dropped; } diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index dc8b5d4dd636..688a0419756b 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -326,7 +326,9 @@ static bool 
batadv_unicast_push_and_fill_skb(struct sk_buff *skb, int hdr_size, * @skb: the skb containing the payload to encapsulate * @orig_node: the destination node * - * Returns false if the payload could not be encapsulated or true otherwise + * Returns false if the payload could not be encapsulated or true otherwise. + * + * This call might reallocate skb data. */ static bool batadv_unicast_prepare_skb(struct sk_buff *skb, struct batadv_orig_node *orig_node) @@ -343,7 +345,9 @@ static bool batadv_unicast_prepare_skb(struct sk_buff *skb, * @orig_node: the destination node * @packet_subtype: the batman 4addr packet subtype to use * - * Returns false if the payload could not be encapsulated or true otherwise + * Returns false if the payload could not be encapsulated or true otherwise. + * + * This call might reallocate skb data. */ bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -401,7 +405,7 @@ int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv, struct batadv_neigh_node *neigh_node; int data_len = skb->len; int ret = NET_RX_DROP; - unsigned int dev_mtu; + unsigned int dev_mtu, header_len; /* get routing information */ if (is_multicast_ether_addr(ethhdr->h_dest)) { @@ -429,10 +433,12 @@ find_router: switch (packet_type) { case BATADV_UNICAST: batadv_unicast_prepare_skb(skb, orig_node); + header_len = sizeof(struct batadv_unicast_packet); break; case BATADV_UNICAST_4ADDR: batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node, packet_subtype); + header_len = sizeof(struct batadv_unicast_4addr_packet); break; default: /* this function supports UNICAST and UNICAST_4ADDR only. 
It @@ -441,6 +447,7 @@ find_router: goto out; } + ethhdr = (struct ethhdr *)(skb->data + header_len); unicast_packet = (struct batadv_unicast_packet *)skb->data; /* inform the destination node that we are still missing a correct route -- cgit From d4cca39d90fca21c04315095de5d0e734e839a8b Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 9 Aug 2013 17:12:58 +0800 Subject: tipc: avoid possible deadlock while enable and disable bearer We met lockdep warning when enable and disable the bearer for commands such as: tipc-config -netid=1234 -addr=1.1.3 -be=eth:eth0 tipc-config -netid=1234 -addr=1.1.3 -bd=eth:eth0 --------------------------------------------------- [ 327.693595] ====================================================== [ 327.693994] [ INFO: possible circular locking dependency detected ] [ 327.694519] 3.11.0-rc3-wwd-default #4 Tainted: G O [ 327.694882] ------------------------------------------------------- [ 327.695385] tipc-config/5825 is trying to acquire lock: [ 327.695754] (((timer))#2){+.-...}, at: [] del_timer_sync+0x0/0xd0 [ 327.696018] [ 327.696018] but task is already holding lock: [ 327.696018] (&(&b_ptr->lock)->rlock){+.-...}, at: [] bearer_disable+ 0xdd/0x120 [tipc] [ 327.696018] [ 327.696018] which lock already depends on the new lock. 
[ 327.696018] [ 327.696018] [ 327.696018] the existing dependency chain (in reverse order) is: [ 327.696018] [ 327.696018] -> #1 (&(&b_ptr->lock)->rlock){+.-...}: [ 327.696018] [] validate_chain+0x6dd/0x870 [ 327.696018] [] __lock_acquire+0x3db/0x670 [ 327.696018] [] lock_acquire+0x103/0x130 [ 327.696018] [] _raw_spin_lock_bh+0x41/0x80 [ 327.696018] [] disc_timeout+0x18/0xd0 [tipc] [ 327.696018] [] call_timer_fn+0xda/0x1e0 [ 327.696018] [] run_timer_softirq+0x2a7/0x2d0 [ 327.696018] [] __do_softirq+0x16a/0x2e0 [ 327.696018] [] irq_exit+0xd5/0xe0 [ 327.696018] [] smp_apic_timer_interrupt+0x45/0x60 [ 327.696018] [] apic_timer_interrupt+0x6f/0x80 [ 327.696018] [] arch_cpu_idle+0x1e/0x30 [ 327.696018] [] cpu_idle_loop+0x1fd/0x280 [ 327.696018] [] cpu_startup_entry+0x1e/0x20 [ 327.696018] [] start_secondary+0x89/0x90 [ 327.696018] [ 327.696018] -> #0 (((timer))#2){+.-...}: [ 327.696018] [] check_prev_add+0x43e/0x4b0 [ 327.696018] [] validate_chain+0x6dd/0x870 [ 327.696018] [] __lock_acquire+0x3db/0x670 [ 327.696018] [] lock_acquire+0x103/0x130 [ 327.696018] [] del_timer_sync+0x3d/0xd0 [ 327.696018] [] tipc_disc_delete+0x15/0x30 [tipc] [ 327.696018] [] bearer_disable+0xef/0x120 [tipc] [ 327.696018] [] tipc_disable_bearer+0x2f/0x60 [tipc] [ 327.696018] [] tipc_cfg_do_cmd+0x2e2/0x550 [tipc] [ 327.696018] [] handle_cmd+0x49/0xe0 [tipc] [ 327.696018] [] genl_family_rcv_msg+0x268/0x340 [ 327.696018] [] genl_rcv_msg+0x70/0xd0 [ 327.696018] [] netlink_rcv_skb+0x89/0xb0 [ 327.696018] [] genl_rcv+0x27/0x40 [ 327.696018] [] netlink_unicast+0x15e/0x1b0 [ 327.696018] [] netlink_sendmsg+0x22f/0x400 [ 327.696018] [] __sock_sendmsg+0x66/0x80 [ 327.696018] [] sock_aio_write+0x107/0x120 [ 327.696018] [] do_sync_write+0x7d/0xc0 [ 327.696018] [] vfs_write+0x186/0x190 [ 327.696018] [] SyS_write+0x60/0xb0 [ 327.696018] [] system_call_fastpath+0x16/0x1b [ 327.696018] [ 327.696018] other info that might help us debug this: [ 327.696018] [ 327.696018] Possible unsafe locking scenario: [ 
327.696018] [ 327.696018] CPU0 CPU1 [ 327.696018] ---- ---- [ 327.696018] lock(&(&b_ptr->lock)->rlock); [ 327.696018] lock(((timer))#2); [ 327.696018] lock(&(&b_ptr->lock)->rlock); [ 327.696018] lock(((timer))#2); [ 327.696018] [ 327.696018] *** DEADLOCK *** [ 327.696018] [ 327.696018] 5 locks held by tipc-config/5825: [ 327.696018] #0: (cb_lock){++++++}, at: [] genl_rcv+0x18/0x40 [ 327.696018] #1: (genl_mutex){+.+.+.}, at: [] genl_rcv_msg+0xa6/0xd0 [ 327.696018] #2: (config_mutex){+.+.+.}, at: [] tipc_cfg_do_cmd+0x39/ 0x550 [tipc] [ 327.696018] #3: (tipc_net_lock){++.-..}, at: [] tipc_disable_bearer+ 0x18/0x60 [tipc] [ 327.696018] #4: (&(&b_ptr->lock)->rlock){+.-...}, at: [] bearer_disable+0xdd/0x120 [tipc] [ 327.696018] [ 327.696018] stack backtrace: [ 327.696018] CPU: 2 PID: 5825 Comm: tipc-config Tainted: G O 3.11.0-rc3-wwd- default #4 [ 327.696018] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 327.696018] 00000000ffffffff ffff880037fa77a8 ffffffff814d03dd 0000000000000000 [ 327.696018] ffff880037fa7808 ffff880037fa77e8 ffffffff810b1c4f 0000000037fa77e8 [ 327.696018] ffff880037fa7808 ffff880037e4db40 0000000000000000 ffff880037e4e318 [ 327.696018] Call Trace: [ 327.696018] [] dump_stack+0x4d/0xa0 [ 327.696018] [] print_circular_bug+0x10f/0x120 [ 327.696018] [] check_prev_add+0x43e/0x4b0 [ 327.696018] [] validate_chain+0x6dd/0x870 [ 327.696018] [] ? sched_clock_cpu+0xd8/0x110 [ 327.696018] [] __lock_acquire+0x3db/0x670 [ 327.696018] [] lock_acquire+0x103/0x130 [ 327.696018] [] ? try_to_del_timer_sync+0x70/0x70 [ 327.696018] [] del_timer_sync+0x3d/0xd0 [ 327.696018] [] ? try_to_del_timer_sync+0x70/0x70 [ 327.696018] [] tipc_disc_delete+0x15/0x30 [tipc] [ 327.696018] [] bearer_disable+0xef/0x120 [tipc] [ 327.696018] [] tipc_disable_bearer+0x2f/0x60 [tipc] [ 327.696018] [] tipc_cfg_do_cmd+0x2e2/0x550 [tipc] [ 327.696018] [] ? 
security_capable+0x13/0x20 [ 327.696018] [] handle_cmd+0x49/0xe0 [tipc] [ 327.696018] [] genl_family_rcv_msg+0x268/0x340 [ 327.696018] [] genl_rcv_msg+0x70/0xd0 [ 327.696018] [] ? genl_lock+0x20/0x20 [ 327.696018] [] netlink_rcv_skb+0x89/0xb0 [ 327.696018] [] ? genl_rcv+0x18/0x40 [ 327.696018] [] genl_rcv+0x27/0x40 [ 327.696018] [] netlink_unicast+0x15e/0x1b0 [ 327.696018] [] ? memcpy_fromiovec+0x6c/0x90 [ 327.696018] [] netlink_sendmsg+0x22f/0x400 [ 327.696018] [] __sock_sendmsg+0x66/0x80 [ 327.696018] [] sock_aio_write+0x107/0x120 [ 327.696018] [] ? release_sock+0x8c/0xa0 [ 327.696018] [] do_sync_write+0x7d/0xc0 [ 327.696018] [] ? rw_verify_area+0x54/0x100 [ 327.696018] [] vfs_write+0x186/0x190 [ 327.696018] [] SyS_write+0x60/0xb0 [ 327.696018] [] system_call_fastpath+0x16/0x1b ----------------------------------------------------------------------- The problem is that tipc_disc_delete() cancels the disc_timeout() timer while b_ptr->lock is held, but disc_timeout() itself takes b_ptr->lock to finish its work, so a deadlock can occur. We should release b_ptr->lock before deleting the disc_timeout() timer. Removing link_timeout() hits the same problem, but the patch at http://article.gmane.org/gmane.network.tipc.general/4380 fixes it, so there is no need to send a patch for the link_timeout() deadlock warning. Signed-off-by: Wang Weidong Signed-off-by: Ding Tianhong Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/bearer.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index cb29ef7ba2f0..609c30c80816 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -460,6 +460,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr) { struct tipc_link *l_ptr; struct tipc_link *temp_l_ptr; + struct tipc_link_req *temp_req; pr_info("Disabling bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); @@ -468,9 +469,13 @@ static void bearer_disable(struct tipc_bearer *b_ptr) list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { tipc_link_delete(l_ptr); } - if (b_ptr->link_req) - tipc_disc_delete(b_ptr->link_req); + temp_req = b_ptr->link_req; + b_ptr->link_req = NULL; spin_unlock_bh(&b_ptr->lock); + + if (temp_req) + tipc_disc_delete(temp_req); + memset(b_ptr, 0, sizeof(struct tipc_bearer)); } -- cgit From ac4f9599362475662efb6efbb334cbcec98d4778 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Aug 2013 15:09:08 +0200 Subject: net: sctp: sctp_assoc_control_transport: fix MTU size in SCTP_PF state The SCTP Quick failover draft [1] section 5.1, point 5 says that the cwnd should be 1 MTU. So, instead of 1, set it to 1 MTU. [1] https://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05 Reported-by: Karl Heiss Signed-off-by: Daniel Borkmann Cc: Neil Horman Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/sctp/associola.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index bce5b79662a6..ab67efc64b24 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -846,12 +846,12 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, else spc_state = SCTP_ADDR_AVAILABLE; /* Don't inform ULP about transition from PF to - * active state and set cwnd to 1, see SCTP + * active state and set cwnd to 1 MTU, see SCTP * Quick failover draft section 5.1, point 5 */ if (transport->state == SCTP_PF) { ulp_notify = false; - transport->cwnd = 1; + transport->cwnd = asoc->pathmtu; } transport->state = SCTP_ACTIVE; break; -- cgit From 771085d6bf3c52de29fc213e5bad07a82e57c23e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Aug 2013 16:25:21 +0200 Subject: net: sctp: sctp_transport_destroy{, _rcu}: fix potential pointer corruption Probably this one is quite unlikely to be triggered, but it's more safe to do the call_rcu() at the end after we have dropped the reference on the asoc and freed sctp packet chunks. The reason why is because in sctp_transport_destroy_rcu() the transport is being kfree()'d, and if we're unlucky enough we could run into corrupted pointers. Probably that's more of theoretical nature, but it's safer to have this simple fix. Introduced by commit 8c98653f ("sctp: sctp_close: fix release of bindings for deferred call_rcu's"). I also did the 8c98653f regression test and it's fine that way. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index bdbbc3fd7c14..8fdd16046d66 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -181,12 +181,12 @@ static void sctp_transport_destroy(struct sctp_transport *transport) return; } - call_rcu(&transport->rcu, sctp_transport_destroy_rcu); - sctp_packet_free(&transport->packet); if (transport->asoc) sctp_association_put(transport->asoc); + + call_rcu(&transport->rcu, sctp_transport_destroy_rcu); } /* Start T3_rtx timer if it is not already running and update the heartbeat -- cgit From 58ad436fcf49810aa006016107f494c9ac9013db Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Aug 2013 09:04:05 +0200 Subject: genetlink: fix family dump race When dumping generic netlink families, only the first dump call is locked with genl_lock(), which protects the list of families, and thus subsequent calls can access the data without locking, racing against family addition/removal. This can cause a crash. Fix it - the locking needs to be conditional because the first time around it's already locked. A similar bug was reported to me on an old kernel (3.4.47) but the exact scenario that happened there is no longer possible, on those kernels the first round wasn't locked either. Looking at the current code I found the race described above, which had also existed on the old kernel. Cc: stable@vger.kernel.org Reported-by: Andrei Otcheretianski Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- net/netlink/genetlink.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 512718adb0d5..f85f8a2ad6cf 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -789,6 +789,10 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; + bool need_locking = chains_to_skip || fams_to_skip; + + if (need_locking) + genl_lock(); for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) { n = 0; @@ -810,6 +814,9 @@ errout: cb->args[0] = i; cb->args[1] = n; + if (need_locking) + genl_unlock(); + return skb->len; } -- cgit From 4221f40513233fa8edeef7fc82e44163fde03b9b Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 13 Aug 2013 01:41:06 -0700 Subject: ip_tunnel: Do not use inner ip-header-id for tunnel ip-header-id. Using inner-id for tunnel id is not safe in some rare cases. E.g. packets coming from multiple sources entering same tunnel can have same id. Therefore on tunnel packet receive we could have packets from two different stream but with same source and dst IP with same ip-id which could confuse ip packet reassembly. Following patch reverts optimization from commit 490ab08127 (IP_GRE: Fix IP-Identification.) CC: Jarno Rajahalme CC: Ansis Atteka Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/ipv4/ip_tunnel_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 7167b08977df..850525b34899 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -76,9 +76,7 @@ int iptunnel_xmit(struct net *net, struct rtable *rt, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - tunnel_ip_select_ident(skb, - (const struct iphdr *)skb_inner_network_header(skb), - &rt->dst); + __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); err = ip_local_out(skb); if (unlikely(net_xmit_eval(err))) -- cgit From 3e805ad288c524bb65aad3f1e004402223d3d504 Mon Sep 17 00:00:00 2001 From: Asbjoern Sloth Toennesen Date: Mon, 12 Aug 2013 16:30:09 +0000 Subject: rtnetlink: rtnl_bridge_getlink: Call nlmsg_find_attr() with ifinfomsg header Fix the iproute2 command `bridge vlan show`, after switching from rtgenmsg to ifinfomsg. Let's start with a little history: Feb 20: Vlad Yasevich got his VLAN-aware bridge patchset included in the 3.9 merge window. In the kernel commit 6cbdceeb, he added attribute support to bridge GETLINK requests sent with rtgenmsg. Mar 6th: Vlad got this iproute2 reference implementation of the bridge vlan netlink interface accepted (iproute2 9eff0e5c) Apr 25th: iproute2 switched from using rtgenmsg to ifinfomsg (63338dca) http://patchwork.ozlabs.org/patch/239602/ http://marc.info/?t=136680900700007 Apr 28th: Linus released 3.9 Apr 30th: Stephen released iproute2 3.9.0 The `bridge vlan show` command hasn't been working since the switch to ifinfomsg, or in any released version of iproute2, since the kernel side only supports rtgenmsg, which iproute2 switched away from just prior to the iproute2 3.9.0 release. I haven't been able to find any documentation about either rtgenmsg or ifinfomsg, or about which situation calls for which, but kernel commit 88c5b5ce seems to suggest that ifinfomsg should be used. 
Fixing this in the kernel will break compatibility, but I doubt that anybody has been using it due to this bug in the user space reference implementation, at least not without noticing this bug. That said, the functionality is still fully functional in 3.9, when reverting iproute2 commit 63338dca. This could also be fixed in iproute2, but that's an ugly patch that would reintroduce rtgenmsg in iproute2, and from searching in netdev it seems like rtgenmsg usage is discouraged. I'm assuming that the only reason that Vlad implemented the kernel side to use rtgenmsg was because iproute2 was using it at the time. Signed-off-by: Asbjoern Sloth Toennesen Reviewed-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 82d968527121..ca198c1d1d30 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2384,7 +2384,7 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) struct nlattr *extfilt; u32 filter_mask = 0; - extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg), IFLA_EXT_MASK); if (extfilt) filter_mask = nla_get_u32(extfilt); -- cgit From 30444e981ba28e892c439017fbc011d867f02a7d Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 13 May 2013 08:41:06 -0700 Subject: openvswitch: Fix bad merge resolution. git silently included an extra hunk in vport_cmd_set() during automatic merging. This code is unreachable so it does not actually introduce a problem but it is clearly incorrect.
Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f7e3a0d84c40..f2ed7600084e 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2076,9 +2076,6 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_notify(reply, info, &ovs_dp_vport_multicast_group); return 0; - rtnl_unlock(); - return 0; - exit_free: kfree_skb(reply); exit_unlock: -- cgit From 42415c90ceaf50c792e29823e359463bc6d4ee05 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 30 Jul 2013 15:44:14 -0700 Subject: openvswitch: Use correct type while allocating flex array. Flex array is used to allocate hash buckets, which are of type struct hlist_head, but we use `struct hlist_head *` to calculate the array size. Since hlist_head has the size of a pointer, it works fine. The following patch uses the correct type. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 5c519b121e1b..1aa84dc58777 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -240,7 +240,7 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) struct flex_array *buckets; int i, err; - buckets = flex_array_alloc(sizeof(struct hlist_head *), + buckets = flex_array_alloc(sizeof(struct hlist_head), n_buckets, GFP_KERNEL); if (!buckets) return NULL; -- cgit From 36bf5cc66d60868bcc10aff209defed5a7b71c1d Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 14 Aug 2013 15:50:36 -0700 Subject: openvswitch: Reset tunnel key between input and output. It doesn't make sense to output a tunnel packet using the same parameters that it was received with since that will generally just result in the packet going back to us.
As a result, userspace assumes that the tunnel key is cleared when transitioning through the switch. In the majority of cases this doesn't matter since a packet is either going to a tunnel port (in which case the key is overwritten with new values) or to a non-tunnel port (in which case the key is ignored). However, it's theoretically possible that userspace could rely on the documented behavior, so this corrects it. Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 22c5f399f1cf..ab101f715447 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -535,6 +535,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb) { struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); + OVS_CB(skb)->tun_key = NULL; return do_execute_actions(dp, skb, acts->actions, acts->actions_len, false); } -- cgit From 8a8e3d84b1719a56f9151909e80ea6ebc5b8e318 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 14 Aug 2013 23:47:11 +0200 Subject: net_sched: restore "linklayer atm" handling commit 56b765b79 ("htb: improved accuracy at high rates") broke the "linklayer atm" handling. tc class add ... htb rate X ceil Y linklayer atm The linklayer setting is implemented by modifying the rate table which is sent to the kernel. No direct parameter was transferred to the kernel indicating the linklayer setting. The commit 56b765b79 ("htb: improved accuracy at high rates") removed the use of the rate table system. To stay compatible with older iproute2 utils, this patch detects the linklayer by parsing the rate table. It also supports future versions of iproute2 sending this linklayer parameter to the kernel directly. This is done by using the __reserved field in struct tc_ratespec to convey the chosen linklayer option, but only using the lower 4 bits of this field.
Linklayer detection is limited to speeds below 100Mbit/s, because at high rates the rtab gets too inaccurate, so bad that several fields contain the same values, which resembles the pattern the ATM detection looks for. Fields even start to contain "0" time to send, e.g. at 1000Mbit/s sending a 96-byte packet costs "0", thus the rtab has been more broken than we first realized. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/sched/sch_api.c | 41 +++++++++++++++++++++++++++++++++++++++++ net/sched/sch_generic.c | 1 + net/sched/sch_htb.c | 13 +++++++++++++ 3 files changed, 55 insertions(+) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 281c1bded1f6..51b968d3febb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -285,6 +285,45 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) return q; } +/* The linklayer setting were not transferred from iproute2, in older + * versions, and the rate tables lookup systems have been dropped in + * the kernel. To keep backward compatible with older iproute2 tc + * utils, we detect the linklayer setting by detecting if the rate + * table were modified. + * + * For linklayer ATM table entries, the rate table will be aligned to + * 48 bytes, thus some table entries will contain the same value. The + * mpu (min packet unit) is also encoded into the old rate table, thus + * starting from the mpu, we find low and high table entries for + * mapping this cell. If these entries contain the same value, when + * the rate tables have been modified for linklayer ATM. + * + * This is done by rounding mpu to the nearest 48 bytes cell/entry, + * and then roundup to the next cell, calc the table entry one below, + * and compare.
+ */ +static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) +{ + int low = roundup(r->mpu, 48); + int high = roundup(low+1, 48); + int cell_low = low >> r->cell_log; + int cell_high = (high >> r->cell_log) - 1; + + /* rtab is too inaccurate at rates > 100Mbit/s */ + if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { + pr_debug("TC linklayer: Giving up ATM detection\n"); + return TC_LINKLAYER_ETHERNET; + } + + if ((cell_high > cell_low) && (cell_high < 256) + && (rtab[cell_low] == rtab[cell_high])) { + pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", + cell_low, cell_high, rtab[cell_high]); + return TC_LINKLAYER_ATM; + } + return TC_LINKLAYER_ETHERNET; +} + static struct qdisc_rate_table *qdisc_rtab_list; struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) @@ -308,6 +347,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta rtab->rate = *r; rtab->refcnt = 1; memcpy(rtab->data, nla_data(tab), 1024); + if (r->linklayer == TC_LINKLAYER_UNAWARE) + r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index eeb8276d7a89..48be3d5c0d92 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -909,6 +909,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; r->rate_bytes_ps = conf->rate; + r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; /* * The deal here is to replace a divide by a reciprocal one diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 45e751527dfc..c2178b15ca6e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1329,6 +1329,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; + struct 
qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; @@ -1350,6 +1351,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + /* Keeping backward compatible with rate_table based iproute2 tc */ + if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) { + rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]); + if (rtab) + qdisc_put_rtab(rtab); + } + if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) { + ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]); + if (ctab) + qdisc_put_rtab(ctab); + } + if (!cl) { /* new class */ struct Qdisc *new_q; int prio; -- cgit