From 119363c7dc2bcc0c33c255a7b4979c8c0fdc1896 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Apr 2013 16:29:30 +0200 Subject: cfg80211: add support for per-chain signal strength reporting Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index afa283841e8c..f687a8d0d026 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3376,6 +3376,32 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, return true; } +static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, + int id) +{ + void *attr; + int i = 0; + + if (!mask) + return true; + + attr = nla_nest_start(msg, id); + if (!attr) + return false; + + for (i = 0; i < IEEE80211_MAX_CHAINS; i++) { + if (!(mask & BIT(i))) + continue; + + if (nla_put_u8(msg, i, signal[i])) + return false; + } + + nla_nest_end(msg, attr); + + return true; +} + static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -3447,6 +3473,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, default: break; } + if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL) { + if (!nl80211_put_signal(msg, sinfo->chains, + sinfo->chain_signal, + NL80211_STA_INFO_CHAIN_SIGNAL)) + goto nla_put_failure; + } + if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL_AVG) { + if (!nl80211_put_signal(msg, sinfo->chains, + sinfo->chain_signal_avg, + NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) + goto nla_put_failure; + } if (sinfo->filled & STATION_INFO_TX_BITRATE) { if (!nl80211_put_sta_rate(msg, &sinfo->txrate, NL80211_STA_INFO_TX_BITRATE)) -- cgit From ef0621e805f9ef76eaf31ce6205028fe467e9ca9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Apr 2013 16:29:31 +0200 Subject: mac80211: add support for per-chain signal strength reporting Signed-off-by: Felix Fietkau [fix unit documentation] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 13 ++++++++++++- net/mac80211/rx.c | 14 ++++++++++++++ net/mac80211/sta_info.c | 2 ++ net/mac80211/sta_info.h | 5 +++++ 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1a89c80e6407..1f51bdfe574a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -444,7 +444,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_local *local = sdata->local; struct timespec uptime; u64 packets = 0; - int ac; + int i, ac; sinfo->generation = sdata->local->sta_generation; @@ -488,6 +488,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->signal = (s8)sta->last_signal; sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } + if (sta->chains) { + sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | + STATION_INFO_CHAIN_SIGNAL_AVG; + + sinfo->chains = sta->chains; + for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { + sinfo->chain_signal[i] = sta->chain_signal_last[i]; + sinfo->chain_signal_avg[i] = + (s8) -ewma_read(&sta->chain_signal_avg[i]); + } + } sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); sta_set_rate_info_rx(sta, &sinfo->rxrate); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c8447af76ead..22e412b0767f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1372,6 +1372,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + int i; if (!sta) return RX_CONTINUE; @@ -1422,6 +1423,19 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) ewma_add(&sta->avg_signal, -status->signal); } + if (status->chains) { + sta->chains = status->chains; + for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { + int signal = status->chain_signal[i]; + + if (!(status->chains & BIT(i))) + continue; + + sta->chain_signal_last[i] = signal; + ewma_add(&sta->chain_signal_avg[i], -signal); + } + } + /* * Change STA power saving mode only at the end of a frame * exchange sequence. diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 11216bc13b27..a04c5671d7fd 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -358,6 +358,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, do_posix_clock_monotonic_gettime(&uptime); sta->last_connected = uptime.tv_sec; ewma_init(&sta->avg_signal, 1024, 8); + for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) + ewma_init(&sta->chain_signal_avg[i], 1024, 8); if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index adc30045f99e..41c28b977f7c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -344,6 +344,11 @@ struct sta_info { int last_signal; struct ewma avg_signal; int last_ack_signal; + + u8 chains; + s8 chain_signal_last[IEEE80211_MAX_CHAINS]; + struct ewma chain_signal_avg[IEEE80211_MAX_CHAINS]; + /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; -- cgit From 55300a13d2ca1d59f659cf00b9d8dc93ea225882 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Tue, 23 Apr 2013 09:54:21 +0300 Subject: cfg80211: add 60GHz regulatory class Add regulatory class for 60GHz band, according to the last specification. Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- net/wireless/util.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index f5ad4d94ba88..b11052be09be 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1169,6 +1169,9 @@ bool ieee80211_operating_class_to_band(u8 operating_class, case 84: *band = IEEE80211_BAND_2GHZ; return true; + case 180: + *band = IEEE80211_BAND_60GHZ; + return true; } return false; -- cgit From fb4e156886ce6e8309e912d8b370d192330d19d3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 28 Apr 2013 16:22:06 -0700 Subject: nl80211: Add generic netlink module alias for cfg80211/nl80211 To support auto-loading of wireless modules from netlink users, add module alias for nl80211 family. This also adds NL80211_GENL_NAME constant to define the "nl80211" netlink family name as part of uapi. Signed-off-by: Marcel Holtmann Signed-off-by: Johannes Berg --- net/wireless/core.c | 1 + net/wireless/nl80211.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 84c9ad7e1dca..68f0c96c0565 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -34,6 +34,7 @@ MODULE_AUTHOR("Johannes Berg"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); +MODULE_ALIAS_GENL_FAMILY(NL80211_GENL_NAME); /* RCU-protected (and cfg80211_mutex for writers) */ LIST_HEAD(cfg80211_rdev_list); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f687a8d0d026..9cdcd9ec3317 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -37,10 +37,10 @@ static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb, /* the netlink family */ static struct genl_family nl80211_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ - .name = "nl80211", /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ + .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ + .name = NL80211_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, .netnsok = true, .pre_doit = nl80211_pre_doit, -- cgit From 04a161f4609dfa387313456fa7ea469fff12cc0d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 3 May 2013 09:35:35 +0200 Subject: mac80211: fix HT beacon-based channel switch handling When an HT AP is advertising channel switch in a beacon, it doesn't (and shouldn't, according to 802.11-2012 Table 8-20) include a secondary channel offset element. The only possible interpretation is that the previous secondary channel offset remains valid, so use that when switching channel based only on beacon information. VHT requires the Wide Bandwidth Channel Switch subelement to be present in the Channel Switch Wrapper element, so the code for that is probably ok (see 802.11ac Draft 4, 8.4.2.165.) Reported-by: Sujith Manoharan Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 29620bfc7a69..a8016c02a757 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1015,7 +1015,8 @@ static void ieee80211_chswitch_timer(unsigned long data) static void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - u64 timestamp, struct ieee802_11_elems *elems) + u64 timestamp, struct ieee802_11_elems *elems, + bool beacon) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1032,6 +1033,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def new_vht_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; + const struct ieee80211_ht_operation *ht_oper; int secondary_channel_offset = -1; ASSERT_MGD_MTX(ifmgd); @@ -1048,11 +1050,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; + ht_oper = elems->ht_operation; if (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_40MHZ)) { sec_chan_offs = NULL; wide_bw_chansw_ie = NULL; + /* only used for bandwidth here */ + ht_oper = NULL; } if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) @@ -1094,10 +1099,20 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; } - if (sec_chan_offs) { + if (!beacon && sec_chan_offs) { secondary_channel_offset = sec_chan_offs->sec_chan_offs; + } else if (beacon && ht_oper) { + secondary_channel_offset = + ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; } else if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { - /* if HT is enabled and the IE not present, it's still HT */ + /* + * If it's not a beacon, HT is enabled and the IE not present, + * it's 20 MHz, 802.11-2012 8.5.2.6: + * This element [the Secondary Channel Offset Element] is + * present when switching to a 40 MHz channel. It may be + * present when switching to a 20 MHz channel (in which + * case the secondary channel offset is set to SCN). + */ secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; } @@ -2796,7 +2811,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->iflist_mtx); } - ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems); + ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, + elems, true); } @@ -3210,7 +3226,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, - &elems); + &elems, false); } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { ies_len = skb->len - offsetof(struct ieee80211_mgmt, @@ -3232,7 +3248,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, - &elems); + &elems, false); } break; } -- cgit From 4325f6caad98c075b39f0eaaac6693a0dd43f646 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 May 2013 13:09:08 +0200 Subject: wireless: move crypto constants to ieee80211.h mac80211 and the Intel drivers all define crypto constants, move them to ieee80211.h instead. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/aes_ccm.c | 6 ++--- net/mac80211/key.c | 24 +++++++++--------- net/mac80211/key.h | 15 ++--------- net/mac80211/rx.c | 12 ++++----- net/mac80211/wep.c | 48 ++++++++++++++++++----------------- net/mac80211/wpa.c | 68 ++++++++++++++++++++++++++------------------------ 6 files changed, 84 insertions(+), 89 deletions(-) (limited to 'net') diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 0785e95c9924..be7614b9ed27 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -85,7 +85,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, *cpos++ = *pos++ ^ e[i]; } - for (i = 0; i < CCMP_MIC_LEN; i++) + for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++) mic[i] = b[i] ^ s_0[i]; } @@ -123,7 +123,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, crypto_cipher_encrypt_one(tfm, a, a); } - for (i = 0; i < CCMP_MIC_LEN; i++) { + for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++) { if ((mic[i] ^ s_0[i]) != a[i]) return -1; } @@ -138,7 +138,7 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]) tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (!IS_ERR(tfm)) - crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN); + crypto_cipher_setkey(tfm, key, WLAN_KEY_LEN_CCMP); return tfm; } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 67059b88fea5..e39cc91d0cf1 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -335,12 +335,12 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, switch (cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: - key->conf.iv_len = WEP_IV_LEN; - key->conf.icv_len = WEP_ICV_LEN; + key->conf.iv_len = IEEE80211_WEP_IV_LEN; + key->conf.icv_len = IEEE80211_WEP_ICV_LEN; break; case WLAN_CIPHER_SUITE_TKIP: - key->conf.iv_len = TKIP_IV_LEN; - key->conf.icv_len = TKIP_ICV_LEN; + key->conf.iv_len = IEEE80211_TKIP_IV_LEN; + key->conf.icv_len = IEEE80211_TKIP_ICV_LEN; if (seq) { for (i = 0; i < IEEE80211_NUM_TIDS; i++) { key->u.tkip.rx[i].iv32 = @@ -352,13 +352,13 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, spin_lock_init(&key->u.tkip.txlock); break; case WLAN_CIPHER_SUITE_CCMP: - key->conf.iv_len = CCMP_HDR_LEN; - key->conf.icv_len = CCMP_MIC_LEN; + key->conf.iv_len = IEEE80211_CCMP_HDR_LEN; + key->conf.icv_len = IEEE80211_CCMP_MIC_LEN; if (seq) { for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) - for (j = 0; j < CCMP_PN_LEN; j++) + for (j = 0; j < IEEE80211_CCMP_PN_LEN; j++) key->u.ccmp.rx_pn[i][j] = - seq[CCMP_PN_LEN - j - 1]; + seq[IEEE80211_CCMP_PN_LEN - j - 1]; } /* * Initialize AES key state here as an optimization so that @@ -375,9 +375,9 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, key->conf.iv_len = 0; key->conf.icv_len = sizeof(struct ieee80211_mmie); if (seq) - for (j = 0; j < CMAC_PN_LEN; j++) + for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++) key->u.aes_cmac.rx_pn[j] = - seq[CMAC_PN_LEN - j - 1]; + seq[IEEE80211_CMAC_PN_LEN - j - 1]; /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. @@ -740,13 +740,13 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.ccmp.rx_pn[tid]; - memcpy(seq->ccmp.pn, pn, CCMP_PN_LEN); + memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; - memcpy(seq->aes_cmac.pn, pn, CMAC_PN_LEN); + memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; } } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index e8de3e6d7804..036d57e76a5e 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -19,17 +19,6 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 -#define WEP_IV_LEN 4 -#define WEP_ICV_LEN 4 -#define ALG_CCMP_KEY_LEN 16 -#define CCMP_HDR_LEN 8 -#define CCMP_MIC_LEN 8 -#define CCMP_TK_LEN 16 -#define CCMP_PN_LEN 6 -#define TKIP_IV_LEN 8 -#define TKIP_ICV_LEN 4 -#define CMAC_PN_LEN 6 - struct ieee80211_local; struct ieee80211_sub_if_data; struct sta_info; @@ -93,13 +82,13 @@ struct ieee80211_key { * frames and the last counter is used with Robust * Management frames. */ - u8 rx_pn[IEEE80211_NUM_TIDS + 1][CCMP_PN_LEN]; + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_CCMP_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCCMPReplays */ } ccmp; struct { atomic64_t tx_pn; - u8 rx_pn[CMAC_PN_LEN]; + u8 rx_pn[IEEE80211_CMAC_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 22e412b0767f..6e2c8c5236c4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1622,7 +1622,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) entry->ccmp = 1; memcpy(entry->last_pn, rx->key->u.ccmp.rx_pn[queue], - CCMP_PN_LEN); + IEEE80211_CCMP_PN_LEN); } return RX_QUEUED; } @@ -1641,21 +1641,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) * (IEEE 802.11i, 8.3.3.4.5) */ if (entry->ccmp) { int i; - u8 pn[CCMP_PN_LEN], *rpn; + u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; int queue; if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) return RX_DROP_UNUSABLE; - memcpy(pn, entry->last_pn, CCMP_PN_LEN); - for (i = CCMP_PN_LEN - 1; i >= 0; i--) { + memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); + for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { pn[i]++; if (pn[i]) break; } queue = rx->security_idx; rpn = rx->key->u.ccmp.rx_pn[queue]; - if (memcmp(pn, rpn, CCMP_PN_LEN)) + if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN)) return RX_DROP_UNUSABLE; - memcpy(entry->last_pn, pn, CCMP_PN_LEN); + memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); } skb_pull(rx->skb, ieee80211_hdrlen(fc)); diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index c04d401dae92..6ee2b5863572 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -28,7 +28,7 @@ int ieee80211_wep_init(struct ieee80211_local *local) { /* start WEP IV from a random value */ - get_random_bytes(&local->wep_iv, WEP_IV_LEN); + get_random_bytes(&local->wep_iv, IEEE80211_WEP_IV_LEN); local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(local->wep_tx_tfm)) { @@ -98,20 +98,21 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN || - skb_headroom(skb) < WEP_IV_LEN)) + if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN || + skb_headroom(skb) < IEEE80211_WEP_IV_LEN)) return NULL; hdrlen = ieee80211_hdrlen(hdr->frame_control); - newhdr = skb_push(skb, WEP_IV_LEN); - memmove(newhdr, newhdr + WEP_IV_LEN, hdrlen); + newhdr = skb_push(skb, IEEE80211_WEP_IV_LEN); + memmove(newhdr, newhdr + IEEE80211_WEP_IV_LEN, hdrlen); /* the HW only needs room for the IV, but not the actual IV */ if (info->control.hw_key && (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return newhdr + hdrlen; - skb_set_network_header(skb, skb_network_offset(skb) + WEP_IV_LEN); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_WEP_IV_LEN); ieee80211_wep_get_iv(local, keylen, keyidx, newhdr + hdrlen); return newhdr + hdrlen; } @@ -125,8 +126,8 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local, unsigned int hdrlen; hdrlen = ieee80211_hdrlen(hdr->frame_control); - memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, WEP_IV_LEN); + memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_WEP_IV_LEN); } @@ -146,7 +147,7 @@ int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key, put_unaligned(icv, (__le32 *)(data + data_len)); crypto_cipher_setkey(tfm, rc4key, klen); - for (i = 0; i < data_len + WEP_ICV_LEN; i++) + for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++) crypto_cipher_encrypt_one(tfm, data + i, data + i); return 0; @@ -172,7 +173,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, if (!iv) return -1; - len = skb->len - (iv + WEP_IV_LEN - skb->data); + len = skb->len - (iv + IEEE80211_WEP_IV_LEN - skb->data); /* Prepend 24-bit IV to RC4 key */ memcpy(rc4key, iv, 3); @@ -181,10 +182,10 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, memcpy(rc4key + 3, key, keylen); /* Add room for ICV */ - skb_put(skb, WEP_ICV_LEN); + skb_put(skb, IEEE80211_WEP_ICV_LEN); return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3, - iv + WEP_IV_LEN, len); + iv + IEEE80211_WEP_IV_LEN, len); } @@ -201,11 +202,11 @@ int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key, return -1; crypto_cipher_setkey(tfm, rc4key, klen); - for (i = 0; i < data_len + WEP_ICV_LEN; i++) + for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++) crypto_cipher_decrypt_one(tfm, data + i, data + i); crc = cpu_to_le32(~crc32_le(~0, data, data_len)); - if (memcmp(&crc, data + data_len, WEP_ICV_LEN) != 0) + if (memcmp(&crc, data + data_len, IEEE80211_WEP_ICV_LEN) != 0) /* ICV mismatch */ return -1; @@ -237,10 +238,10 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, return -1; hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (skb->len < hdrlen + WEP_IV_LEN + WEP_ICV_LEN) + if (skb->len < hdrlen + IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN) return -1; - len = skb->len - hdrlen - WEP_IV_LEN - WEP_ICV_LEN; + len = skb->len - hdrlen - IEEE80211_WEP_IV_LEN - IEEE80211_WEP_ICV_LEN; keyidx = skb->data[hdrlen + 3] >> 6; @@ -256,16 +257,16 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, memcpy(rc4key + 3, key->conf.key, key->conf.keylen); if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, - skb->data + hdrlen + WEP_IV_LEN, - len)) + skb->data + hdrlen + + IEEE80211_WEP_IV_LEN, len)) ret = -1; /* Trim ICV */ - skb_trim(skb, skb->len - WEP_ICV_LEN); + skb_trim(skb, skb->len - IEEE80211_WEP_ICV_LEN); /* Remove IV */ - memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, WEP_IV_LEN); + memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_WEP_IV_LEN); return ret; } @@ -305,13 +306,14 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) return RX_DROP_UNUSABLE; } else if (!(status->flag & RX_FLAG_IV_STRIPPED)) { - if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + WEP_IV_LEN)) + if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + + IEEE80211_WEP_IV_LEN)) return RX_DROP_UNUSABLE; if (rx->sta && ieee80211_wep_is_weak_iv(rx->skb, rx->key)) rx->sta->wep_weak_iv_count++; ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ - if (pskb_trim(rx->skb, rx->skb->len - WEP_ICV_LEN)) + if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) return RX_DROP_UNUSABLE; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index c7c6d644486f..c9edfcb7a13b 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -62,10 +62,10 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) tail = MICHAEL_MIC_LEN; if (!info->control.hw_key) - tail += TKIP_ICV_LEN; + tail += IEEE80211_TKIP_ICV_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < TKIP_IV_LEN)) + skb_headroom(skb) < IEEE80211_TKIP_IV_LEN)) return TX_DROP; key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; @@ -198,15 +198,16 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = TKIP_ICV_LEN; + tail = IEEE80211_TKIP_ICV_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < TKIP_IV_LEN)) + skb_headroom(skb) < IEEE80211_TKIP_IV_LEN)) return -1; - pos = skb_push(skb, TKIP_IV_LEN); - memmove(pos, pos + TKIP_IV_LEN, hdrlen); - skb_set_network_header(skb, skb_network_offset(skb) + TKIP_IV_LEN); + pos = skb_push(skb, IEEE80211_TKIP_IV_LEN); + memmove(pos, pos + IEEE80211_TKIP_IV_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_TKIP_IV_LEN); pos += hdrlen; /* the HW only needs room for the IV, but not the actual IV */ @@ -227,7 +228,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) return 0; /* Add room for ICV */ - skb_put(skb, TKIP_ICV_LEN); + skb_put(skb, IEEE80211_TKIP_ICV_LEN); return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, key, skb, pos, len); @@ -290,11 +291,11 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* Trim ICV */ - skb_trim(skb, skb->len - TKIP_ICV_LEN); + skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); /* Remove IV */ - memmove(skb->data + TKIP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, TKIP_IV_LEN); + memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_TKIP_IV_LEN); return RX_CONTINUE; } @@ -337,9 +338,9 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, else qos_tid = 0; - data_len = skb->len - hdrlen - CCMP_HDR_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN; if (encrypted) - data_len -= CCMP_MIC_LEN; + data_len -= IEEE80211_CCMP_MIC_LEN; /* First block, b_0 */ b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */ @@ -348,7 +349,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, */ b_0[1] = qos_tid | (mgmt << 4); memcpy(&b_0[2], hdr->addr2, ETH_ALEN); - memcpy(&b_0[8], pn, CCMP_PN_LEN); + memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN); /* l(m) */ put_unaligned_be16(data_len, &b_0[14]); @@ -424,15 +425,16 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = CCMP_MIC_LEN; + tail = IEEE80211_CCMP_MIC_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < CCMP_HDR_LEN)) + skb_headroom(skb) < IEEE80211_CCMP_HDR_LEN)) return -1; - pos = skb_push(skb, CCMP_HDR_LEN); - memmove(pos, pos + CCMP_HDR_LEN, hdrlen); - skb_set_network_header(skb, skb_network_offset(skb) + CCMP_HDR_LEN); + pos = skb_push(skb, IEEE80211_CCMP_HDR_LEN); + memmove(pos, pos + IEEE80211_CCMP_HDR_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_CCMP_HDR_LEN); /* the HW only needs room for the IV, but not the actual IV */ if (info->control.hw_key && @@ -457,10 +459,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) return 0; - pos += CCMP_HDR_LEN; + pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, scratch, 0); ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, pos, len, - pos, skb_put(skb, CCMP_MIC_LEN)); + pos, skb_put(skb, IEEE80211_CCMP_MIC_LEN)); return 0; } @@ -490,7 +492,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - u8 pn[CCMP_PN_LEN]; + u8 pn[IEEE80211_CCMP_PN_LEN]; int data_len; int queue; @@ -500,12 +502,13 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) !ieee80211_is_robust_mgmt_frame(hdr)) return RX_CONTINUE; - data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - + IEEE80211_CCMP_MIC_LEN; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; if (status->flag & RX_FLAG_DECRYPTED) { - if (!pskb_may_pull(rx->skb, hdrlen + CCMP_HDR_LEN)) + if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN)) return RX_DROP_UNUSABLE; } else { if (skb_linearize(rx->skb)) @@ -516,7 +519,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) queue = rx->security_idx; - if (memcmp(pn, key->u.ccmp.rx_pn[queue], CCMP_PN_LEN) <= 0) { + if (memcmp(pn, key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN) <= 0) { key->u.ccmp.replays++; return RX_DROP_UNUSABLE; } @@ -528,19 +531,20 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_aes_ccm_decrypt( key->u.ccmp.tfm, scratch, - skb->data + hdrlen + CCMP_HDR_LEN, data_len, - skb->data + skb->len - CCMP_MIC_LEN, - skb->data + hdrlen + CCMP_HDR_LEN)) + skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, + data_len, + skb->data + skb->len - IEEE80211_CCMP_MIC_LEN, + skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN)) return RX_DROP_UNUSABLE; } - memcpy(key->u.ccmp.rx_pn[queue], pn, CCMP_PN_LEN); + memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); /* Remove CCMP header and MIC */ - if (pskb_trim(skb, skb->len - CCMP_MIC_LEN)) + if (pskb_trim(skb, skb->len - IEEE80211_CCMP_MIC_LEN)) return RX_DROP_UNUSABLE; - memmove(skb->data + CCMP_HDR_LEN, skb->data, hdrlen); - skb_pull(skb, CCMP_HDR_LEN); + memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_CCMP_HDR_LEN); return RX_CONTINUE; } -- cgit From bd500af223c9aed7083730b7044d53162065e418 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2013 21:09:46 +0200 Subject: mac80211: write memcpy differently for smatch There's no real difference between *array and array, but the former confuses smatch so write it differently. The generated code is exactly the same. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1f51bdfe574a..66989458f5ff 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -739,7 +739,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy, if (sset == ETH_SS_STATS) { sz_sta_stats = sizeof(ieee80211_gstrings_sta_stats); - memcpy(data, *ieee80211_gstrings_sta_stats, sz_sta_stats); + memcpy(data, ieee80211_gstrings_sta_stats, sz_sta_stats); } drv_get_et_strings(sdata, sset, &(data[sz_sta_stats])); } -- cgit From 7ade7036043e2e8e2831ae189ce5c248386062f1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 May 2013 11:37:30 +0200 Subject: cfg80211: use C99 initialisers to simplify code a bit Use C99 initialisers for the auth, deauth and disassoc requests to simplify the code. Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 0c7b7dd855f6..c21e32f9549c 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -264,7 +264,16 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *sae_data, int sae_data_len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_auth_request req; + struct cfg80211_auth_request req = { + .ie = ie, + .ie_len = ie_len, + .sae_data = sae_data, + .sae_data_len = sae_data_len, + .auth_type = auth_type, + .key = key, + .key_len = key_len, + .key_idx = key_idx, + }; int err; ASSERT_WDEV_LOCK(wdev); @@ -277,18 +286,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, ether_addr_equal(bssid, wdev->current_bss->pub.bssid)) return -EALREADY; - memset(&req, 0, sizeof(req)); - - req.ie = ie; - req.ie_len = ie_len; - req.sae_data = sae_data; - req.sae_data_len = sae_data_len; - req.auth_type = auth_type; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - req.key = key; - req.key_len = key_len; - req.key_idx = key_idx; if (!req.bss) return -ENOENT; @@ -480,7 +479,12 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_disassoc_request req; + struct cfg80211_disassoc_request req = { + .reason_code = reason, + .local_state_change = local_state_change, + .ie = ie, + .ie_len = ie_len, + }; ASSERT_WDEV_LOCK(wdev); @@ -490,11 +494,6 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, if (WARN(!wdev->current_bss, "sme_state=%d\n", wdev->sme_state)) return -ENOTCONN; - memset(&req, 0, sizeof(req)); - req.reason_code = reason; - req.local_state_change = local_state_change; - req.ie = ie; - req.ie_len = ie_len; if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) req.bss = &wdev->current_bss->pub; else @@ -523,24 +522,21 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_deauth_request req; u8 bssid[ETH_ALEN]; + struct cfg80211_deauth_request req = { + .reason_code = WLAN_REASON_DEAUTH_LEAVING, + .bssid = bssid, + }; ASSERT_WDEV_LOCK(wdev); if (!rdev->ops->deauth) return; - memset(&req, 0, sizeof(req)); - req.reason_code = WLAN_REASON_DEAUTH_LEAVING; - req.ie = NULL; - req.ie_len = 0; - if (!wdev->current_bss) return; memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); - req.bssid = bssid; rdev_deauth(rdev, dev, &req); if (wdev->current_bss) { -- cgit From 6e16d90b5218307db805e6b3e0b06d3946eb8c4c Mon Sep 17 00:00:00 2001 From: Colleen Twitty Date: Wed, 8 May 2013 11:45:59 -0700 Subject: cfg80211: Userspace may inform kernel of mesh auth method. Authentication takes place in userspace, but the beacon is generated in the kernel. Allow userspace to inform the kernel of the authentication method so the appropriate mesh config IE can be set prior to beacon generation when joining the MBSS. Signed-off-by: Colleen Twitty Signed-off-by: Johannes Berg --- net/wireless/mesh.c | 1 + net/wireless/nl80211.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'net') diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0bb93f3061a4..9546ad210550 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -82,6 +82,7 @@ const struct mesh_setup default_mesh_setup = { .sync_method = IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET, .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, .path_metric = IEEE80211_PATH_METRIC_AIRTIME, + .auth_id = 0, /* open */ .ie = NULL, .ie_len = 0, .is_secure = false, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9cdcd9ec3317..5f10f7acfa06 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4672,6 +4672,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, + [NL80211_MESH_SETUP_AUTH_PROTOCOL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, @@ -4857,6 +4858,13 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, if (setup->is_secure) setup->user_mpm = true; + if (tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]) { + if (!setup->user_mpm) + return -EINVAL; + setup->auth_id = + nla_get_u8(tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]); + } + return 0; } -- cgit From 0d4261ad5d0028b26cd88e645b4507eed8aab3f7 Mon Sep 17 00:00:00 2001 From: Colleen Twitty Date: Wed, 8 May 2013 11:46:00 -0700 Subject: mac80211: enable Auth Protocol Identifier on mesh config. Previously the mesh_auth_id was disabled. Instead set the correct mesh authentication bit based on the mesh setup. Signed-off-by: Colleen Twitty Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 1 + net/mac80211/mesh.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 66989458f5ff..eb4219051043 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1746,6 +1746,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; ifmsh->user_mpm = setup->user_mpm; + ifmsh->mesh_auth_id = setup->auth_id; ifmsh->security = IEEE80211_MESH_SEC_NONE; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6952760881c8..c13db9ad394b 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -748,7 +748,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_configure_filter(local); ifmsh->mesh_cc_id = 0; /* Disabled */ - ifmsh->mesh_auth_id = 0; /* Disabled */ /* register sync ops from extensible synchronization framework */ ifmsh->sync_ops = ieee80211_mesh_sync_ops_get(ifmsh->mesh_sp_id); ifmsh->adjusting_tbtt = false; -- cgit From ce85788846ec19dcb7bef0dcbcf83fb64630f426 Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Mon, 6 May 2013 17:17:04 +0300 Subject: mac80211: enable power save only if DTIM period is available Generally, the DTIM period is available after a beacon has been received, and if no beacon has been received enabling powersave is problematic anyway for synchronisation. Since some drivers may require the DTIM period for powersave, don't enable powersave until it becomes available in case the scan/association managed to not receive a beacon. Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a8016c02a757..ef378b9a32ec 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1390,6 +1390,9 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) IEEE80211_STA_CONNECTION_POLL)) return false; + if (!sdata->vif.bss_conf.dtim_period) + return false; + rcu_read_lock(); sta = sta_info_get(sdata, mgd->bssid); if (sta) @@ -3126,6 +3129,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } changed |= BSS_CHANGED_DTIM_PERIOD; + ieee80211_recalc_ps_vif(sdata); } if (elems.erp_info) { -- cgit From d2cf43674e17ca1c16c68d46d987d2f17bf7c371 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 May 2013 19:25:55 +0000 Subject: tcp: speedup tcp_fixup_rcvbuf() tcp_fixup_rcvbuf() contains a loop to estimate initial socket rcv space needed for a given mss. With large MTU (like 64K on lo), we can loop ~500 times and consume a lot of cpu cycles. perf top of 200 concurrent netperf -t TCP_CRR 5.62% netperf [kernel.kallsyms] [k] tcp_init_buffer_space 1.71% netperf [kernel.kallsyms] [k] _raw_spin_lock 1.55% netperf [kernel.kallsyms] [k] kmem_cache_free 1.51% netperf [kernel.kallsyms] [k] tcp_transmit_skb 1.50% netperf [kernel.kallsyms] [k] tcp_ack Lets use a 100% factor, and remove the loop. 100% is needed anyway for tcp_adv_win_scale=1 default value, and is also the maximum factor. Refs: commit b49960a05e32 ("tcp: change tcp_adv_win_scale and tcp_rmem[2]") Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 08bbe6096528..b358e8c98607 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -360,9 +360,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) if (mss > 1460) icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); - rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER); - while (tcp_win_from_space(rcvmem) < mss) - rcvmem += 128; + rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER); rcvmem *= icwnd; -- cgit From 57b354e66b67c4c72468a26d4313d1217ef32e17 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 16 May 2013 23:36:32 +0000 Subject: dev: remove duplicate 'skb->dev = dev' in dev_forward_skb() This was added by commit 59b9997baba5 (Revert "net: maintain namespace isolation between vlan and real device"). In fact, before the initial commit - the one that is reverted -, this statement was not present. 'skb->dev = dev' is already done in eth_type_trans(), which is call just after. Spotted-by: Alain Ritoux Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fc1e289397f5..18e9730cc4be 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1629,7 +1629,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) return NET_RX_DROP; } skb->skb_iif = 0; - skb->dev = dev; skb_dst_drop(skb); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; -- cgit From caeaba79009c2ee858c3b2bf8caf922cd719fead Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 16 May 2013 22:32:00 +0000 Subject: ipv6: add support of peer address This patch adds the support of peer address for IPv6. For example, it is possible to specify the remote end of a 6inY tunnel. This was already possible in IPv4: ip addr add ip1 peer ip2 dev dev1 The peer address is specified with IFA_ADDRESS and the local address with IFA_LOCAL (like explained in include/uapi/linux/if_addr.h). Note that the API is not changed, because before this patch, it was not possible to specify two different addresses in IFA_LOCAL and IFA_REMOTE. There is a small change for the dump: if the peer is different from ::, IFA_ADDRESS will contain the peer address instead of the local address. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 64 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d1ab6ab29a55..d684d23bc027 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2402,6 +2402,7 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx, + const struct in6_addr *peer_pfx, unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { @@ -2457,6 +2458,8 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = jiffies; + if (peer_pfx) + ifp->peer_addr = *peer_pfx; spin_unlock_bh(&ifp->lock); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, @@ -2526,7 +2529,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL, ireq.ifr6_prefixlen, IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); @@ -3610,18 +3613,20 @@ restart: rcu_read_unlock_bh(); } -static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) +static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, + struct in6_addr **peer_pfx) { struct in6_addr *pfx = NULL; + *peer_pfx = NULL; + if (addr) pfx = nla_data(addr); if (local) { if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) - pfx = NULL; - else - pfx = nla_data(local); + *peer_pfx = pfx; + pfx = nla_data(local); } return pfx; @@ -3639,7 +3644,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; + struct in6_addr *pfx, *peer_pfx; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3647,7 +3652,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); if (pfx == NULL) return -EINVAL; @@ -3705,7 +3710,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; + struct in6_addr *pfx, *peer_pfx; struct inet6_ifaddr *ifa; struct net_device *dev; u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; @@ -3717,7 +3722,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); if (pfx == NULL) return -EINVAL; @@ -3745,7 +3750,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) * It would be best to check for !NLM_F_CREATE here but * userspace alreay relies on not having to provide this. */ - return inet6_addr_add(net, ifm->ifa_index, pfx, + return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx, ifm->ifa_prefixlen, ifa_flags, preferred_lft, valid_lft); } @@ -3802,6 +3807,7 @@ static inline int rt_scope(int ifa_scope) static inline int inet6_ifaddr_msgsize(void) { return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(16) /* IFA_LOCAL */ + nla_total_size(16) /* IFA_ADDRESS */ + nla_total_size(sizeof(struct ifa_cacheinfo)); } @@ -3840,13 +3846,22 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, valid = INFINITY_LIFE_TIME; } - if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || - put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } + if (ipv6_addr_type(&ifa->peer_addr) != IPV6_ADDR_ANY) { + if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 || + nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0) + goto error; + } else + if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0) + goto error; + + if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) + goto error; return nlmsg_end(skb, nlh); + +error: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, @@ -4046,7 +4061,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *addr = NULL; + struct in6_addr *addr = NULL, *peer; struct net_device *dev = NULL; struct inet6_ifaddr *ifa; struct sk_buff *skb; @@ -4056,7 +4071,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (err < 0) goto errout; - addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); if (addr == NULL) { err = -EINVAL; goto errout; @@ -4564,11 +4579,26 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); + if (ipv6_addr_type(&ifp->peer_addr) != IPV6_ADDR_ANY) + addrconf_prefix_route(&ifp->peer_addr, 128, + ifp->idev->dev, 0, 0); break; case RTM_DELADDR: if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); + if (ipv6_addr_type(&ifp->peer_addr) != IPV6_ADDR_ANY) { + struct rt6_info *rt; + struct net_device *dev = ifp->idev->dev; + + rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL, + dev->ifindex, 1); + if (rt) { + dst_hold(&rt->dst); + if (ip6_del_rt(rt)) + dst_free(&rt->dst); + } + } dst_hold(&ifp->rt->dst); if (ip6_del_rt(ifp->rt)) -- cgit From 3e59cb0ddfd2c59991f38e89352ad8a3c71b2374 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 17 May 2013 13:45:05 +0000 Subject: tcp: remove bad timeout logic in fast recovery tcp_timeout_skb() was intended to trigger fast recovery on timeout, unfortunately in reality it often causes spurious retransmission storms during fast recovery. The particular sign is a fast retransmit over the highest sacked sequence (SND.FACK). Currently the RTO timer re-arming (as in RFC6298) offers a nice cushion to avoid spurious timeout: when SND.UNA advances the sender re-arms RTO and extends the timeout by icsk_rto. The sender does not offset the time elapsed since the packet at SND.UNA was sent. But if the next (DUP)ACK arrives later than ~RTTVAR and triggers tcp_fastretrans_alert(), then tcp_timeout_skb() will mark any packet sent before the icsk_rto interval lost, including one that's above the highest sacked sequence. Most likely a large part of scorebard will be marked. If most packets are not lost then the subsequent DUPACKs with new SACK blocks will cause the sender to continue to retransmit packets beyond SND.FACK spuriously. Even if only one packet is lost the sender may falsely retransmit almost the entire window. The situation becomes common in the world of bufferbloat: the RTT continues to grow as the queue builds up but RTTVAR remains small and close to the minimum 200ms. If a data packet is lost and the DUPACK triggered by the next data packet is slightly delayed, then a spurious retransmission storm forms. As the original comment on tcp_timeout_skb() suggests: the usefulness of this feature is questionable. It also wastes cycles walking the sack scoreboard and is actually harmful because of false recovery. It's time to remove this. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Nandita Dukkipati Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 65 +--------------------------------------------------- 1 file changed, 1 insertion(+), 64 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b358e8c98607..d7d369428ae4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1255,8 +1255,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = prev; - if (skb == tp->scoreboard_skb_hint) - tp->scoreboard_skb_hint = prev; if (skb == tp->lost_skb_hint) { tp->lost_skb_hint = prev; tp->lost_cnt_hint -= tcp_skb_pcount(prev); @@ -1964,20 +1962,6 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) return true; } -static inline int tcp_skb_timedout(const struct sock *sk, - const struct sk_buff *skb) -{ - return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; -} - -static inline int tcp_head_timedout(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - - return tp->packets_out && - tcp_skb_timedout(sk, tcp_write_queue_head(sk)); -} - /* Linux NewReno/SACK/FACK/ECN state machine. * -------------------------------------- * @@ -2084,12 +2068,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) if (tcp_dupack_heuristics(tp) > tp->reordering) return true; - /* Trick#3 : when we use RFC2988 timer restart, fast - * retransmit can be triggered by timeout of queue head. - */ - if (tcp_is_fack(tp) && tcp_head_timedout(sk)) - return true; - /* Trick#4: It is still not OK... But will it be useful to delay * recovery more? */ @@ -2126,44 +2104,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) return false; } -/* New heuristics: it is possible only after we switched to restart timer - * each time when something is ACKed. Hence, we can detect timed out packets - * during fast retransmit without falling to slow start. - * - * Usefulness of this as is very questionable, since we should know which of - * the segments is the next to timeout which is relatively expensive to find - * in general case unless we add some data structure just for that. The - * current approach certainly won't find the right one too often and when it - * finally does find _something_ it usually marks large part of the window - * right away (because a retransmission with a larger timestamp blocks the - * loop from advancing). -ij - */ -static void tcp_timeout_skbs(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if (!tcp_is_fack(tp) || !tcp_head_timedout(sk)) - return; - - skb = tp->scoreboard_skb_hint; - if (tp->scoreboard_skb_hint == NULL) - skb = tcp_write_queue_head(sk); - - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (!tcp_skb_timedout(sk, skb)) - break; - - tcp_skb_mark_lost(tp, skb); - } - - tp->scoreboard_skb_hint = skb; - - tcp_verify_left_out(tp); -} - /* Detect loss in event "A" above by marking head of queue up as lost. * For FACK or non-SACK(Reno) senders, the first "packets" number of segments * are considered lost. For RFC3517 SACK, a segment is considered lost if it @@ -2249,8 +2189,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) else if (fast_rexmit) tcp_mark_head_lost(sk, 1, 1); } - - tcp_timeout_skbs(sk); } /* CWND moderation, preventing bursts due to too big ACKs @@ -2842,7 +2780,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, fast_rexmit = 1; } - if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) + if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); tcp_xmit_retransmit_queue(sk); @@ -3075,7 +3013,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - tp->scoreboard_skb_hint = NULL; if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = NULL; if (skb == tp->lost_skb_hint) -- cgit From 99bbc70741903c063b3ccad90a3e06fc55df9245 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 20 May 2013 04:02:32 +0000 Subject: rps: selective flow shedding during softnet overflow A cpu executing the network receive path sheds packets when its input queue grows to netdev_max_backlog. A single high rate flow (such as a spoofed source DoS) can exceed a single cpu processing rate and will degrade throughput of other flows hashed onto the same cpu. This patch adds a more fine grained hashtable. If the netdev backlog is above a threshold, IRQ cpus track the ratio of total traffic of each flow (using 4096 buckets, configurable). The ratio is measured by counting the number of packets per flow over the last 256 packets from the source cpu. Any flow that occupies a large fraction of this (set at 50%) will see packet drop while above the threshold. Tested: Setup is a muli-threaded UDP echo server with network rx IRQ on cpu0, kernel receive (RPS) on cpu0 and application threads on cpus 2--7 each handling 20k req/s. Throughput halves when hit with a 400 kpps antagonist storm. With this patch applied, antagonist overload is dropped and the server processes its complete load. The patch is effective when kernel receive processing is the bottleneck. The above RPS scenario is a extreme, but the same is reached with RFS and sufficient kernel processing (iptables, packet socket tap, ..). Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/Kconfig | 12 ++++++ net/core/dev.c | 48 ++++++++++++++++++++- net/core/net-procfs.c | 16 ++++++- net/core/sysctl_net_core.c | 104 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 177 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 2ddc9046868e..08de901415ee 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -259,6 +259,18 @@ config BPF_JIT packet sniffing (libpcap/tcpdump). Note : Admin should enable this feature changing /proc/sys/net/core/bpf_jit_enable +config NET_FLOW_LIMIT + boolean + depends on RPS + default y + ---help--- + The network stack has to drop packets when a receive processing CPU's + backlog reaches netdev_max_backlog. If a few out of many active flows + generate the vast majority of load, drop their traffic earlier to + maintain capacity for the other flows. This feature provides servers + with many clients some protection against DoS by a single (spoofed) + flow that greatly exceeds average workload. + menu "Network testing" config NET_PKTGEN diff --git a/net/core/dev.c b/net/core/dev.c index 18e9730cc4be..7229bc30e509 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3064,6 +3064,46 @@ static int rps_ipi_queued(struct softnet_data *sd) return 0; } +#ifdef CONFIG_NET_FLOW_LIMIT +int netdev_flow_limit_table_len __read_mostly = (1 << 12); +#endif + +static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) +{ +#ifdef CONFIG_NET_FLOW_LIMIT + struct sd_flow_limit *fl; + struct softnet_data *sd; + unsigned int old_flow, new_flow; + + if (qlen < (netdev_max_backlog >> 1)) + return false; + + sd = &__get_cpu_var(softnet_data); + + rcu_read_lock(); + fl = rcu_dereference(sd->flow_limit); + if (fl) { + new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); + old_flow = fl->history[fl->history_head]; + fl->history[fl->history_head] = new_flow; + + fl->history_head++; + fl->history_head &= FLOW_LIMIT_HISTORY - 1; + + if (likely(fl->buckets[old_flow])) + fl->buckets[old_flow]--; + + if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) { + fl->count++; + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); +#endif + return false; +} + /* * enqueue_to_backlog is called to queue an skb to a per CPU backlog * queue (may be a remote CPU queue). @@ -3073,13 +3113,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, { struct softnet_data *sd; unsigned long flags; + unsigned int qlen; sd = &per_cpu(softnet_data, cpu); local_irq_save(flags); rps_lock(sd); - if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { + qlen = skb_queue_len(&sd->input_pkt_queue); + if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (skb_queue_len(&sd->input_pkt_queue)) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); @@ -6269,6 +6311,10 @@ static int __init net_dev_init(void) sd->backlog.weight = weight_p; sd->backlog.gro_list = NULL; sd->backlog.gro_count = 0; + +#ifdef CONFIG_NET_FLOW_LIMIT + sd->flow_limit = NULL; +#endif } dev_boot_phase = 0; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 569d355fec3e..2bf83299600a 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -146,11 +146,23 @@ static void softnet_seq_stop(struct seq_file *seq, void *v) static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; + unsigned int flow_limit_count = 0; - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", +#ifdef CONFIG_NET_FLOW_LIMIT + struct sd_flow_limit *fl; + + rcu_read_lock(); + fl = rcu_dereference(sd->flow_limit); + if (fl) + flow_limit_count = fl->count; + rcu_read_unlock(); +#endif + + seq_printf(seq, + "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps); + sd->cpu_collision, sd->received_rps, flow_limit_count); return 0; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cfdb46ab3a7f..741db5fc7806 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -87,6 +87,96 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, } #endif /* CONFIG_RPS */ +#ifdef CONFIG_NET_FLOW_LIMIT +static DEFINE_MUTEX(flow_limit_update_mutex); + +static int flow_limit_cpu_sysctl(ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct sd_flow_limit *cur; + struct softnet_data *sd; + cpumask_var_t mask; + int i, len, ret = 0; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + if (write) { + ret = cpumask_parse_user(buffer, *lenp, mask); + if (ret) + goto done; + + mutex_lock(&flow_limit_update_mutex); + len = sizeof(*cur) + netdev_flow_limit_table_len; + for_each_possible_cpu(i) { + sd = &per_cpu(softnet_data, i); + cur = rcu_dereference_protected(sd->flow_limit, + lockdep_is_held(&flow_limit_update_mutex)); + if (cur && !cpumask_test_cpu(i, mask)) { + RCU_INIT_POINTER(sd->flow_limit, NULL); + synchronize_rcu(); + kfree(cur); + } else if (!cur && cpumask_test_cpu(i, mask)) { + cur = kzalloc(len, GFP_KERNEL); + if (!cur) { + /* not unwinding previous changes */ + ret = -ENOMEM; + goto write_unlock; + } + cur->num_buckets = netdev_flow_limit_table_len; + rcu_assign_pointer(sd->flow_limit, cur); + } + } +write_unlock: + mutex_unlock(&flow_limit_update_mutex); + } else { + if (*ppos || !*lenp) { + *lenp = 0; + goto done; + } + + cpumask_clear(mask); + rcu_read_lock(); + for_each_possible_cpu(i) { + sd = &per_cpu(softnet_data, i); + if (rcu_dereference(sd->flow_limit)) + cpumask_set_cpu(i, mask); + } + rcu_read_unlock(); + + len = cpumask_scnprintf(buffer, *lenp, mask); + *lenp = len + 1; + *ppos += len + 1; + } + +done: + free_cpumask_var(mask); + return ret; +} + +static int flow_limit_table_len_sysctl(ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + unsigned int old, *ptr; + int ret; + + mutex_lock(&flow_limit_update_mutex); + + ptr = table->data; + old = *ptr; + ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (!ret && write && !is_power_of_2(*ptr)) { + *ptr = old; + ret = -EINVAL; + } + + mutex_unlock(&flow_limit_update_mutex); + return ret; +} +#endif /* CONFIG_NET_FLOW_LIMIT */ + static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { @@ -180,6 +270,20 @@ static struct ctl_table net_core_table[] = { .proc_handler = rps_sock_flow_sysctl }, #endif +#ifdef CONFIG_NET_FLOW_LIMIT + { + .procname = "flow_limit_cpu_bitmap", + .mode = 0644, + .proc_handler = flow_limit_cpu_sysctl + }, + { + .procname = "flow_limit_table_len", + .data = &netdev_flow_limit_table_len, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = flow_limit_table_len_sysctl + }, +#endif /* CONFIG_NET_FLOW_LIMIT */ #endif /* CONFIG_NET */ { .procname = "netdev_budget", -- cgit From 71cea17ed39fdf1c0634f530ddc6a2c2fc601c2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 May 2013 06:52:26 +0000 Subject: tcp: md5: remove spinlock usage in fast path TCP md5 code uses per cpu variables but protects access to them with a shared spinlock, which is a contention point. [ tcp_md5sig_pool_lock is locked twice per incoming packet ] Makes things much simpler, by allocating crypto structures once, first time a socket needs md5 keys, and not deallocating them as they are really small. Next step would be to allow crypto allocations being done in a NUMA aware way. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 98 +++++++++++------------------------------------- net/ipv4/tcp_ipv4.c | 10 +---- net/ipv4/tcp_minisocks.c | 6 +-- 3 files changed, 24 insertions(+), 90 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dcb116dde216..53d9c120fbb8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3095,9 +3095,8 @@ int tcp_gro_complete(struct sk_buff *skb) EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG -static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; -static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); +static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_MUTEX(tcp_md5sig_mutex); static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) { @@ -3112,30 +3111,14 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) free_percpu(pool); } -void tcp_free_md5sig_pool(void) -{ - struct tcp_md5sig_pool __percpu *pool = NULL; - - spin_lock_bh(&tcp_md5sig_pool_lock); - if (--tcp_md5sig_users == 0) { - pool = tcp_md5sig_pool; - tcp_md5sig_pool = NULL; - } - spin_unlock_bh(&tcp_md5sig_pool_lock); - if (pool) - __tcp_free_md5sig_pool(pool); -} -EXPORT_SYMBOL(tcp_free_md5sig_pool); - -static struct tcp_md5sig_pool __percpu * -__tcp_alloc_md5sig_pool(struct sock *sk) +static void __tcp_alloc_md5sig_pool(void) { int cpu; struct tcp_md5sig_pool __percpu *pool; pool = alloc_percpu(struct tcp_md5sig_pool); if (!pool) - return NULL; + return; for_each_possible_cpu(cpu) { struct crypto_hash *hash; @@ -3146,53 +3129,27 @@ __tcp_alloc_md5sig_pool(struct sock *sk) per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; } - return pool; + /* before setting tcp_md5sig_pool, we must commit all writes + * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + */ + smp_wmb(); + tcp_md5sig_pool = pool; + return; out_free: __tcp_free_md5sig_pool(pool); - return NULL; } -struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) +bool tcp_alloc_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *pool; - bool alloc = false; - -retry: - spin_lock_bh(&tcp_md5sig_pool_lock); - pool = tcp_md5sig_pool; - if (tcp_md5sig_users++ == 0) { - alloc = true; - spin_unlock_bh(&tcp_md5sig_pool_lock); - } else if (!pool) { - tcp_md5sig_users--; - spin_unlock_bh(&tcp_md5sig_pool_lock); - cpu_relax(); - goto retry; - } else - spin_unlock_bh(&tcp_md5sig_pool_lock); - - if (alloc) { - /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool __percpu *p; - - p = __tcp_alloc_md5sig_pool(sk); - spin_lock_bh(&tcp_md5sig_pool_lock); - if (!p) { - tcp_md5sig_users--; - spin_unlock_bh(&tcp_md5sig_pool_lock); - return NULL; - } - pool = tcp_md5sig_pool; - if (pool) { - /* oops, it has already been assigned. */ - spin_unlock_bh(&tcp_md5sig_pool_lock); - __tcp_free_md5sig_pool(p); - } else { - tcp_md5sig_pool = pool = p; - spin_unlock_bh(&tcp_md5sig_pool_lock); - } + if (unlikely(!tcp_md5sig_pool)) { + mutex_lock(&tcp_md5sig_mutex); + + if (!tcp_md5sig_pool) + __tcp_alloc_md5sig_pool(); + + mutex_unlock(&tcp_md5sig_mutex); } - return pool; + return tcp_md5sig_pool != NULL; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -3209,28 +3166,15 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) struct tcp_md5sig_pool __percpu *p; local_bh_disable(); - - spin_lock(&tcp_md5sig_pool_lock); - p = tcp_md5sig_pool; - if (p) - tcp_md5sig_users++; - spin_unlock(&tcp_md5sig_pool_lock); - + p = ACCESS_ONCE(tcp_md5sig_pool); if (p) - return this_cpu_ptr(p); + return __this_cpu_ptr(p); local_bh_enable(); return NULL; } EXPORT_SYMBOL(tcp_get_md5sig_pool); -void tcp_put_md5sig_pool(void) -{ - local_bh_enable(); - tcp_free_md5sig_pool(); -} -EXPORT_SYMBOL(tcp_put_md5sig_pool); - int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, const struct tcphdr *th) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 719652305a29..d20ede0c9593 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1026,7 +1026,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = sock_kmalloc(sk, sizeof(*key), gfp); if (!key) return -ENOMEM; - if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { + if (!tcp_alloc_md5sig_pool()) { sock_kfree_s(sk, key, sizeof(*key)); return -ENOMEM; } @@ -1044,9 +1044,7 @@ EXPORT_SYMBOL(tcp_md5_do_add); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { - struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct tcp_md5sig_info *md5sig; key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (!key) @@ -1054,10 +1052,6 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); - md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); - if (hlist_empty(&md5sig->head)) - tcp_free_md5sig_pool(); return 0; } EXPORT_SYMBOL(tcp_md5_do_del); @@ -1071,8 +1065,6 @@ static void tcp_clear_md5_list(struct sock *sk) md5sig = rcu_dereference_protected(tp->md5sig_info, 1); - if (!hlist_empty(&md5sig->head)) - tcp_free_md5sig_pool(); hlist_for_each_entry_safe(key, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0f0178827259..ab1c08658528 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -317,7 +317,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) key = tp->af_specific->md5_lookup(sk, sk); if (key != NULL) { tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); - if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL) + if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()) BUG(); } } while (0); @@ -358,10 +358,8 @@ void tcp_twsk_destructor(struct sock *sk) #ifdef CONFIG_TCP_MD5SIG struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_key) { - tcp_free_md5sig_pool(); + if (twsk->tw_md5_key) kfree_rcu(twsk->tw_md5_key, rcu); - } #endif } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); -- cgit From 1c8ad5bfa2be5025b0c81e3c2decd0574d453ab1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 21 May 2013 21:52:54 +0000 Subject: bridge: use the bridge IP addr as source addr for querier Quote from Adam: "If it is believed that the use of 0.0.0.0 as the IP address is what is causing strange behaviour on other devices then is there a good reason that a bridge rather than a router shouldn't be the active querier? If not then using the bridge IP address and having the querier enabled by default may be a reasonable solution (provided that our querier obeys the election rules and shuts up if it sees a query from a lower IP address that isn't 0.0.0.0). Just because a device is the elected querier for IGMP doesn't appear to mean it is required to perform any other routing functions." And introduce a new troggle for it, as suggested by Herbert. Suggested-by: Adam Baker Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Adam Baker Signed-off-by: Cong Wang Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 5 ++++- net/bridge/br_private.h | 1 + net/bridge/br_sysfs_br.c | 26 ++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 81f2389f78eb..247514793101 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #if IS_ENABLED(CONFIG_IPV6) #include @@ -381,7 +382,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->protocol = IPPROTO_IGMP; - iph->saddr = 0; + iph->saddr = br->multicast_query_use_ifaddr ? + inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0; iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; @@ -1618,6 +1620,7 @@ void br_multicast_init(struct net_bridge *br) br->multicast_router = 1; br->multicast_querier = 0; + br->multicast_query_use_ifaddr = 0; br->multicast_last_member_count = 2; br->multicast_startup_query_count = 2; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d2c043a857b6..e260710a01d4 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -249,6 +249,7 @@ struct net_bridge u8 multicast_disabled:1; u8 multicast_querier:1; + u8 multicast_query_use_ifaddr:1; u32 hash_elasticity; u32 hash_max; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 8baa9c08e1a4..394bb96b6087 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -375,6 +375,31 @@ static ssize_t store_multicast_snooping(struct device *d, static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, show_multicast_snooping, store_multicast_snooping); +static ssize_t show_multicast_query_use_ifaddr(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr); +} + +static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_use_ifaddr = !!val; + return 0; +} + +static ssize_t +store_multicast_query_use_ifaddr(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_use_ifaddr); +} +static DEVICE_ATTR(multicast_query_use_ifaddr, S_IRUGO | S_IWUSR, + show_multicast_query_use_ifaddr, + store_multicast_query_use_ifaddr); + static ssize_t show_multicast_querier(struct device *d, struct device_attribute *attr, char *buf) @@ -734,6 +759,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_router.attr, &dev_attr_multicast_snooping.attr, &dev_attr_multicast_querier.attr, + &dev_attr_multicast_query_use_ifaddr.attr, &dev_attr_hash_elasticity.attr, &dev_attr_hash_max.attr, &dev_attr_multicast_last_member_count.attr, -- cgit From 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 21 May 2013 21:52:55 +0000 Subject: bridge: only expire the mdb entry when query is received Currently we arm the expire timer when the mdb entry is added, however, this causes problem when there is no querier sent out after that. So we should only arm the timer when a corresponding query is received, as suggested by Herbert. And he also mentioned "if there is no querier then group subscriptions shouldn't expire. There has to be at least one querier in the network for this thing to work. Otherwise it just degenerates into a non-snooping switch, which is OK." Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Adam Baker Signed-off-by: Cong Wang Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 39 ++++++++++++--------------------------- net/bridge/br_private.h | 1 + 2 files changed, 13 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 247514793101..40bda804fbd9 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -617,8 +617,6 @@ rehash: mp->br = br; mp->addr = *group; - setup_timer(&mp->timer, br_multicast_group_expired, - (unsigned long)mp); hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); mdb->size++; @@ -656,7 +654,6 @@ static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - unsigned long now = jiffies; int err; spin_lock(&br->multicast_lock); @@ -671,7 +668,6 @@ static int br_multicast_add_group(struct net_bridge *br, if (!port) { mp->mglist = true; - mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -679,7 +675,7 @@ static int br_multicast_add_group(struct net_bridge *br, (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->port == port) - goto found; + goto out; if ((unsigned long)p->port < (unsigned long)port) break; } @@ -690,8 +686,6 @@ static int br_multicast_add_group(struct net_bridge *br, rcu_assign_pointer(*pp, p); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); -found: - mod_timer(&p->timer, now + br->multicast_membership_interval); out: err = 0; @@ -1131,6 +1125,10 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!mp) goto out; + setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); + mod_timer(&mp->timer, now + br->multicast_membership_interval); + mp->timer_armed = true; + max_delay *= br->multicast_last_member_count; if (mp->mglist && @@ -1205,6 +1203,10 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!mp) goto out; + setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); + mod_timer(&mp->timer, now + br->multicast_membership_interval); + mp->timer_armed = true; + max_delay *= br->multicast_last_member_count; if (mp->mglist && (timer_pending(&mp->timer) ? @@ -1263,7 +1265,7 @@ static void br_multicast_leave_group(struct net_bridge *br, call_rcu_bh(&p->rcu, br_multicast_free_pg); br_mdb_notify(br->dev, port, group, RTM_DELMDB); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->mglist && mp->timer_armed && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } @@ -1275,30 +1277,12 @@ static void br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (mp->mglist && + if (mp->mglist && mp->timer_armed && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { mod_timer(&mp->timer, time); } - - goto out; - } - - for (p = mlock_dereference(mp->ports, br); - p != NULL; - p = mlock_dereference(p->next, br)) { - if (p->port != port) - continue; - - if (!hlist_unhashed(&p->mglist) && - (timer_pending(&p->timer) ? - time_after(p->timer.expires, time) : - try_to_del_timer_sync(&p->timer) >= 0)) { - mod_timer(&p->timer, time); - } - - break; } out: @@ -1674,6 +1658,7 @@ void br_multicast_stop(struct net_bridge *br) hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); + mp->timer_armed = false; call_rcu_bh(&mp->rcu, br_multicast_free_group); } } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e260710a01d4..1b0ac95a5c37 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -112,6 +112,7 @@ struct net_bridge_mdb_entry struct timer_list timer; struct br_ip addr; bool mglist; + bool timer_armed; }; struct net_bridge_mdb_htable -- cgit From 6b7df111ece130fa979a0c4f58e53674c1e47d3e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 21 May 2013 21:52:56 +0000 Subject: bridge: send query as soon as leave is received Continue sending queries when leave is received if the user marks it as a querier. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Adam Baker Signed-off-by: Cong Wang Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 40bda804fbd9..37a467697967 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1250,6 +1250,32 @@ static void br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; + if (br->multicast_querier && + !timer_pending(&br->multicast_querier_timer)) { + __br_multicast_send_query(br, port, &mp->addr); + + time = jiffies + br->multicast_last_member_count * + br->multicast_last_member_interval; + mod_timer(port ? &port->multicast_query_timer : + &br->multicast_query_timer, time); + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; + } + } + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { struct net_bridge_port_group __rcu **pp; -- cgit From 1cdbcb7957cf9e5f841dbcde9b38fd18a804208b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 19 May 2013 15:46:49 +0000 Subject: net: Loosen constraints for recalculating checksum in skb_segment() This is a generic solution to resolve a specific problem that I have observed. If the encapsulation of an skb changes then ability to offload checksums may also change. In particular it may be necessary to perform checksumming in software. An example of such a case is where a non-GRE packet is received but is to be encapsulated and transmitted as GRE. Another example relates to my proposed support for for packets that are non-MPLS when received but MPLS when transmitted. The cost of this change is that the value of the csum variable may be checked when it previously was not. In the case where the csum variable is true this is pure overhead. In the case where the csum variable is false it leads to software checksumming, which I believe also leads to correct checksums in transmitted packets for the cases described above. Further analysis: This patch relies on the return value of can_checksum_protocol() being correct and in turn the return value of skb_network_protocol(), used to provide the protocol parameter of can_checksum_protocol(), being correct. It also relies on the features passed to skb_segment() and in turn to can_checksum_protocol() being correct. I believe that this problem has not been observed for VLANs because it appears that almost all drivers, the exception being xgbe, set vlan_features such that that the checksum offload support for VLAN packets is greater than or equal to that of non-VLAN packets. I wonder if the code in xgbe may be an oversight and the hardware does support checksumming of VLAN packets. If so it may be worth updating the vlan_features of the driver as this patch will force such checksums to be performed in software rather than hardware. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index af9185d0be6a..d6298914f4e7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2853,7 +2853,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) - continue; + goto perform_csum_check; if (!sg) { nskb->ip_summed = CHECKSUM_NONE; @@ -2917,6 +2917,7 @@ skip_fraglist: nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; +perform_csum_check: if (!csum) { nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); -- cgit From e43ac79a4bc6ca90de4ba10983b4ca39cd215b4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 May 2013 08:16:46 +0000 Subject: sch_tbf: segment too big GSO packets If a GSO packet has a length above tbf burst limit, the packet is currently silently dropped. Current way to handle this is to set the device in non GSO/TSO mode, or setting high bursts, and its sub optimal. We can actually segment too big GSO packets, and send individual segments as tbf parameters allow, allowing for better interoperability. Signed-off-by: Eric Dumazet Cc: Ben Hutchings Cc: Jiri Pirko Cc: Jamal Hadi Salim Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index c8388f3c3426..38008b0980d9 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -116,14 +116,57 @@ struct tbf_sched_data { struct qdisc_watchdog watchdog; /* Watchdog timer */ }; + +/* GSO packet is too big, segment it so that tbf can transmit + * each segment in time + */ +static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) +{ + struct tbf_sched_data *q = qdisc_priv(sch); + struct sk_buff *segs, *nskb; + netdev_features_t features = netif_skb_features(skb); + int ret, nb; + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + + if (IS_ERR_OR_NULL(segs)) + return qdisc_reshape_fail(skb, sch); + + nb = 0; + while (segs) { + nskb = segs->next; + segs->next = NULL; + if (likely(segs->len <= q->max_size)) { + qdisc_skb_cb(segs)->pkt_len = segs->len; + ret = qdisc_enqueue(segs, q->qdisc); + } else { + ret = qdisc_reshape_fail(skb, sch); + } + if (ret != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + } else { + nb++; + } + segs = nskb; + } + sch->q.qlen += nb; + if (nb > 1) + qdisc_tree_decrease_qlen(sch, 1 - nb); + consume_skb(skb); + return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; +} + static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); int ret; - if (qdisc_pkt_len(skb) > q->max_size) + if (qdisc_pkt_len(skb) > q->max_size) { + if (skb_is_gso(skb)) + return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); - + } ret = qdisc_enqueue(skb, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) -- cgit From 7996c799ae329fab1b9c8d475fd08883f0499ed9 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 May 2013 05:41:06 +0000 Subject: ipv6: use ipv6_addr_any() helper ipv6_addr_any() is a faster way to determine if an addr is ipv6 any addr, no need to compute the addr type. Cc: Nicolas Dichtel Cc: Hideaki YOSHIFUJI Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d684d23bc027..e05269647c2b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3846,7 +3846,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, valid = INFINITY_LIFE_TIME; } - if (ipv6_addr_type(&ifa->peer_addr) != IPV6_ADDR_ANY) { + if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 || nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0) goto error; @@ -4579,7 +4579,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); - if (ipv6_addr_type(&ifp->peer_addr) != IPV6_ADDR_ANY) + if (!ipv6_addr_any(&ifp->peer_addr)) addrconf_prefix_route(&ifp->peer_addr, 128, ifp->idev->dev, 0, 0); break; @@ -4587,7 +4587,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); - if (ipv6_addr_type(&ifp->peer_addr) != IPV6_ADDR_ANY) { + if (!ipv6_addr_any(&ifp->peer_addr)) { struct rt6_info *rt; struct net_device *dev = ifp->idev->dev; -- cgit From 8892475386e819aa50856947948c546ccc964d96 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 May 2013 05:52:22 +0000 Subject: ipv6: use ipv6_addr_scope() helper ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK could be replaced by ipv6_addr_scope(), which is slightly faster. Cc: Hideaki YOSHIFUJI Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e05269647c2b..432e084b6b62 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1126,8 +1126,7 @@ retry: ift = !max_addresses || ipv6_count_addresses(idev) < max_addresses ? - ipv6_add_addr(idev, &addr, tmp_plen, - ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, + ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_scope(&addr), addr_flags) : NULL; if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); -- cgit From 27e7190efd5b2f728686a8293af6d9bd34c4e562 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 May 2013 11:10:57 +0000 Subject: netfilter: xt_CT: optimize XT_CT_NOTRACK The percpu untracked ct are not currently used for XT_CT_NOTRACK. xt_ct_tg_check()/xt_ct_target() provides a single ct. Thats not optimal as the ct->ct_general.use cache line will bounce among cpus. Use the intended [1] thing : xt_ct_target() should select the percpu object. [1] Refs : commit 5bfddbd46a95c97 ("netfilter: nf_conntrack: IPS_UNTRACKED bit") commit b3c5163fe0193a7 ("netfilter: nf_conntrack: per_cpu untracking") Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index a60261cb0e80..da35ac06a975 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -26,6 +26,9 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) if (skb->nfct != NULL) return XT_CONTINUE; + /* special case the untracked ct : we want the percpu object */ + if (!ct) + ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); skb->nfct = &ct->ct_general; skb->nfctinfo = IP_CT_NEW; @@ -186,8 +189,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, int ret = -EOPNOTSUPP; if (info->flags & XT_CT_NOTRACK) { - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); + ct = NULL; goto out; } @@ -311,7 +313,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn *ct = info->ct; struct nf_conn_help *help; - if (!nf_ct_is_untracked(ct)) { + if (ct && !nf_ct_is_untracked(ct)) { help = nfct_help(ct); if (help) module_put(help->helper->me); @@ -319,8 +321,8 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, nf_ct_l3proto_module_put(par->family); xt_ct_destroy_timeout(ct); + nf_ct_put(info->ct); } - nf_ct_put(info->ct); } static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) -- cgit From 00028aa37098168048728acc32ab0206687f2920 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 May 2013 11:01:06 +0000 Subject: netfilter: xt_socket: use IP early demux With IP early demux added in linux-3.6, we perform TCP lookup in IP layer before iptables hooks. We can avoid doing a second lookup in xt_socket. Signed-off-by: Eric Dumazet Acked-by: David S. Miller Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 63b2bdb59e95..02704245710e 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -107,7 +107,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; - struct sock *sk; + struct sock *sk = skb->sk; __be32 uninitialized_var(daddr), uninitialized_var(saddr); __be16 uninitialized_var(dport), uninitialized_var(sport); u8 uninitialized_var(protocol); @@ -155,9 +155,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, } #endif - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, - saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); - if (sk != NULL) { + if (!sk) + sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, + saddr, daddr, sport, dport, + par->in, NFT_LOOKUP_ANY); + if (sk) { bool wildcard; bool transparent = true; @@ -173,7 +175,8 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - xt_socket_put_sk(sk); + if (sk != skb->sk) + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; @@ -260,7 +263,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; - struct sock *sk; + struct sock *sk = skb->sk; struct in6_addr *daddr = NULL, *saddr = NULL; __be16 uninitialized_var(dport), uninitialized_var(sport); int thoff = 0, uninitialized_var(tproto); @@ -291,9 +294,11 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) return false; } - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, - saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); - if (sk != NULL) { + if (!sk) + sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + saddr, daddr, sport, dport, + par->in, NFT_LOOKUP_ANY); + if (sk) { bool wildcard; bool transparent = true; @@ -309,7 +314,8 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - xt_socket_put_sk(sk); + if (sk != skb->sk) + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; -- cgit From 8bc14d25ffb9dfc242d3a877bb4fe683adb27692 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 16 May 2013 22:07:22 +0000 Subject: bridge: netfilter: using strlcpy() instead of strncpy() 'name' has already set all zero when it is defined, so not need let strncpy() to pad it again. 'name' is a string, better always let is NUL terminated, so use strlcpy() instead of strncpy(). Signed-off-by: Chen Gang Acked-by: Bart De Schuymer Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3d110c4fc787..ac7802428384 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1339,7 +1339,7 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m, /* ebtables expects 32 bytes long names but xt_match names are 29 bytes long. Copy 29 bytes and fill remaining bytes with zeroes. */ - strncpy(name, m->u.match->name, sizeof(name)); + strlcpy(name, m->u.match->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; @@ -1351,7 +1351,7 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, char __user *hlp = ubase + ((char *)w - base); char name[EBT_FUNCTION_MAXNAMELEN] = {}; - strncpy(name, w->u.watcher->name, sizeof(name)); + strlcpy(name, w->u.watcher->name, sizeof(name)); if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; @@ -1377,7 +1377,7 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase); if (ret != 0) return ret; - strncpy(name, t->u.target->name, sizeof(name)); + strlcpy(name, t->u.target->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; -- cgit From 6d11cfdba52af08b889fd6d3ee4212930493eb38 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 May 2013 22:42:36 +0000 Subject: netfilter: don't panic on error while walking through the init path Don't panic if we hit an error while adding the nf_log or pernet netfilter support, just bail out. Signed-off-by: Pablo Neira Ayuso Acked-by: Gao feng --- net/netfilter/core.c | 21 +++++++++++++++------ net/netfilter/nf_log.c | 5 +---- net/socket.c | 4 +++- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 07c865a31a3d..300539db7bb1 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -302,17 +302,26 @@ static struct pernet_operations netfilter_net_ops = { .exit = netfilter_net_exit, }; -void __init netfilter_init(void) +int __init netfilter_init(void) { - int i, h; + int i, h, ret; + for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); } - if (register_pernet_subsys(&netfilter_net_ops) < 0) - panic("cannot create netfilter proc entry"); + ret = register_pernet_subsys(&netfilter_net_ops); + if (ret < 0) + goto err; + + ret = netfilter_log_init(); + if (ret < 0) + goto err_pernet; - if (netfilter_log_init() < 0) - panic("cannot initialize nf_log"); + return 0; +err_pernet: + unregister_pernet_subsys(&netfilter_net_ops); +err: + return ret; } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 388656d5a9ec..bd5474adcabc 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -368,10 +368,7 @@ static int __net_init nf_log_net_init(struct net *net) return 0; out_sysctl: - /* For init_net: errors will trigger panic, don't unroll on error. */ - if (!net_eq(net, &init_net)) - remove_proc_entry("nf_log", net->nf.proc_netfilter); - + remove_proc_entry("nf_log", net->nf.proc_netfilter); return ret; } diff --git a/net/socket.c b/net/socket.c index 6b94633ca61d..734194d36242 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2612,7 +2612,9 @@ static int __init sock_init(void) */ #ifdef CONFIG_NETFILTER - netfilter_init(); + err = netfilter_init(); + if (err) + goto out; #endif #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING -- cgit From de94c4591bd606729af1b913d6e98c6c449e42df Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 May 2013 22:42:37 +0000 Subject: netfilter: {ipt,ebt}_ULOG: rise warning on deprecation This target has been superseded by NFLOG. Spot a warning so we prepare removal in a couple of years. Signed-off-by: Pablo Neira Ayuso Acked-by: Gao feng --- net/bridge/netfilter/ebt_ulog.c | 6 ++++++ net/ipv4/netfilter/Kconfig | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index fc1905c51417..2ec6c19ff903 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -267,6 +267,12 @@ static int ebt_ulog_tg_check(const struct xt_tgchk_param *par) { struct ebt_ulog_info *uloginfo = par->targinfo; + if (!par->net->xt.ebt_ulog_warn_deprecated) { + pr_info("ebt_ulog is deprecated and it will be removed soon, " + "use ebt_nflog instead\n"); + par->net->xt.ebt_ulog_warn_deprecated = true; + } + if (uloginfo->nlgroup > 31) return -EINVAL; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e7916c193932..4e9028017428 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -111,7 +111,7 @@ config IP_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_ULOG - tristate "ULOG target support" + tristate "ULOG target support (obsolete)" default m if NETFILTER_ADVANCED=n ---help--- diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index f8a222cb6448..c1953d07e2f4 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -325,6 +325,12 @@ static int ulog_tg_check(const struct xt_tgchk_param *par) { const struct ipt_ulog_info *loginfo = par->targinfo; + if (!par->net->xt.ulog_warn_deprecated) { + pr_info("ULOG is deprecated and it will be removed soon, " + "use NFLOG instead\n"); + par->net->xt.ulog_warn_deprecated = true; + } + if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { pr_debug("prefix not null-terminated\n"); return -EINVAL; -- cgit From a38e5e230e3f4e7bc9195d3e7a81567c888257ca Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 22 May 2013 14:50:32 +0900 Subject: ipvs: use cond_resched_rcu() helper when walking connections This avoids the situation where walking of a large number of connections may prevent scheduling for a long time while also avoiding excessive calls to rcu_read_unlock() and rcu_read_lock(). Note that in the case of !CONFIG_PREEMPT_RCU this will add a call to cond_resched(). Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Acked-by: Peter Zijlstra Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a083bda322b6..c8c52a98590b 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -975,8 +975,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) return cp; } } - rcu_read_unlock(); - rcu_read_lock(); + cond_resched_rcu(); } return NULL; @@ -1015,8 +1014,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) iter->l = &ip_vs_conn_tab[idx]; return cp; } - rcu_read_unlock(); - rcu_read_lock(); + cond_resched_rcu(); } iter->l = NULL; return NULL; @@ -1206,17 +1204,13 @@ void ip_vs_random_dropentry(struct net *net) int idx; struct ip_vs_conn *cp, *cp_c; + rcu_read_lock(); /* * Randomly scan 1/32 of the whole table every second */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { unsigned int hash = net_random() & ip_vs_conn_tab_mask; - /* - * Lock is actually needed in this loop. - */ - rcu_read_lock(); - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ @@ -1252,8 +1246,9 @@ void ip_vs_random_dropentry(struct net *net) __ip_vs_conn_put(cp); } } - rcu_read_unlock(); + cond_resched_rcu(); } + rcu_read_unlock(); } @@ -1267,11 +1262,8 @@ static void ip_vs_conn_flush(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); flush_again: + rcu_read_lock(); for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - /* - * Lock is actually needed in this loop. - */ - rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) @@ -1286,8 +1278,9 @@ flush_again: __ip_vs_conn_put(cp); } } - rcu_read_unlock(); + cond_resched_rcu(); } + rcu_read_unlock(); /* the counter may be not NULL, because maybe some conn entries are run by slow timer handler or unhashed but still referred */ -- cgit From 5f38a11274f0e74ec0e499bc779d355510b39790 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2013 23:09:56 +0200 Subject: mac80211: assign AP_VLAN hw queues correctly A lot of code in mac80211 assumes that the hw queues are set up correctly for all interfaces (except for monitor) but this isn't true for AP_VLAN interfaces. Fix this by copying the AP master configuration when an AP VLAN is brought up, after this the AP interface can't change its configuration any more and needs to be brought down to change it, which also forces AP_VLAN interfaces down, so just copying in open() is sufficient. Reported-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 68f51c3af49f..00e2238355f0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -474,6 +474,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) master->control_port_protocol; sdata->control_port_no_encrypt = master->control_port_no_encrypt; + sdata->vif.cab_queue = master->vif.cab_queue; + memcpy(sdata->vif.hw_queue, master->vif.hw_queue, + sizeof(sdata->vif.hw_queue)); break; } case NL80211_IFTYPE_AP: -- cgit From 4c8a9d4bfaf7dbc7d2168494904d79d22cc01db7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 May 2013 01:06:09 +0200 Subject: mac80211: close AP_VLAN interfaces before unregistering all Since Eric's commit efe117ab8 ("Speedup ieee80211_remove_interfaces") there's a bug in mac80211 when it unregisters with AP_VLAN interfaces up. If the AP_VLAN interface was registered after the AP it belongs to (which is the typical case) and then we get into this code path, unregister_netdevice_many() will crash because it isn't prepared to deal with interfaces being closed in the middle of it. Exactly this happens though, because we iterate the list, find the AP master this AP_VLAN belongs to and dev_close() the dependent VLANs. After this, unregister_netdevice_many() won't pick up the fact that the AP_VLAN is already down and will do it again, causing a crash. Cc: stable@vger.kernel.org [2.6.33+] Cc: Eric Dumazet Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 00e2238355f0..ceef64426a8d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1703,6 +1703,15 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) ASSERT_RTNL(); + /* + * Close all AP_VLAN interfaces first, as otherwise they + * might be closed while the AP interface they belong to + * is closed, causing unregister_netdevice_many() to crash. + */ + list_for_each_entry(sdata, &local->interfaces, list) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + dev_close(sdata->dev); + mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); -- cgit From 161f65ba3583b84b4714f21dbee263f99824c516 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 22 May 2013 07:49:34 +0000 Subject: bridge: Set vlan_features to allow offloads on vlans. When vlan device is configured on top of the brige, it does not support any offload capabilities because the bridge device does not initiliaze vlan_fatures. Set vlan_fatures to be equivalent to hw_fatures. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_device.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 967312803e41..75f3239130f8 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -22,6 +22,9 @@ #include #include "br_private.h" +#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM) + /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -346,12 +349,10 @@ void br_dev_setup(struct net_device *dev) dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX; - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX; + dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | + NETIF_F_HW_VLAN_CTAG_TX; + dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; + dev->vlan_features = COMMON_FEATURES; br->dev = dev; spin_lock_init(&br->lock); -- cgit From 786677d100600b7f6089bae0d3967c1b901a6141 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 May 2013 12:05:45 +0200 Subject: mac80211: add STBC flag for radiotap Some chips can tell us if received frame was encoded with STBC or not. To make this information available in user space we can use updated radiotap specification: http://www.radiotap.org/defined-fields/MCS This patch will set number of STBC encoded spatial streams (Nss). The HAVE_STBC flag should be provided by driver. Signed-off-by: Oleksij Rempel Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2c8c5236c4..7507f7cdd68c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -258,6 +258,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos += 2; if (status->flag & RX_FLAG_HT) { + unsigned int stbc; + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); *pos++ = local->hw.radiotap_mcs_details; *pos = 0; @@ -267,6 +269,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos |= IEEE80211_RADIOTAP_MCS_BW_40; if (status->flag & RX_FLAG_HT_GF) *pos |= IEEE80211_RADIOTAP_MCS_FMT_GF; + stbc = (status->flag & RX_FLAG_STBC_MASK) >> RX_FLAG_STBC_SHIFT; + *pos |= stbc << IEEE80211_RADIOTAP_MCS_STBC_SHIFT; pos++; *pos++ = status->rate_idx; } -- cgit From 5e4b6f5698421d94226cc2f80eae6d613c9acef8 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 16 May 2013 20:11:08 +0300 Subject: cfg80211: Allow TDLS peer AID to be configured for VHT VHT uses peer AID in the PARTIAL_AID field in TDLS frames. The current design for TDLS is to first add a dummy STA entry before completing TDLS Setup and then update information on this STA entry based on what was received from the peer during the setup exchange. In theory, this could use NL80211_ATTR_STA_AID to set the peer AID just like this is used in AP mode to set the AID of an association station. However, existing cfg80211 validation rules prevent this attribute from being used with set_station operation. To avoid interoperability issues between different kernel and user space version combinations, introduce a new nl80211 attribute for the purpose of setting TDLS peer AID. This attribute can be used in both the new_station and set_station operations. It is not supposed to be allowed to change the AID value during the lifetime of the STA entry, but that validation is left for drivers to do in the change_station callback. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5f10f7acfa06..14276af7964b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -378,6 +378,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -3872,6 +3873,8 @@ static int nl80211_set_station_tdls(struct genl_info *info, struct station_parameters *params) { /* Dummy STA entry gets updated once the peer capabilities are known */ + if (info->attrs[NL80211_ATTR_PEER_AID]) + params->aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params->ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); @@ -4012,7 +4015,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID]) + if (!info->attrs[NL80211_ATTR_STA_AID] && + !info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -4023,7 +4027,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (info->attrs[NL80211_ATTR_STA_AID]) + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + else + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; -- cgit From d4a5a48976d12ab340ed34605b5f5049b123d868 Mon Sep 17 00:00:00 2001 From: Ashok Nagarajan Date: Mon, 13 May 2013 17:08:04 -0700 Subject: mac80211: Move mesh estab_plinks outside mesh_stats debug group As estab_plinks is not a statistics member, don't show its debug information along with other mesh stat members Signed-off-by: Ashok Nagarajan Signed-off-by: Johannes Berg --- net/mac80211/debugfs_netdev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 14abcf44f974..f83074fe6670 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -471,6 +471,8 @@ __IEEE80211_IF_FILE_W(tsf); IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); #ifdef CONFIG_MAC80211_MESH +IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); + /* Mesh stats attributes */ IEEE80211_IF_FILE(fwded_mcast, u.mesh.mshstats.fwded_mcast, DEC); IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC); @@ -480,7 +482,6 @@ IEEE80211_IF_FILE(dropped_frames_congestion, u.mesh.mshstats.dropped_frames_congestion, DEC); IEEE80211_IF_FILE(dropped_frames_no_route, u.mesh.mshstats.dropped_frames_no_route, DEC); -IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); /* Mesh parameters */ IEEE80211_IF_FILE(dot11MeshMaxRetries, @@ -583,6 +584,7 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) static void add_mesh_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD_MODE(tsf, 0600); + DEBUGFS_ADD_MODE(estab_plinks, 0400); } static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) @@ -598,7 +600,6 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) MESHSTATS_ADD(dropped_frames_ttl); MESHSTATS_ADD(dropped_frames_no_route); MESHSTATS_ADD(dropped_frames_congestion); - MESHSTATS_ADD(estab_plinks); #undef MESHSTATS_ADD } -- cgit From b422c6cd7e93bb613030f14d7d8a0cc73f115629 Mon Sep 17 00:00:00 2001 From: Ashok Nagarajan Date: Fri, 10 May 2013 17:50:51 -0700 Subject: {cfg,mac}80211: move mandatory rates calculation to cfg80211 Move mandatory rates calculation to cfg80211, shared with non mac80211 drivers. Signed-off-by: Ashok Nagarajan [extend documentation] Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 10 +++++++--- net/mac80211/ieee80211_i.h | 3 --- net/mac80211/mesh.c | 5 +++-- net/mac80211/util.c | 26 -------------------------- net/wireless/util.c | 23 +++++++++++++++++++++++ 5 files changed, 33 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 170f9a7fa319..956ba6316da5 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -341,6 +341,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_supported_band *sband; int band; /* @@ -380,8 +381,9 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, sta->last_rx = jiffies; /* make sure mandatory rates are always added */ + sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); return ieee80211_ibss_finish_sta(sta, auth); } @@ -492,7 +494,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, prev_rates = sta->sta.supp_rates[band]; /* make sure mandatory rates are always added */ sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); if (sta->sta.supp_rates[band] != prev_rates) { ibss_dbg(sdata, @@ -624,6 +626,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_supported_band *sband; int band; /* @@ -658,8 +661,9 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, sta->last_rx = jiffies; /* make sure mandatory rates are always added */ + sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); spin_lock(&ifibss->incomplete_lock); list_add(&sta->list, &ifibss->incomplete_stations); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 158e6eb188d3..b7cbd4ebf0e0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1505,9 +1505,6 @@ static inline void ieee802_11_parse_elems(u8 *start, size_t len, bool action, ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); } -u32 ieee80211_mandatory_rates(struct ieee80211_local *local, - enum ieee80211_band band); - void ieee80211_dynamic_ps_enable_work(struct work_struct *work); void ieee80211_dynamic_ps_disable_work(struct work_struct *work); void ieee80211_dynamic_ps_timer(unsigned long data); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c13db9ad394b..c14bb816c6a3 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -741,6 +741,8 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband = + sdata->local->hw.wiphy->bands[band]; local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ @@ -758,8 +760,7 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; sdata->vif.bss_conf.enable_beacon = true; - sdata->vif.bss_conf.basic_rates = - ieee80211_mandatory_rates(local, band); + sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sband); changed |= ieee80211_mps_local_status_update(sdata); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3f87fa468b1f..707953fd8324 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1072,32 +1072,6 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata, true); } -u32 ieee80211_mandatory_rates(struct ieee80211_local *local, - enum ieee80211_band band) -{ - struct ieee80211_supported_band *sband; - struct ieee80211_rate *bitrates; - u32 mandatory_rates; - enum ieee80211_rate_flags mandatory_flag; - int i; - - sband = local->hw.wiphy->bands[band]; - if (WARN_ON(!sband)) - return 1; - - if (band == IEEE80211_BAND_2GHZ) - mandatory_flag = IEEE80211_RATE_MANDATORY_B; - else - mandatory_flag = IEEE80211_RATE_MANDATORY_A; - - bitrates = sband->bitrates; - mandatory_rates = 0; - for (i = 0; i < sband->n_bitrates; i++) - if (bitrates[i].flags & mandatory_flag) - mandatory_rates |= BIT(i); - return mandatory_rates; -} - void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *da, diff --git a/net/wireless/util.c b/net/wireless/util.c index b11052be09be..0962f107f57f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -33,6 +33,29 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, } EXPORT_SYMBOL(ieee80211_get_response_rate); +u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband) +{ + struct ieee80211_rate *bitrates; + u32 mandatory_rates = 0; + enum ieee80211_rate_flags mandatory_flag; + int i; + + if (WARN_ON(!sband)) + return 1; + + if (sband->band == IEEE80211_BAND_2GHZ) + mandatory_flag = IEEE80211_RATE_MANDATORY_B; + else + mandatory_flag = IEEE80211_RATE_MANDATORY_A; + + bitrates = sband->bitrates; + for (i = 0; i < sband->n_bitrates; i++) + if (bitrates[i].flags & mandatory_flag) + mandatory_rates |= BIT(i); + return mandatory_rates; +} +EXPORT_SYMBOL(ieee80211_mandatory_rates); + int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) { /* see 802.11 17.3.8.3.2 and Annex J -- cgit From 9f419f3851041e0c8170629f0639813dbfc79d5e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 May 2013 21:34:22 +0200 Subject: cfg80211: move cfg80211_get_dev_from_ifindex under wext The function is only used and needed by the wext code for scanning, so move it there. Signed-off-by: Johannes Berg --- net/wireless/core.c | 21 --------------------- net/wireless/core.h | 4 ---- net/wireless/scan.c | 21 +++++++++++++++++++++ 3 files changed, 21 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 58e69d691601..cc49cf11c7a7 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -90,27 +90,6 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) return &rdev->wiphy; } -struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) -{ - struct cfg80211_registered_device *rdev = ERR_PTR(-ENODEV); - struct net_device *dev; - - mutex_lock(&cfg80211_mutex); - dev = dev_get_by_index(net, ifindex); - if (!dev) - goto out; - if (dev->ieee80211_ptr) { - rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); - mutex_lock(&rdev->mtx); - } else - rdev = ERR_PTR(-ENODEV); - dev_put(dev); - out: - mutex_unlock(&cfg80211_mutex); - return rdev; -} - /* requires cfg80211_mutex to be held */ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) diff --git a/net/wireless/core.h b/net/wireless/core.h index fd35dae547c4..95b29075a9c8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -164,10 +164,6 @@ int get_wiphy_idx(struct wiphy *wiphy); /* requires cfg80211_rdev_mutex to be held! */ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); -/* identical to cfg80211_get_dev_from_info but only operate on ifindex */ -extern struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(struct net *net, int ifindex); - int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index fd99ea495b7e..2ce44a712f13 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1040,6 +1040,27 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) EXPORT_SYMBOL(cfg80211_unlink_bss); #ifdef CONFIG_CFG80211_WEXT +static struct cfg80211_registered_device * +cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) +{ + struct cfg80211_registered_device *rdev = ERR_PTR(-ENODEV); + struct net_device *dev; + + mutex_lock(&cfg80211_mutex); + dev = dev_get_by_index(net, ifindex); + if (!dev) + goto out; + if (dev->ieee80211_ptr) { + rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + mutex_lock(&rdev->mtx); + } else + rdev = ERR_PTR(-ENODEV); + dev_put(dev); + out: + mutex_unlock(&cfg80211_mutex); + return rdev; +} + int cfg80211_wext_siwscan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) -- cgit From 73810b77def898b43a97638478692922b7f820eb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 May 2013 21:49:02 +0200 Subject: cfg80211: use atomic_t for wiphy counter There's no need to lock, we can just use an atomic_t. Signed-off-by: Johannes Berg --- net/wireless/core.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index cc49cf11c7a7..9416b8f55f50 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -289,7 +289,7 @@ static void cfg80211_event_work(struct work_struct *work) struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) { - static int wiphy_counter; + static atomic_t wiphy_counter = ATOMIC_INIT(0); struct cfg80211_registered_device *rdev; int alloc_size; @@ -311,20 +311,15 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->ops = ops; - mutex_lock(&cfg80211_mutex); - - rdev->wiphy_idx = wiphy_counter++; + rdev->wiphy_idx = atomic_inc_return(&wiphy_counter); if (unlikely(rdev->wiphy_idx < 0)) { - wiphy_counter--; - mutex_unlock(&cfg80211_mutex); /* ugh, wrapped! */ + atomic_dec(&wiphy_counter); kfree(rdev); return NULL; } - mutex_unlock(&cfg80211_mutex); - /* give it a proper name */ dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); -- cgit From 5fe231e873729fa2f57cdc417d5c1f80871e2d7d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 May 2013 21:45:15 +0200 Subject: cfg80211: vastly simplify locking Virtually all code paths in cfg80211 already (need to) hold the RTNL. As such, there's little point in having another four mutexes for various parts of the code, they just cause lock ordering issues (and much of the time, the RTNL and a few of the others need thus be held.) Simplify all this by getting rid of the extra four mutexes and just use the RTNL throughout. Only a few code changes were needed to do this and we can get rid of a work struct for bonus points. Signed-off-by: Johannes Berg --- net/wireless/core.c | 166 ++++++++---------------------------------- net/wireless/core.h | 32 +------- net/wireless/debugfs.c | 4 +- net/wireless/ibss.c | 10 +-- net/wireless/mesh.c | 2 - net/wireless/mlme.c | 12 +-- net/wireless/nl80211.c | 178 +++++++++------------------------------------ net/wireless/reg.c | 36 +++------ net/wireless/scan.c | 42 +++++------ net/wireless/sme.c | 26 +------ net/wireless/util.c | 11 --- net/wireless/wext-compat.c | 22 +----- net/wireless/wext-sme.c | 18 ----- 13 files changed, 115 insertions(+), 444 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 9416b8f55f50..5fc642d4071b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -36,12 +36,10 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); MODULE_ALIAS_GENL_FAMILY(NL80211_GENL_NAME); -/* RCU-protected (and cfg80211_mutex for writers) */ +/* RCU-protected (and RTNL for writers) */ LIST_HEAD(cfg80211_rdev_list); int cfg80211_rdev_list_generation; -DEFINE_MUTEX(cfg80211_mutex); - /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; @@ -53,12 +51,11 @@ module_param(cfg80211_disable_40mhz_24ghz, bool, 0644); MODULE_PARM_DESC(cfg80211_disable_40mhz_24ghz, "Disable 40MHz support in the 2.4GHz band"); -/* requires cfg80211_mutex to be held! */ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *rdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (rdev->wiphy_idx == wiphy_idx) { @@ -77,12 +74,11 @@ int get_wiphy_idx(struct wiphy *wiphy) return rdev->wiphy_idx; } -/* requires cfg80211_rdev_mutex to be held! */ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) { struct cfg80211_registered_device *rdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx); if (!rdev) @@ -90,14 +86,13 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) return &rdev->wiphy; } -/* requires cfg80211_mutex to be held */ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) { struct cfg80211_registered_device *rdev2; int wiphy_idx, taken = -1, result, digits; - assert_cfg80211_lock(); + ASSERT_RTNL(); /* prohibit calling the thing phy%d when %d is not its number */ sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); @@ -195,8 +190,7 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { - lockdep_assert_held(&rdev->devlist_mtx); - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) return; @@ -235,8 +229,6 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) rtnl_lock(); - /* read-only iteration need not hold the devlist_mtx */ - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { dev_close(wdev->netdev); @@ -245,12 +237,7 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) /* otherwise, check iftype */ switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - /* but this requires it */ - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); cfg80211_stop_p2p_device(rdev, wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); break; default: break; @@ -278,10 +265,7 @@ static void cfg80211_event_work(struct work_struct *work) event_work); rtnl_lock(); - cfg80211_lock_rdev(rdev); - cfg80211_process_rdev_events(rdev); - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -323,9 +307,6 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) /* give it a proper name */ dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); - mutex_init(&rdev->mtx); - mutex_init(&rdev->devlist_mtx); - mutex_init(&rdev->sched_scan_mtx); INIT_LIST_HEAD(&rdev->wdev_list); INIT_LIST_HEAD(&rdev->beacon_registrations); spin_lock_init(&rdev->beacon_registrations_lock); @@ -573,11 +554,11 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - mutex_lock(&cfg80211_mutex); + rtnl_lock(); res = device_add(&rdev->wiphy.dev); if (res) { - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); return res; } @@ -606,25 +587,18 @@ int wiphy_register(struct wiphy *wiphy) } cfg80211_debugfs_rdev_add(rdev); - mutex_unlock(&cfg80211_mutex); - /* - * due to a locking dependency this has to be outside of the - * cfg80211_mutex lock - */ res = rfkill_register(rdev->rfkill); if (res) { device_del(&rdev->wiphy.dev); - mutex_lock(&cfg80211_mutex); debugfs_remove_recursive(rdev->wiphy.debugfsdir); list_del_rcu(&rdev->list); wiphy_regulatory_deregister(wiphy); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); return res; } - rtnl_lock(); rdev->wiphy.registered = true; rtnl_unlock(); return 0; @@ -654,25 +628,19 @@ void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - rtnl_lock(); - rdev->wiphy.registered = false; - rtnl_unlock(); - - rfkill_unregister(rdev->rfkill); - - /* protect the device list */ - mutex_lock(&cfg80211_mutex); - wait_event(rdev->dev_wait, ({ int __count; - mutex_lock(&rdev->devlist_mtx); + rtnl_lock(); __count = rdev->opencount; - mutex_unlock(&rdev->devlist_mtx); + rtnl_unlock(); __count == 0; })); - mutex_lock(&rdev->devlist_mtx); + rtnl_lock(); + rdev->wiphy.registered = false; + + rfkill_unregister(rdev->rfkill); + BUG_ON(!list_empty(&rdev->wdev_list)); - mutex_unlock(&rdev->devlist_mtx); /* * First remove the hardware from everywhere, this makes @@ -682,20 +650,6 @@ void wiphy_unregister(struct wiphy *wiphy) list_del_rcu(&rdev->list); synchronize_rcu(); - /* - * Try to grab rdev->mtx. If a command is still in progress, - * hopefully the driver will refuse it since it's tearing - * down the device already. We wait for this command to complete - * before unlinking the item from the list. - * Note: as codified by the BUG_ON above we cannot get here if - * a virtual interface is still present. Hence, we can only get - * to lock contention here if userspace issues a command that - * identified the hardware by wiphy index. - */ - cfg80211_lock_rdev(rdev); - /* nothing */ - cfg80211_unlock_rdev(rdev); - /* * If this device got a regulatory hint tell core its * free to listen now to a new shiny device regulatory hint @@ -705,7 +659,7 @@ void wiphy_unregister(struct wiphy *wiphy) cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); @@ -723,9 +677,6 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; rfkill_destroy(rdev->rfkill); - mutex_destroy(&rdev->mtx); - mutex_destroy(&rdev->devlist_mtx); - mutex_destroy(&rdev->sched_scan_mtx); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(®->list); kfree(reg); @@ -750,36 +701,6 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); -static void wdev_cleanup_work(struct work_struct *work) -{ - struct wireless_dev *wdev; - struct cfg80211_registered_device *rdev; - - wdev = container_of(work, struct wireless_dev, cleanup_work); - rdev = wiphy_to_dev(wdev->wiphy); - - mutex_lock(&rdev->sched_scan_mtx); - - if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } - - if (WARN_ON(rdev->sched_scan_req && - rdev->sched_scan_req->dev == wdev->netdev)) { - __cfg80211_stop_sched_scan(rdev, false); - } - - mutex_unlock(&rdev->sched_scan_mtx); - - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - wake_up(&rdev->dev_wait); - - dev_put(wdev->netdev); -} - void cfg80211_unregister_wdev(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -789,8 +710,6 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) if (WARN_ON(wdev->netdev)) return; - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); list_del_rcu(&wdev->list); rdev->devlist_generation++; @@ -802,8 +721,6 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) WARN_ON_ONCE(1); break; } - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -822,7 +739,7 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, } void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) + struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; @@ -832,9 +749,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - mutex_lock(&rdev->sched_scan_mtx); __cfg80211_stop_sched_scan(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT @@ -887,13 +802,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * are added with nl80211. */ mutex_init(&wdev->mtx); - INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - mutex_lock(&rdev->devlist_mtx); wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wdev_list); rdev->devlist_generation++; @@ -906,7 +819,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, } wdev->netdev = dev; wdev->sme_state = CFG80211_SME_IDLE; - mutex_unlock(&rdev->devlist_mtx); #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; @@ -932,26 +844,22 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_DOWN: cfg80211_update_iface_num(rdev, wdev->iftype, -1); - dev_hold(dev); - queue_work(cfg80211_wq, &wdev->cleanup_work); + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + if (WARN_ON(!rdev->scan_req->notified)) + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, true); + } + + if (WARN_ON(rdev->sched_scan_req && + rdev->sched_scan_req->dev == wdev->netdev)) { + __cfg80211_stop_sched_scan(rdev, false); + } + + rdev->opencount--; + wake_up(&rdev->dev_wait); break; case NETDEV_UP: - /* - * If we have a really quick DOWN/UP succession we may - * have this work still pending ... cancel it and see - * if it was pending, in which case we need to account - * for some of the work it would have done. - */ - if (cancel_work_sync(&wdev->cleanup_work)) { - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - dev_put(dev); - } cfg80211_update_iface_num(rdev, wdev->iftype, 1); - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT @@ -983,10 +891,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; } wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); rdev->opencount++; - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); /* * Configure power management to the driver here so that its @@ -1002,12 +907,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, } break; case NETDEV_UNREGISTER: - /* - * NB: cannot take rdev->mtx here because this may be - * called within code protected by it when interfaces - * are removed with nl80211. - */ - mutex_lock(&rdev->devlist_mtx); /* * It is possible to get NETDEV_UNREGISTER * multiple times. To detect that, check @@ -1024,7 +923,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, kfree(wdev->wext.keys); #endif } - mutex_unlock(&rdev->devlist_mtx); /* * synchronise (so that we won't find this netdev * from other code any more) and then clear the list @@ -1044,9 +942,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, return notifier_from_errno(-EOPNOTSUPP); if (rfkill_blocked(rdev->rfkill)) return notifier_from_errno(-ERFKILL); - mutex_lock(&rdev->devlist_mtx); ret = cfg80211_can_add_interface(rdev, wdev->iftype); - mutex_unlock(&rdev->devlist_mtx); if (ret) return notifier_from_errno(ret); break; @@ -1064,12 +960,10 @@ static void __net_exit cfg80211_pernet_exit(struct net *net) struct cfg80211_registered_device *rdev; rtnl_lock(); - mutex_lock(&cfg80211_mutex); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (net_eq(wiphy_net(&rdev->wiphy), net)) WARN_ON(cfg80211_switch_netns(rdev, &init_net)); } - mutex_unlock(&cfg80211_mutex); rtnl_unlock(); } diff --git a/net/wireless/core.h b/net/wireless/core.h index 95b29075a9c8..d21a0fc01401 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -5,7 +5,6 @@ */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H -#include #include #include #include @@ -23,11 +22,6 @@ struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; - /* we hold this mutex during any call so that - * we cannot do multiple calls at once, and also - * to avoid the deregister call to proceed while - * any call is in progress */ - struct mutex mtx; /* rfkill support */ struct rfkill_ops rfkill_ops; @@ -49,9 +43,7 @@ struct cfg80211_registered_device { /* wiphy index, internal only */ int wiphy_idx; - /* associated wireless interfaces */ - struct mutex devlist_mtx; - /* protected by devlist_mtx or RCU */ + /* associated wireless interfaces, protected by rtnl or RCU */ struct list_head wdev_list; int devlist_generation, wdev_id; int opencount; /* also protected by devlist_mtx */ @@ -75,8 +67,6 @@ struct cfg80211_registered_device { struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; - struct mutex sched_scan_mtx; - #ifdef CONFIG_NL80211_TESTMODE struct genl_info *testmode_info; #endif @@ -120,15 +110,9 @@ cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) } extern struct workqueue_struct *cfg80211_wq; -extern struct mutex cfg80211_mutex; extern struct list_head cfg80211_rdev_list; extern int cfg80211_rdev_list_generation; -static inline void assert_cfg80211_lock(void) -{ - lockdep_assert_held(&cfg80211_mutex); -} - struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; @@ -161,23 +145,11 @@ static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss) struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx); int get_wiphy_idx(struct wiphy *wiphy); -/* requires cfg80211_rdev_mutex to be held! */ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net); -static inline void cfg80211_lock_rdev(struct cfg80211_registered_device *rdev) -{ - mutex_lock(&rdev->mtx); -} - -static inline void cfg80211_unlock_rdev(struct cfg80211_registered_device *rdev) -{ - BUG_ON(IS_ERR(rdev) || !rdev); - mutex_unlock(&rdev->mtx); -} - static inline void wdev_lock(struct wireless_dev *wdev) __acquires(wdev) { @@ -192,7 +164,7 @@ static inline void wdev_unlock(struct wireless_dev *wdev) mutex_unlock(&wdev->mtx); } -#define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx) +#define ASSERT_RDEV_LOCK(rdev) ASSERT_RTNL() #define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 920cabe0461b..90d050036624 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -74,7 +74,7 @@ static ssize_t ht40allow_map_read(struct file *file, if (!buf) return -ENOMEM; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { sband = wiphy->bands[band]; @@ -85,7 +85,7 @@ static ssize_t ht40allow_map_read(struct file *file, buf, buf_size, offset); } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); r = simple_read_from_buffer(user_buf, count, ppos, buf, offset); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index d80e47194d49..5449c5a6de84 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -152,11 +152,11 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - mutex_lock(&rdev->devlist_mtx); + ASSERT_RTNL(); + wdev_lock(wdev); err = __cfg80211_join_ibss(rdev, dev, params, connkeys); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -359,11 +359,9 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, wdev->wext.ibss.channel_fixed = false; } - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -429,11 +427,9 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, memcpy(wdev->wext.ibss.ssid, ssid, len); wdev->wext.ibss.ssid_len = len; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -512,11 +508,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, } else wdev->wext.ibss.bssid = NULL; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 9546ad210550..5dfb289ab761 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -186,11 +186,9 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = __cfg80211_join_mesh(rdev, dev, setup, conf); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c21e32f9549c..68b40f21bc38 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -313,14 +313,14 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, { int err; - mutex_lock(&rdev->devlist_mtx); + ASSERT_RTNL(); + wdev_lock(dev->ieee80211_ptr); err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, ssid, ssid_len, ie, ie_len, key, key_len, key_idx, sae_data, sae_data_len); wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -424,12 +424,12 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - mutex_lock(&rdev->devlist_mtx); + ASSERT_RTNL(); + wdev_lock(wdev); err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, ssid_len, req); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -844,7 +844,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) dfs_update_channels_wk); wiphy = &rdev->wiphy; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { sband = wiphy->bands[bandid]; if (!sband) @@ -877,7 +877,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) check_again = true; } } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); /* reschedule if there are other channels waiting to be cleared again */ if (check_again) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5bcf3a5b6465..74cdb1a0cf31 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -59,7 +59,7 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) int wiphy_idx = -1; int ifidx = -1; - assert_cfg80211_lock(); + ASSERT_RTNL(); if (!have_ifidx && !have_wdev_id) return ERR_PTR(-EINVAL); @@ -80,7 +80,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) if (have_wdev_id && rdev->wiphy_idx != wiphy_idx) continue; - mutex_lock(&rdev->devlist_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (have_ifidx && wdev->netdev && wdev->netdev->ifindex == ifidx) { @@ -92,7 +91,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) break; } } - mutex_unlock(&rdev->devlist_mtx); if (result) break; @@ -109,7 +107,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) struct cfg80211_registered_device *rdev = NULL, *tmp; struct net_device *netdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); if (!attrs[NL80211_ATTR_WIPHY] && !attrs[NL80211_ATTR_IFINDEX] && @@ -128,14 +126,12 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32); if (tmp) { /* make sure wdev exists */ - mutex_lock(&tmp->devlist_mtx); list_for_each_entry(wdev, &tmp->wdev_list, list) { if (wdev->identifier != (u32)wdev_id) continue; found = true; break; } - mutex_unlock(&tmp->devlist_mtx); if (!found) tmp = NULL; @@ -182,19 +178,6 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) /* * This function returns a pointer to the driver * that the genl_info item that is passed refers to. - * If successful, it returns non-NULL and also locks - * the driver's mutex! - * - * This means that you need to call cfg80211_unlock_rdev() - * before being allowed to acquire &cfg80211_mutex! - * - * This is necessary because we need to lock the global - * mutex to get an item off the list safely, and then - * we lock the rdev mutex so it doesn't go away under us. - * - * We don't want to keep cfg80211_mutex locked - * for all the time in order to allow requests on - * other interfaces to go through at the same time. * * The result of this can be a PTR_ERR and hence must * be checked with IS_ERR() for errors. @@ -202,20 +185,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) static struct cfg80211_registered_device * cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info) { - struct cfg80211_registered_device *rdev; - - mutex_lock(&cfg80211_mutex); - rdev = __cfg80211_rdev_from_attrs(netns, info->attrs); - - /* if it is not an error we grab the lock on - * it to assure it won't be going away while - * we operate on it */ - if (!IS_ERR(rdev)) - mutex_lock(&rdev->mtx); - - mutex_unlock(&cfg80211_mutex); - - return rdev; + return __cfg80211_rdev_from_attrs(netns, info->attrs); } /* policy for the attributes */ @@ -456,7 +426,6 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, int err; rtnl_lock(); - mutex_lock(&cfg80211_mutex); if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, @@ -485,14 +454,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, *rdev = wiphy_to_dev(wiphy); *wdev = NULL; - mutex_lock(&(*rdev)->devlist_mtx); list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { if (tmp->identifier == cb->args[1]) { *wdev = tmp; break; } } - mutex_unlock(&(*rdev)->devlist_mtx); if (!*wdev) { err = -ENODEV; @@ -500,19 +467,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, } } - cfg80211_lock_rdev(*rdev); - - mutex_unlock(&cfg80211_mutex); return 0; out_unlock: - mutex_unlock(&cfg80211_mutex); rtnl_unlock(); return err; } static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) { - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -1568,7 +1530,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) struct nlattr **tb = nl80211_fam.attrbuf; int res; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, nl80211_fam.maxattr, nl80211_policy); if (res == 0) { @@ -1582,10 +1544,8 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); netdev = dev_get_by_index(sock_net(skb->sk), ifidx); - if (!netdev) { - mutex_unlock(&cfg80211_mutex); + if (!netdev) return -ENODEV; - } if (netdev->ieee80211_ptr) { dev = wiphy_to_dev( netdev->ieee80211_ptr->wiphy); @@ -1629,7 +1589,6 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) !skb->len && cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; - mutex_unlock(&cfg80211_mutex); return 1; } idx--; @@ -1638,7 +1597,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) } while (cb->args[1] > 0); break; } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); cb->args[0] = idx; @@ -1793,7 +1752,6 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, if (result) return result; - mutex_lock(&rdev->devlist_mtx); switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: @@ -1817,7 +1775,6 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, default: result = -EINVAL; } - mutex_unlock(&rdev->devlist_mtx); return result; } @@ -1866,6 +1823,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 frag_threshold = 0, rts_threshold = 0; u8 coverage_class = 0; + ASSERT_RTNL(); + /* * Try to find the wiphy and netdev. Normally this * function shouldn't need the netdev, but this is @@ -1875,31 +1834,25 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) * also passed a netdev to set_wiphy, so that it is * possible to let that go to the right netdev! */ - mutex_lock(&cfg80211_mutex); if (info->attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); netdev = dev_get_by_index(genl_info_net(info), ifindex); - if (netdev && netdev->ieee80211_ptr) { + if (netdev && netdev->ieee80211_ptr) rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy); - mutex_lock(&rdev->mtx); - } else + else netdev = NULL; } if (!netdev) { rdev = __cfg80211_rdev_from_attrs(genl_info_net(info), info->attrs); - if (IS_ERR(rdev)) { - mutex_unlock(&cfg80211_mutex); + if (IS_ERR(rdev)) return PTR_ERR(rdev); - } wdev = NULL; netdev = NULL; result = 0; - - mutex_lock(&rdev->mtx); } else wdev = netdev->ieee80211_ptr; @@ -1912,8 +1865,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = cfg80211_dev_rename( rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); - mutex_unlock(&cfg80211_mutex); - if (result) goto bad_res; @@ -2120,7 +2071,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } bad_res: - mutex_unlock(&rdev->mtx); if (netdev) dev_put(netdev); return result; @@ -2218,7 +2168,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; @@ -2228,7 +2178,6 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * } if_idx = 0; - mutex_lock(&rdev->devlist_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (if_idx < if_start) { if_idx++; @@ -2237,17 +2186,15 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev) < 0) { - mutex_unlock(&rdev->devlist_mtx); goto out; } if_idx++; } - mutex_unlock(&rdev->devlist_mtx); wp_idx++; } out: - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); cb->args[0] = wp_idx; cb->args[1] = if_idx; @@ -2480,11 +2427,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - mutex_lock(&rdev->devlist_mtx); wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wdev_list); rdev->devlist_generation++; - mutex_unlock(&rdev->devlist_mtx); break; default: break; @@ -2993,8 +2938,6 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; bool ret = false; - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) @@ -3008,8 +2951,6 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, break; } - mutex_unlock(&rdev->devlist_mtx); - return ret; } @@ -3171,13 +3112,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.radar_required = true; } - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, params.chandef.chan, CHAN_MODE_SHARED, radar_detect_width); - mutex_unlock(&rdev->devlist_mtx); - if (err) return err; @@ -4914,18 +4852,13 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) void *hdr = NULL; struct nlattr *nl_reg_rules; unsigned int i; - int err = -EINVAL; - - mutex_lock(&cfg80211_mutex); if (!cfg80211_regdomain) - goto out; + return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - err = -ENOBUFS; - goto out; - } + if (!msg) + return -ENOBUFS; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_REG); @@ -4984,8 +4917,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_nest_end(msg, nl_reg_rules); genlmsg_end(msg, hdr); - err = genlmsg_reply(msg, info); - goto out; + return genlmsg_reply(msg, info); nla_put_failure_rcu: rcu_read_unlock(); @@ -4993,10 +4925,7 @@ nla_put_failure: genlmsg_cancel(msg, hdr); put_failure: nlmsg_free(msg); - err = -EMSGSIZE; -out: - mutex_unlock(&cfg80211_mutex); - return err; + return -EMSGSIZE; } static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) @@ -5062,12 +4991,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } - mutex_lock(&cfg80211_mutex); - r = set_regdom(rd); /* set_regdom took ownership */ rd = NULL; - mutex_unlock(&cfg80211_mutex); bad_reg: kfree(rd); @@ -5117,7 +5043,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto unlock; @@ -5303,7 +5228,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } unlock: - mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -5375,8 +5299,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (ie_len > wiphy->max_sched_scan_ie_len) return -EINVAL; - mutex_lock(&rdev->sched_scan_mtx); - if (rdev->sched_scan_req) { err = -EINPROGRESS; goto out; @@ -5544,7 +5466,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, out_free: kfree(request); out: - mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -5552,17 +5473,12 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int err; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || !rdev->ops->sched_scan_stop) return -EOPNOTSUPP; - mutex_lock(&rdev->sched_scan_mtx); - err = __cfg80211_stop_sched_scan(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); - - return err; + return __cfg80211_stop_sched_scan(rdev, false); } static int nl80211_start_radar_detection(struct sk_buff *skb, @@ -5594,12 +5510,11 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (!rdev->ops->start_radar_detection) return -EOPNOTSUPP; - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, chandef.chan, CHAN_MODE_SHARED, BIT(chandef.width)); if (err) - goto err_locked; + return err; err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); if (!err) { @@ -5607,9 +5522,6 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, wdev->cac_started = true; wdev->cac_start_time = jiffies; } -err_locked: - mutex_unlock(&rdev->devlist_mtx); - return err; } @@ -6472,6 +6384,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb, void *data = NULL; int data_len = 0; + rtnl_lock(); + if (cb->args[0]) { /* * 0 is a valid index, but not valid for args[0], @@ -6483,18 +6397,16 @@ static int nl80211_testmode_dump(struct sk_buff *skb, nl80211_fam.attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) - return err; + goto out_err; - mutex_lock(&cfg80211_mutex); rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), nl80211_fam.attrbuf); if (IS_ERR(rdev)) { - mutex_unlock(&cfg80211_mutex); - return PTR_ERR(rdev); + err = PTR_ERR(rdev); + goto out_err; } phy_idx = rdev->wiphy_idx; rdev = NULL; - mutex_unlock(&cfg80211_mutex); if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]) cb->args[1] = @@ -6506,14 +6418,11 @@ static int nl80211_testmode_dump(struct sk_buff *skb, data_len = nla_len((void *)cb->args[1]); } - mutex_lock(&cfg80211_mutex); rdev = cfg80211_rdev_by_wiphy_idx(phy_idx); if (!rdev) { - mutex_unlock(&cfg80211_mutex); - return -ENOENT; + err = -ENOENT; + goto out_err; } - cfg80211_lock_rdev(rdev); - mutex_unlock(&cfg80211_mutex); if (!rdev->ops->testmode_dump) { err = -EOPNOTSUPP; @@ -6554,7 +6463,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, /* see above */ cb->args[0] = phy_idx + 1; out_err: - cfg80211_unlock_rdev(rdev); + rtnl_unlock(); return err; } @@ -8189,9 +8098,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev->p2p_started) return 0; - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_add_interface(rdev, wdev->iftype); - mutex_unlock(&rdev->devlist_mtx); if (err) return err; @@ -8200,9 +8107,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) return err; wdev->p2p_started = true; - mutex_lock(&rdev->devlist_mtx); rdev->opencount++; - mutex_unlock(&rdev->devlist_mtx); return 0; } @@ -8218,11 +8123,7 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->stop_p2p_device) return -EOPNOTSUPP; - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); cfg80211_stop_p2p_device(rdev, wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); return 0; } @@ -8365,11 +8266,11 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[0] = rdev; } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV || ops->internal_flags & NL80211_FLAG_NEED_WDEV) { - mutex_lock(&cfg80211_mutex); + ASSERT_RTNL(); + wdev = __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs); if (IS_ERR(wdev)) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return PTR_ERR(wdev); @@ -8380,7 +8281,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -EINVAL; @@ -8394,7 +8294,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, if (dev) { if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !netif_running(dev)) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -ENETDOWN; @@ -8403,17 +8302,12 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, dev_hold(dev); } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { if (!wdev->p2p_started) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -ENETDOWN; } } - cfg80211_lock_rdev(rdev); - - mutex_unlock(&cfg80211_mutex); - info->user_ptr[0] = rdev; } @@ -8423,8 +8317,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { - if (info->user_ptr[0]) - cfg80211_unlock_rdev(info->user_ptr[0]); if (info->user_ptr[1]) { if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) { struct wireless_dev *wdev = info->user_ptr[1]; @@ -8446,7 +8338,8 @@ static struct genl_ops nl80211_ops[] = { .dumpit = nl80211_dump_wiphy, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_WIPHY, @@ -8461,7 +8354,8 @@ static struct genl_ops nl80211_ops[] = { .dumpit = nl80211_dump_interface, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WDEV, + .internal_flags = NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_INTERFACE, @@ -8620,6 +8514,7 @@ static struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_REG, .doit = nl80211_get_reg, .policy = nl80211_policy, + .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ }, { @@ -8627,6 +8522,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_set_reg, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_REQ_SET_REG, @@ -9082,8 +8978,6 @@ static int nl80211_add_scan_req(struct sk_buff *msg, struct nlattr *nest; int i; - lockdep_assert_held(&rdev->sched_scan_mtx); - if (WARN_ON(!req)) return 0; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index cc35fbaa4578..e76559618588 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -377,7 +377,7 @@ static void reg_regdb_search(struct work_struct *work) const struct ieee80211_regdomain *curdom, *regdom = NULL; int i; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); mutex_lock(®_regdb_search_mutex); while (!list_empty(®_regdb_search_list)) { @@ -402,7 +402,7 @@ static void reg_regdb_search(struct work_struct *work) if (!IS_ERR_OR_NULL(regdom)) set_regdom(regdom); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); @@ -1225,7 +1225,7 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) struct cfg80211_registered_device *rdev; struct wiphy *wiphy; - assert_cfg80211_lock(); + ASSERT_RTNL(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; @@ -1570,21 +1570,19 @@ static void reg_process_pending_hints(void) { struct regulatory_request *reg_request, *lr; - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); lr = get_last_request(); /* When last_request->processed becomes true this will be rescheduled */ if (lr && !lr->processed) { REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); - goto out; + return; } spin_lock(®_requests_lock); if (list_empty(®_requests_list)) { spin_unlock(®_requests_lock); - goto out; + return; } reg_request = list_first_entry(®_requests_list, @@ -1595,10 +1593,6 @@ static void reg_process_pending_hints(void) spin_unlock(®_requests_lock); reg_process_hint(reg_request, reg_request->initiator); - -out: - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } /* Processes beacon hints -- this has nothing to do with country IEs */ @@ -1607,9 +1601,6 @@ static void reg_process_pending_beacon_hints(void) struct cfg80211_registered_device *rdev; struct reg_beacon *pending_beacon, *tmp; - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); - /* This goes through the _pending_ beacon list */ spin_lock_bh(®_pending_beacons_lock); @@ -1626,14 +1617,16 @@ static void reg_process_pending_beacon_hints(void) } spin_unlock_bh(®_pending_beacons_lock); - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } static void reg_todo(struct work_struct *work) { + rtnl_lock(); + mutex_lock(®_mutex); reg_process_pending_hints(); reg_process_pending_beacon_hints(); + mutex_unlock(®_mutex); + rtnl_unlock(); } static void queue_regulatory_request(struct regulatory_request *request) @@ -1717,10 +1710,6 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) } EXPORT_SYMBOL(regulatory_hint); -/* - * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and - * therefore cannot iterate over the rdev list here. - */ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, const u8 *country_ie, u8 country_ie_len) { @@ -1752,7 +1741,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, /* * We will run this only upon a successful connection on cfg80211. * We leave conflict resolution to the workqueue, where can hold - * cfg80211_mutex. + * the RTNL. */ if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && lr->wiphy_idx != WIPHY_IDX_INVALID) @@ -1858,7 +1847,8 @@ static void restore_regulatory_settings(bool reset_user) LIST_HEAD(tmp_reg_req_list); struct cfg80211_registered_device *rdev; - mutex_lock(&cfg80211_mutex); + ASSERT_RTNL(); + mutex_lock(®_mutex); reset_regdomains(true, &world_regdom); @@ -1915,7 +1905,6 @@ static void restore_regulatory_settings(bool reset_user) spin_unlock(®_requests_lock); mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); REG_DBG_PRINT("Kicking the queue\n"); @@ -2297,7 +2286,6 @@ void wiphy_regulatory_register(struct wiphy *wiphy) mutex_unlock(®_mutex); } -/* Caller must hold cfg80211_mutex */ void wiphy_regulatory_deregister(struct wiphy *wiphy) { struct wiphy *request_wiphy = NULL; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2ce44a712f13..dd01b58fa78c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -169,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) union iwreq_data wrqu; #endif - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); request = rdev->scan_req; @@ -230,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, scan_done_wk); - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); ___cfg80211_scan_done(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) @@ -241,6 +241,7 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); request->aborted = aborted; + request->notified = true; queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -255,7 +256,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) request = rdev->sched_scan_req; - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); /* we don't have sched_scan_req anymore if the scan is stopping */ if (request) { @@ -270,7 +271,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) nl80211_send_sched_scan_results(rdev, request->dev); } - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } void cfg80211_sched_scan_results(struct wiphy *wiphy) @@ -289,9 +290,9 @@ void cfg80211_sched_scan_stopped(struct wiphy *wiphy) trace_cfg80211_sched_scan_stopped(wiphy); - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); __cfg80211_stop_sched_scan(rdev, true); - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); @@ -300,7 +301,7 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, { struct net_device *dev; - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); if (!rdev->sched_scan_req) return -ENOENT; @@ -1043,21 +1044,19 @@ EXPORT_SYMBOL(cfg80211_unlink_bss); static struct cfg80211_registered_device * cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) { - struct cfg80211_registered_device *rdev = ERR_PTR(-ENODEV); + struct cfg80211_registered_device *rdev; struct net_device *dev; - mutex_lock(&cfg80211_mutex); + ASSERT_RTNL(); + dev = dev_get_by_index(net, ifindex); if (!dev) - goto out; - if (dev->ieee80211_ptr) { + return ERR_PTR(-ENODEV); + if (dev->ieee80211_ptr) rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); - mutex_lock(&rdev->mtx); - } else + else rdev = ERR_PTR(-ENODEV); dev_put(dev); - out: - mutex_unlock(&cfg80211_mutex); return rdev; } @@ -1083,7 +1082,6 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto out; @@ -1190,9 +1188,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, dev_hold(dev); } out: - mutex_unlock(&rdev->sched_scan_mtx); kfree(creq); - cfg80211_unlock_rdev(rdev); return err; } EXPORT_SYMBOL_GPL(cfg80211_wext_siwscan); @@ -1491,10 +1487,8 @@ int cfg80211_wext_giwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - if (rdev->scan_req) { - res = -EAGAIN; - goto out; - } + if (rdev->scan_req) + return -EAGAIN; res = ieee80211_scan_results(rdev, info, extra, data->length); data->length = 0; @@ -1503,8 +1497,6 @@ int cfg80211_wext_giwscan(struct net_device *dev, res = 0; } - out: - cfg80211_unlock_rdev(rdev); return res; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwscan); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 3ed35c345cae..4dbf31407a56 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -43,35 +43,29 @@ static bool cfg80211_is_all_idle(void) struct wireless_dev *wdev; bool is_all_idle = true; - mutex_lock(&cfg80211_mutex); - /* * All devices must be idle as otherwise if you are actively * scanning some new beacon hints could be learned and would * count as new regulatory hints. */ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - cfg80211_lock_rdev(rdev); list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) is_all_idle = false; wdev_unlock(wdev); } - cfg80211_unlock_rdev(rdev); } - mutex_unlock(&cfg80211_mutex); - return is_all_idle; } static void disconnect_work(struct work_struct *work) { - if (!cfg80211_is_all_idle()) - return; - - regulatory_hint_disconnect(); + rtnl_lock(); + if (cfg80211_is_all_idle()) + regulatory_hint_disconnect(); + rtnl_unlock(); } static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); @@ -85,7 +79,6 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); - lockdep_assert_held(&rdev->sched_scan_mtx); if (rdev->scan_req) return -EBUSY; @@ -226,9 +219,6 @@ void cfg80211_conn_work(struct work_struct *work) u8 bssid_buf[ETH_ALEN], *bssid = NULL; rtnl_lock(); - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (!wdev->netdev) @@ -256,9 +246,6 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); } - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -931,14 +918,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, { int err; - mutex_lock(&rdev->devlist_mtx); - /* might request scan - scan_mtx -> wdev_mtx dependency */ - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); return err; } diff --git a/net/wireless/util.c b/net/wireless/util.c index 0962f107f57f..501724257af5 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -808,12 +808,8 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) cfg80211_process_wdev_events(wdev); - - mutex_unlock(&rdev->devlist_mtx); } int cfg80211_change_iface(struct cfg80211_registered_device *rdev, @@ -845,10 +841,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; if (ntype != otype && netif_running(dev)) { - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, ntype); - mutex_unlock(&rdev->devlist_mtx); if (err) return err; @@ -1210,8 +1204,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, if (!beacon_int) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (!wdev->beacon_interval) continue; @@ -1221,8 +1213,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, } } - mutex_unlock(&rdev->devlist_mtx); - return res; } @@ -1246,7 +1236,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, int i, j; ASSERT_RTNL(); - lockdep_assert_held(&rdev->devlist_mtx); if (WARN_ON(hweight32(radar_detect) > 1)) return -EINVAL; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index d997d0f0c54a..e7c6e862580d 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -72,7 +72,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, struct cfg80211_registered_device *rdev; struct vif_params vifparams; enum nl80211_iftype type; - int ret; rdev = wiphy_to_dev(wdev->wiphy); @@ -98,11 +97,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, memset(&vifparams, 0, sizeof(vifparams)); - cfg80211_lock_rdev(rdev); - ret = cfg80211_change_iface(rdev, dev, type, NULL, &vifparams); - cfg80211_unlock_rdev(rdev); - - return ret; + return cfg80211_change_iface(rdev, dev, type, NULL, &vifparams); } EXPORT_SYMBOL_GPL(cfg80211_wext_siwmode); @@ -579,13 +574,10 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, { int err; - /* devlist mutex needed for possible IBSS re-join */ - mutex_lock(&rdev->devlist_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_set_encryption(rdev, dev, pairwise, addr, remove, tx_key, idx, params); wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -787,7 +779,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; - int freq, err; + int freq; switch (wdev->iftype) { case NL80211_IFTYPE_STATION: @@ -804,10 +796,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - err = cfg80211_set_monitor_channel(rdev, &chandef); - mutex_unlock(&rdev->devlist_mtx); - return err; + return cfg80211_set_monitor_channel(rdev, &chandef); case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); if (freq < 0) @@ -818,10 +807,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - err = cfg80211_set_mesh_channel(rdev, wdev, &chandef); - mutex_unlock(&rdev->devlist_mtx); - return err; + return cfg80211_set_mesh_channel(rdev, wdev, &chandef); default: return -EOPNOTSUPP; } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index e79cb5c0655a..aeefd6817189 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -87,9 +87,6 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, return -EINVAL; } - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -136,9 +133,6 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } @@ -190,9 +184,6 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); err = 0; @@ -226,9 +217,6 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } @@ -287,9 +275,6 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -318,9 +303,6 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } -- cgit From 8d61ffa5e01c5f676431d12caba17db164a48a86 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 May 2013 12:32:47 +0200 Subject: cfg80211/mac80211: use cfg80211 wdev mutex in mac80211 Using separate locks in cfg80211 and mac80211 has always caused issues, for example having to unlock in places in mac80211 to call cfg80211, which even needed a framework to make cfg80211 calls after some functions returned etc. Additionally, I suspect some issues people have reported with the cfg80211 state getting confused could be due to such issues, when cfg80211 is asking mac80211 to change state but mac80211 is in the process of telling cfg80211 that the state changed (in another way.) Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 4 +- net/mac80211/debugfs_netdev.c | 10 +- net/mac80211/ht.c | 4 +- net/mac80211/ibss.c | 39 ++--- net/mac80211/ieee80211_i.h | 25 +++- net/mac80211/main.c | 4 +- net/mac80211/mesh.c | 32 ++-- net/mac80211/mesh_plink.c | 7 +- net/mac80211/mlme.c | 341 +++++++++++++++--------------------------- net/mac80211/util.c | 4 +- net/wireless/mlme.c | 48 +----- net/wireless/trace.h | 4 +- 12 files changed, 197 insertions(+), 325 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index eb4219051043..232edf78d5a9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2318,7 +2318,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode old_req; int err; - lockdep_assert_held(&sdata->u.mgd.mtx); + lockdep_assert_held(&sdata->wdev.mtx); old_req = sdata->u.mgd.req_smps; sdata->u.mgd.req_smps = smps_mode; @@ -2375,9 +2375,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ - mutex_lock(&sdata->u.mgd.mtx); __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps); - mutex_unlock(&sdata->u.mgd.mtx); if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index f83074fe6670..cafe614ef93d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -228,9 +228,9 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); err = __ieee80211_request_smps(sdata, smps_mode); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); return err; } @@ -313,16 +313,16 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( case NL80211_IFTYPE_STATION: fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); if (!sdata->u.mgd.associated) { - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); dev_kfree_skb(skb); return -ENOTCONN; } memcpy(hdr->addr1, sdata->u.mgd.associated->bssid, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr->addr3, addr, ETH_ALEN); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); break; default: dev_kfree_skb(skb); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index af8cee06e4f3..75dff338f581 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -429,9 +429,9 @@ void ieee80211_request_smps_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.request_smps_work); - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); } void ieee80211_request_smps(struct ieee80211_vif *vif, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 956ba6316da5..caa4b4f7f6e4 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -54,7 +54,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; int frame_len; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); /* Reset own TSF to allow time synchronization work. */ drv_reset_tsf(local, sdata); @@ -74,7 +74,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&ifibss->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); rcu_assign_pointer(ifibss->presp, NULL); if (presp) kfree_rcu(presp, rcu_head); @@ -263,7 +263,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, const struct cfg80211_bss_ies *ies; u64 tsf; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); if (beacon_int < 10) beacon_int = 10; @@ -410,7 +410,7 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; u8 deauth_frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); if (len < 24 + 6) return; @@ -677,7 +677,7 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) int active = 0; struct sta_info *sta; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); rcu_read_lock(); @@ -703,7 +703,7 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); @@ -734,7 +734,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) u16 capability; int i; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); if (ifibss->fixed_bssid) { memcpy(bssid, ifibss->bssid, ETH_ALEN); @@ -777,7 +777,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) int active_ibss; u16 capability; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); active_ibss = ieee80211_sta_active_ibss(sdata); ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss); @@ -847,10 +847,10 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; u8 *pos, *end; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&ifibss->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || len < 24 + 2 || !presp) @@ -934,7 +934,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - mutex_lock(&sdata->u.ibss.mtx); + sdata_lock(sdata); if (!sdata->u.ibss.ssid_len) goto mgmt_out; /* not ready to merge yet */ @@ -957,7 +957,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } mgmt_out: - mutex_unlock(&sdata->u.ibss.mtx); + sdata_unlock(sdata); } void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) @@ -965,7 +965,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct sta_info *sta; - mutex_lock(&ifibss->mtx); + sdata_lock(sdata); /* * Work could be scheduled after scan or similar @@ -1001,7 +1001,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) } out: - mutex_unlock(&ifibss->mtx); + sdata_unlock(sdata); } static void ieee80211_ibss_timer(unsigned long data) @@ -1018,7 +1018,6 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) setup_timer(&ifibss->timer, ieee80211_ibss_timer, (unsigned long) sdata); - mutex_init(&ifibss->mtx); INIT_LIST_HEAD(&ifibss->incomplete_stations); spin_lock_init(&ifibss->incomplete_lock); } @@ -1045,8 +1044,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, { u32 changed = 0; - mutex_lock(&sdata->u.ibss.mtx); - if (params->bssid) { memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); sdata->u.ibss.fixed_bssid = true; @@ -1079,8 +1076,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len); sdata->u.ibss.ssid_len = params->ssid_len; - mutex_unlock(&sdata->u.ibss.mtx); - /* * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is * reserved, but an HT STA shall protect HT transmissions as though @@ -1116,8 +1111,6 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) struct sta_info *sta; struct beacon_data *presp; - mutex_lock(&sdata->u.ibss.mtx); - active_ibss = ieee80211_sta_active_ibss(sdata); if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { @@ -1161,7 +1154,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) /* remove beacon */ kfree(sdata->u.ibss.ie); presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&sdata->u.ibss.mtx)); + lockdep_is_held(&sdata->wdev.mtx)); RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; @@ -1177,7 +1170,5 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.ibss.timer); - mutex_unlock(&sdata->u.ibss.mtx); - return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ba3cd284d104..9eed6f1d1614 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -394,7 +394,6 @@ struct ieee80211_if_managed { bool nullfunc_failed; bool connection_loss; - struct mutex mtx; struct cfg80211_bss *associated; struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_mgd_assoc_data *assoc_data; @@ -488,8 +487,6 @@ struct ieee80211_if_managed { struct ieee80211_if_ibss { struct timer_list timer; - struct mutex mtx; - unsigned long last_scan_completed; u32 basic_rates; @@ -580,8 +577,6 @@ struct ieee80211_if_mesh { bool accepting_plinks; int num_gates; struct beacon_data __rcu *beacon; - /* just protects beacon updates for now */ - struct mutex mtx; const u8 *ie; u8 ie_len; enum { @@ -778,6 +773,26 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } +static inline void sdata_lock(struct ieee80211_sub_if_data *sdata) + __acquires(&sdata->wdev.mtx) +{ + mutex_lock(&sdata->wdev.mtx); + __acquire(&sdata->wdev.mtx); +} + +static inline void sdata_unlock(struct ieee80211_sub_if_data *sdata) + __releases(&sdata->wdev.mtx) +{ + mutex_unlock(&sdata->wdev.mtx); + __release(&sdata->wdev.mtx); +} + +static inline void +sdata_assert_lock(struct ieee80211_sub_if_data *sdata) +{ + lockdep_assert_held(&sdata->wdev.mtx); +} + static inline enum ieee80211_band ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8a7bfc47d577..1998f1475267 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -331,7 +331,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, return NOTIFY_DONE; ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); /* Copy the addresses to the bss_conf list */ ifa = idev->ifa_list; @@ -349,7 +349,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return NOTIFY_DONE; } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c14bb816c6a3..b3d1fdd46368 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -161,8 +161,11 @@ void mesh_sta_cleanup(struct sta_info *sta) del_timer_sync(&sta->plink_timer); } - if (changed) + if (changed) { + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); + } } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -577,7 +580,9 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); mod_timer(&ifmsh->housekeeping_timer, round_jiffies(jiffies + @@ -697,25 +702,21 @@ out_free: } static int -ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh) +ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata) { struct beacon_data *old_bcn; int ret; - mutex_lock(&ifmsh->mtx); - - old_bcn = rcu_dereference_protected(ifmsh->beacon, - lockdep_is_held(&ifmsh->mtx)); - ret = ieee80211_mesh_build_beacon(ifmsh); + old_bcn = rcu_dereference_protected(sdata->u.mesh.beacon, + lockdep_is_held(&sdata->wdev.mtx)); + ret = ieee80211_mesh_build_beacon(&sdata->u.mesh); if (ret) /* just reuse old beacon */ - goto out; + return ret; if (old_bcn) kfree_rcu(old_bcn, rcu_head); -out: - mutex_unlock(&ifmsh->mtx); - return ret; + return 0; } void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, @@ -726,7 +727,7 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT))) - if (ieee80211_mesh_rebuild_beacon(&sdata->u.mesh)) + if (ieee80211_mesh_rebuild_beacon(sdata)) return; ieee80211_bss_info_change_notify(sdata, changed); } @@ -788,12 +789,12 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.enable_beacon = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - mutex_lock(&ifmsh->mtx); + sdata_lock(sdata); bcn = rcu_dereference_protected(ifmsh->beacon, - lockdep_is_held(&ifmsh->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); rcu_assign_pointer(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); - mutex_unlock(&ifmsh->mtx); + sdata_unlock(sdata); /* flush STAs and mpaths on this iface */ sta_info_flush(sdata); @@ -1041,7 +1042,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) spin_lock_init(&ifmsh->mesh_preq_queue_lock); spin_lock_init(&ifmsh->sync_offset_lock); RCU_INIT_POINTER(ifmsh->beacon, NULL); - mutex_init(&ifmsh->mtx); sdata->vif.bss_conf.bssid = zero_addr; } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 09bebed99416..6c4da99bc4fb 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -517,7 +517,9 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, ieee80211_mps_frame_release(sta, elems); out: rcu_read_unlock(); + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); } static void mesh_plink_timer(unsigned long data) @@ -1068,6 +1070,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); - if (changed) + if (changed) { + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); + } } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1da3d6be8e11..f44f4caa69ee 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -90,41 +90,6 @@ MODULE_PARM_DESC(probe_wait_ms, */ #define IEEE80211_SIGNAL_AVE_MIN_COUNT 4 -/* - * All cfg80211 functions have to be called outside a locked - * section so that they can acquire a lock themselves... This - * is much simpler than queuing up things in cfg80211, but we - * do need some indirection for that here. - */ -enum rx_mgmt_action { - /* no action required */ - RX_MGMT_NONE, - - /* caller must call cfg80211_send_deauth() */ - RX_MGMT_CFG80211_DEAUTH, - - /* caller must call cfg80211_send_disassoc() */ - RX_MGMT_CFG80211_DISASSOC, - - /* caller must call cfg80211_send_rx_auth() */ - RX_MGMT_CFG80211_RX_AUTH, - - /* caller must call cfg80211_send_rx_assoc() */ - RX_MGMT_CFG80211_RX_ASSOC, - - /* caller must call cfg80211_send_assoc_timeout() */ - RX_MGMT_CFG80211_ASSOC_TIMEOUT, - - /* used when a processed beacon causes a deauth */ - RX_MGMT_CFG80211_TX_DEAUTH, -}; - -/* utils */ -static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) -{ - lockdep_assert_held(&ifmgd->mtx); -} - /* * We can have multiple work items (and connection probing) * scheduling this timer, but we need to take care to only @@ -135,13 +100,14 @@ static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) * has happened -- the work that runs from this timer will * do that. */ -static void run_again(struct ieee80211_if_managed *ifmgd, unsigned long timeout) +static void run_again(struct ieee80211_sub_if_data *sdata, + unsigned long timeout) { - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); - if (!timer_pending(&ifmgd->timer) || - time_before(timeout, ifmgd->timer.expires)) - mod_timer(&ifmgd->timer, timeout); + if (!timer_pending(&sdata->u.mgd.timer) || + time_before(timeout, sdata->u.mgd.timer.expires)) + mod_timer(&sdata->u.mgd.timer, timeout); } void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata) @@ -652,7 +618,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_channel *chan; u32 rates = 0; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); @@ -962,7 +928,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ieee80211_sdata_running(sdata)) return; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) goto out; @@ -985,7 +951,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) IEEE80211_QUEUE_STOP_REASON_CSA); out: ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) @@ -1036,7 +1002,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, const struct ieee80211_ht_operation *ht_oper; int secondary_channel_offset = -1; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (!cbss) return; @@ -1845,7 +1811,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; u32 changed = 0; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (WARN_ON_ONCE(tx && !frame_buf)) return; @@ -2054,7 +2020,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) } ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); - run_again(ifmgd, ifmgd->probe_timeout); + run_again(sdata, ifmgd->probe_timeout); if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) ieee80211_flush_queues(sdata->local, sdata); } @@ -2068,7 +2034,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (!ieee80211_sdata_running(sdata)) return; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) goto out; @@ -2122,7 +2088,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, ifmgd->probe_send_count = 0; ieee80211_mgd_probe_ap_send(sdata); out: - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, @@ -2138,7 +2104,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) return NULL; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (ifmgd->associated) cbss = ifmgd->associated; @@ -2171,9 +2137,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } @@ -2184,13 +2150,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - mutex_unlock(&ifmgd->mtx); - /* - * must be outside lock due to cfg80211, - * but that's not a problem. - */ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); + sdata_unlock(sdata); } static void ieee80211_beacon_connection_loss_work(struct work_struct *work) @@ -2257,7 +2219,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if (!assoc) { sta_info_destroy_addr(sdata, auth_data->bss->bssid); @@ -2298,27 +2260,26 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, auth_data->key_idx, tx_flags); } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 bssid[ETH_ALEN]; u16 auth_alg, auth_transaction, status_code; struct sta_info *sta; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 6) - return RX_MGMT_NONE; + return; if (!ifmgd->auth_data || ifmgd->auth_data->done) - return RX_MGMT_NONE; + return; memcpy(bssid, ifmgd->auth_data->bss->bssid, ETH_ALEN); if (!ether_addr_equal(bssid, mgmt->bssid)) - return RX_MGMT_NONE; + return; auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); @@ -2330,14 +2291,15 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, mgmt->sa, auth_alg, ifmgd->auth_data->algorithm, auth_transaction, ifmgd->auth_data->expected_transaction); - return RX_MGMT_NONE; + return; } if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); ieee80211_destroy_auth_data(sdata, false); - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + return; } switch (ifmgd->auth_data->algorithm) { @@ -2350,20 +2312,20 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, if (ifmgd->auth_data->expected_transaction != 4) { ieee80211_auth_challenge(sdata, mgmt, len); /* need another frame */ - return RX_MGMT_NONE; + return; } break; default: WARN_ONCE(1, "invalid auth alg %d", ifmgd->auth_data->algorithm); - return RX_MGMT_NONE; + return; } sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && ifmgd->auth_data->expected_transaction != 2) { @@ -2371,7 +2333,8 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, * Report auth frame to user space for processing since another * round of Authentication frames is still needed. */ - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + return; } /* move station state to auth */ @@ -2387,30 +2350,29 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&sdata->local->sta_mtx); - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + return; out_err: mutex_unlock(&sdata->local->sta_mtx); /* ignore frame -- wait for timeout */ - return RX_MGMT_NONE; } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *bssid = NULL; u16 reason_code; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 2) - return RX_MGMT_NONE; + return; if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; bssid = ifmgd->associated->bssid; @@ -2421,25 +2383,24 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - return RX_MGMT_CFG80211_DEAUTH; + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, len); } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 reason_code; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 2) - return RX_MGMT_NONE; + return; if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); @@ -2448,7 +2409,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - return RX_MGMT_CFG80211_DISASSOC; + cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, len); } static void ieee80211_get_rates(struct ieee80211_supported_band *sband, @@ -2498,7 +2459,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if (!assoc) { sta_info_destroy_addr(sdata, assoc_data->bss->bssid); @@ -2679,10 +2640,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, return true; } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - struct cfg80211_bss **bss) +static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data; @@ -2690,13 +2650,14 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems elems; u8 *pos; bool reassoc; + struct cfg80211_bss *bss; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (!assoc_data) - return RX_MGMT_NONE; + return; if (!ether_addr_equal(assoc_data->bss->bssid, mgmt->bssid)) - return RX_MGMT_NONE; + return; /* * AssocResp and ReassocResp have identical structure, so process both @@ -2704,7 +2665,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, */ if (len < 24 + 6) - return RX_MGMT_NONE; + return; reassoc = ieee80211_is_reassoc_req(mgmt->frame_control); capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); @@ -2731,22 +2692,23 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, assoc_data->timeout = jiffies + msecs_to_jiffies(ms); assoc_data->timeout_started = true; if (ms > IEEE80211_ASSOC_TIMEOUT) - run_again(ifmgd, assoc_data->timeout); - return RX_MGMT_NONE; + run_again(sdata, assoc_data->timeout); + return; } - *bss = assoc_data->bss; + bss = assoc_data->bss; if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied association (code=%d)\n", mgmt->sa, status_code); ieee80211_destroy_assoc_data(sdata, false); } else { - if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { + if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); - cfg80211_put_bss(sdata->local->hw.wiphy, *bss); - return RX_MGMT_CFG80211_ASSOC_TIMEOUT; + cfg80211_put_bss(sdata->local->hw.wiphy, bss); + cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); + return; } sdata_info(sdata, "associated\n"); @@ -2758,7 +2720,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_destroy_assoc_data(sdata, true); } - return RX_MGMT_CFG80211_RX_ASSOC; + cfg80211_send_rx_assoc(sdata->dev, bss, (u8 *)mgmt, len); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -2772,7 +2734,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; bool need_ps = false; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if ((sdata->u.mgd.associated && ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) || @@ -2831,7 +2793,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ifmgd = &sdata->u.mgd; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (!ether_addr_equal(mgmt->da, sdata->vif.addr)) return; /* ignore ProbeResp to foreign address */ @@ -2856,7 +2818,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ifmgd->auth_data->tries = 0; ifmgd->auth_data->timeout = jiffies; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); } } @@ -2881,10 +2843,9 @@ static const u64 care_about_ies = (1ULL << WLAN_EID_HT_CAPABILITY) | (1ULL << WLAN_EID_HT_OPERATION); -static enum rx_mgmt_action -ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - u8 *deauth_buf, struct ieee80211_rx_status *rx_status) +static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; @@ -2899,24 +2860,25 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, u8 erp_value = 0; u32 ncrc; u8 *bssid; + u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); /* Process beacon from the current BSS */ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; if (baselen > len) - return RX_MGMT_NONE; + return; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); - return RX_MGMT_NONE; + return; } if (rx_status->freq != chanctx_conf->def.chan->center_freq) { rcu_read_unlock(); - return RX_MGMT_NONE; + return; } chan = chanctx_conf->def.chan; rcu_read_unlock(); @@ -2943,13 +2905,13 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, /* continue assoc process */ ifmgd->assoc_data->timeout = jiffies; ifmgd->assoc_data->timeout_started = true; - run_again(ifmgd, ifmgd->assoc_data->timeout); - return RX_MGMT_NONE; + run_again(sdata, ifmgd->assoc_data->timeout); + return; } if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; bssid = ifmgd->associated->bssid; /* Track average RSSI from the Beacon frames of the current AP */ @@ -3095,7 +3057,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) - return RX_MGMT_NONE; + return; ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; @@ -3151,7 +3113,9 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, true, deauth_buf); - return RX_MGMT_CFG80211_TX_DEAUTH; + cfg80211_send_deauth(sdata->dev, deauth_buf, + sizeof(deauth_buf)); + return; } if (sta && elems.opmode_notif) @@ -3168,19 +3132,13 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, elems.pwr_constr_elem); ieee80211_bss_info_change_notify(sdata, changed); - - return RX_MGMT_NONE; } void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; - struct cfg80211_bss *bss = NULL; - enum rx_mgmt_action rma = RX_MGMT_NONE; - u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; u16 fc; struct ieee802_11_elems elems; int ies_len; @@ -3189,28 +3147,27 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_BEACON: - rma = ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, - deauth_buf, rx_status); + ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_PROBE_RESP: ieee80211_rx_mgmt_probe_resp(sdata, skb); break; case IEEE80211_STYPE_AUTH: - rma = ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_DEAUTH: - rma = ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_DISASSOC: - rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ASSOC_RESP: case IEEE80211_STYPE_REASSOC_RESP: - rma = ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, &bss); + ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { @@ -3256,34 +3213,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } break; } - mutex_unlock(&ifmgd->mtx); - - switch (rma) { - case RX_MGMT_NONE: - /* no action */ - break; - case RX_MGMT_CFG80211_DEAUTH: - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_DISASSOC: - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_RX_AUTH: - cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_RX_ASSOC: - cfg80211_send_rx_assoc(sdata->dev, bss, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_ASSOC_TIMEOUT: - cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); - break; - case RX_MGMT_CFG80211_TX_DEAUTH: - cfg80211_send_deauth(sdata->dev, deauth_buf, - sizeof(deauth_buf)); - break; - default: - WARN(1, "unexpected: %d", rma); - } + sdata_unlock(sdata); } static void ieee80211_sta_timer(unsigned long data) @@ -3297,20 +3227,12 @@ static void ieee80211_sta_timer(unsigned long data) static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, u8 *bssid, u8 reason, bool tx) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, tx, frame_buf); - mutex_unlock(&ifmgd->mtx); - /* - * must be outside lock due to cfg80211, - * but that's not a problem. - */ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - - mutex_lock(&ifmgd->mtx); } static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) @@ -3320,7 +3242,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data; u32 tx_flags = 0; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (WARN_ON_ONCE(!auth_data)) return -EINVAL; @@ -3393,7 +3315,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, auth_data->timeout); + run_again(sdata, auth_data->timeout); } else { auth_data->timeout_started = false; } @@ -3406,7 +3328,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; struct ieee80211_local *local = sdata->local; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); assoc_data->tries++; if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) { @@ -3430,7 +3352,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; assoc_data->timeout_started = true; - run_again(&sdata->u.mgd, assoc_data->timeout); + run_again(sdata, assoc_data->timeout); } else { assoc_data->timeout_started = false; } @@ -3455,7 +3377,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (ifmgd->status_received) { __le16 fc = ifmgd->status_fc; @@ -3467,7 +3389,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (status_acked) { ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT_SHORT; - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); } else { ifmgd->auth_data->timeout = jiffies - 1; } @@ -3478,7 +3400,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (status_acked) { ifmgd->assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT_SHORT; - run_again(ifmgd, ifmgd->assoc_data->timeout); + run_again(sdata, ifmgd->assoc_data->timeout); } else { ifmgd->assoc_data->timeout = jiffies - 1; } @@ -3501,12 +3423,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_auth_data(sdata, false); - mutex_unlock(&ifmgd->mtx); cfg80211_send_auth_timeout(sdata->dev, bssid); - mutex_lock(&ifmgd->mtx); } } else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started) - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started && time_after(jiffies, ifmgd->assoc_data->timeout)) { @@ -3519,12 +3439,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_assoc_data(sdata, false); - mutex_unlock(&ifmgd->mtx); cfg80211_send_assoc_timeout(sdata->dev, bssid); - mutex_lock(&ifmgd->mtx); } } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) - run_again(ifmgd, ifmgd->assoc_data->timeout); + run_again(sdata, ifmgd->assoc_data->timeout); if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL) && @@ -3558,7 +3476,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) false); } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) - run_again(ifmgd, ifmgd->probe_timeout); + run_again(sdata, ifmgd->probe_timeout); else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { mlme_dbg(sdata, "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", @@ -3587,7 +3505,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) } } - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } static void ieee80211_sta_bcn_mon_timer(unsigned long data) @@ -3648,9 +3566,9 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } @@ -3661,10 +3579,10 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) ifmgd->associated->bssid, WLAN_REASON_UNSPECIFIED, true); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } #endif @@ -3696,8 +3614,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len; ifmgd->p2p_noa_index = -1; - mutex_init(&ifmgd->mtx); - if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC; else @@ -4053,8 +3969,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* try to authenticate/probe */ - mutex_lock(&ifmgd->mtx); - if ((ifmgd->auth_data && !ifmgd->auth_data->done) || ifmgd->assoc_data) { err = -EBUSY; @@ -4074,8 +3988,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - __cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -4092,8 +4006,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* hold our own reference */ cfg80211_ref_bss(local->hw.wiphy, auth_data->bss); - err = 0; - goto out_unlock; + return 0; err_clear: memset(ifmgd->bssid, 0, ETH_ALEN); @@ -4101,9 +4014,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, ifmgd->auth_data = NULL; err_free: kfree(auth_data); - out_unlock: - mutex_unlock(&ifmgd->mtx); - return err; } @@ -4134,8 +4044,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->ssid_len = ssidie[1]; rcu_read_unlock(); - mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -4143,8 +4051,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - __cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); } if (ifmgd->auth_data && !ifmgd->auth_data->done) { @@ -4338,7 +4246,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } rcu_read_unlock(); - run_again(ifmgd, assoc_data->timeout); + run_again(sdata, assoc_data->timeout); if (bss->corrupt_data) { char *corrupt_type = "data"; @@ -4354,17 +4262,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, corrupt_type); } - err = 0; - goto out; + return 0; err_clear: memset(ifmgd->bssid, 0, ETH_ALEN); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->assoc_data = NULL; err_free: kfree(assoc_data); - out: - mutex_unlock(&ifmgd->mtx); - return err; } @@ -4376,8 +4280,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, bool tx = !req->local_state_change; bool report_frame = false; - mutex_lock(&ifmgd->mtx); - sdata_info(sdata, "deauthenticating from %pM by local choice (reason=%d)\n", req->bssid, req->reason_code); @@ -4389,7 +4291,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, tx, frame_buf); ieee80211_destroy_auth_data(sdata, false); - mutex_unlock(&ifmgd->mtx); report_frame = true; goto out; @@ -4401,12 +4302,11 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, tx, frame_buf); report_frame = true; } - mutex_unlock(&ifmgd->mtx); out: if (report_frame) - __cfg80211_send_deauth(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_send_deauth(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } @@ -4418,18 +4318,14 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, u8 bssid[ETH_ALEN]; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - mutex_lock(&ifmgd->mtx); - /* * cfg80211 should catch this ... but it's racy since * we can receive a disassoc frame, process it, hand it * to cfg80211 while that's in a locked section already * trying to tell us that the user wants to disconnect. */ - if (ifmgd->associated != req->bss) { - mutex_unlock(&ifmgd->mtx); + if (ifmgd->associated != req->bss) return -ENOLINK; - } sdata_info(sdata, "disassociating from %pM by local choice (reason=%d)\n", @@ -4439,10 +4335,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC, req->reason_code, !req->local_state_change, frame_buf); - mutex_unlock(&ifmgd->mtx); - __cfg80211_send_disassoc(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_send_disassoc(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } @@ -4462,13 +4357,13 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->csa_connection_drop_work); cancel_work_sync(&ifmgd->chswitch_work); - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (ifmgd->assoc_data) ieee80211_destroy_assoc_data(sdata, false); if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); del_timer_sync(&ifmgd->timer); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ffdfe4bc89ad..2a8d759324c2 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1581,9 +1581,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sdata->u.mgd.dtim_period) changed |= BSS_CHANGED_DTIM_PERIOD; - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); ieee80211_bss_info_change_notify(sdata, changed); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); break; case NL80211_IFTYPE_ADHOC: changed |= BSS_CHANGED_IBSS; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 68b40f21bc38..80ffb0138919 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -25,12 +25,9 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); trace_cfg80211_send_rx_auth(dev); - wdev_lock(wdev); nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); cfg80211_sme_rx_auth(dev, buf, len); - - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_rx_auth); @@ -46,7 +43,6 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); trace_cfg80211_send_rx_assoc(dev, bss); - wdev_lock(wdev); status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); @@ -59,7 +55,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && cfg80211_sme_failed_reassoc(wdev)) { cfg80211_put_bss(wiphy, bss); - goto out; + return; } nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL); @@ -71,7 +67,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, * sme will schedule work that does it later. */ cfg80211_put_bss(wiphy, bss); - goto out; + return; } if (!wdev->conn && wdev->sme_state == CFG80211_SME_IDLE) { @@ -87,13 +83,11 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, status_code, status_code == WLAN_STATUS_SUCCESS, bss); - out: - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_rx_assoc); -void __cfg80211_send_deauth(struct net_device *dev, - const u8 *buf, size_t len) +void cfg80211_send_deauth(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -102,7 +96,7 @@ void __cfg80211_send_deauth(struct net_device *dev, const u8 *bssid = mgmt->bssid; bool was_current = false; - trace___cfg80211_send_deauth(dev); + trace_cfg80211_send_deauth(dev); ASSERT_WDEV_LOCK(wdev); if (wdev->current_bss && @@ -129,20 +123,10 @@ void __cfg80211_send_deauth(struct net_device *dev, false, NULL); } } -EXPORT_SYMBOL(__cfg80211_send_deauth); - -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - wdev_lock(wdev); - __cfg80211_send_deauth(dev, buf, len); - wdev_unlock(wdev); -} EXPORT_SYMBOL(cfg80211_send_deauth); -void __cfg80211_send_disassoc(struct net_device *dev, - const u8 *buf, size_t len) +void cfg80211_send_disassoc(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -152,7 +136,7 @@ void __cfg80211_send_disassoc(struct net_device *dev, u16 reason_code; bool from_ap; - trace___cfg80211_send_disassoc(dev); + trace_cfg80211_send_disassoc(dev); ASSERT_WDEV_LOCK(wdev); nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); @@ -175,16 +159,6 @@ void __cfg80211_send_disassoc(struct net_device *dev, from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); } -EXPORT_SYMBOL(__cfg80211_send_disassoc); - -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - wdev_lock(wdev); - __cfg80211_send_disassoc(dev, buf, len); - wdev_unlock(wdev); -} EXPORT_SYMBOL(cfg80211_send_disassoc); void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) @@ -194,15 +168,12 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); trace_cfg80211_send_auth_timeout(dev, addr); - wdev_lock(wdev); nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); if (wdev->sme_state == CFG80211_SME_CONNECTING) __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_auth_timeout); @@ -213,15 +184,12 @@ void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); trace_cfg80211_send_assoc_timeout(dev, addr); - wdev_lock(wdev); nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); if (wdev->sme_state == CFG80211_SME_CONNECTING) __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_assoc_timeout); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5755bc14abbd..23fafeae8a10 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1911,12 +1911,12 @@ TRACE_EVENT(cfg80211_send_rx_assoc, NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG) ); -DEFINE_EVENT(netdev_evt_only, __cfg80211_send_deauth, +DEFINE_EVENT(netdev_evt_only, cfg80211_send_deauth, TP_PROTO(struct net_device *netdev), TP_ARGS(netdev) ); -DEFINE_EVENT(netdev_evt_only, __cfg80211_send_disassoc, +DEFINE_EVENT(netdev_evt_only, cfg80211_send_disassoc, TP_PROTO(struct net_device *netdev), TP_ARGS(netdev) ); -- cgit From 1cdd59ce8dcfa850ebb8ac2ab000a2ea572d1d69 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 May 2013 18:58:00 +0200 Subject: cfg80211: simplify and correct P2P-Device scan check If the driver for some reason successfully finishes scanning while in p2p_stop_device(), cfg80211 will still set it to aborted. Simplify this code using the new 'notified' value and only mark it aborted in case the driver didn't notify cfg80211 at all (in which case we also leak the request to not crash, this is a driver bug.) Signed-off-by: Johannes Berg --- net/wireless/core.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 5fc642d4071b..afcb9ec70adb 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -204,18 +204,15 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->opencount--; if (rdev->scan_req && rdev->scan_req->wdev == wdev) { - bool busy = work_busy(&rdev->scan_done_wk); - /* - * If the work isn't pending or running (in which case it would - * be waiting for the lock we hold) the driver didn't properly - * cancel the scan when the interface was removed. In this case - * warn and leak the scan request object to not crash later. + * If the scan request wasn't notified as done, set it + * to aborted and leak it after a warning. The driver + * should have notified us that it ended at the latest + * during rdev_stop_p2p_device(). */ - WARN_ON(!busy); - - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, !busy); + if (WARN_ON(!rdev->scan_req->notified)) + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, !rdev->scan_req->notified); } } -- cgit From db2424c58e5962a87888d25d29ceb0873eef6348 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 May 2013 19:07:52 +0200 Subject: regulatory: use RCU in regulatory_hint_11d() Since it just does a quick check of the last regulatory request, the function doesn't have to hold the reg mutex but can use RCU instead. Signed-off-by: Johannes Berg --- net/wireless/reg.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index e76559618588..17e5eccb42c8 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1715,20 +1715,18 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, { char alpha2[2]; enum environment_cap env = ENVIRON_ANY; - struct regulatory_request *request, *lr; - - mutex_lock(®_mutex); - lr = get_last_request(); - - if (unlikely(!lr)) - goto out; + struct regulatory_request *request = NULL, *lr; /* IE len must be evenly divisible by 2 */ if (country_ie_len & 0x01) - goto out; + return; if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) - goto out; + return; + + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (!request) + return; alpha2[0] = country_ie[0]; alpha2[1] = country_ie[1]; @@ -1738,6 +1736,12 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, else if (country_ie[2] == 'O') env = ENVIRON_OUTDOOR; + rcu_read_lock(); + lr = get_last_request(); + + if (unlikely(!lr)) + goto out; + /* * We will run this only upon a successful connection on cfg80211. * We leave conflict resolution to the workqueue, where can hold @@ -1747,10 +1751,6 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, lr->wiphy_idx != WIPHY_IDX_INVALID) goto out; - request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); - if (!request) - goto out; - request->wiphy_idx = get_wiphy_idx(wiphy); request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; @@ -1758,8 +1758,10 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, request->country_ie_env = env; queue_regulatory_request(request); + request = NULL; out: - mutex_unlock(®_mutex); + kfree(request); + rcu_read_unlock(); } static void restore_alpha2(char *alpha2, bool reset_user) -- cgit From 38fd2143fa653f80729800c1d61d4207b91dca42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 May 2013 19:17:17 +0200 Subject: regulatory: remove reg_mutex The reg_mutex is similar to the ones I just removed in cfg80211 but even less useful since it protects global data, and we hold the RTNL in all places (except module unload) already. Signed-off-by: Johannes Berg --- net/wireless/reg.c | 76 ++++++++++++++---------------------------------------- 1 file changed, 19 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 17e5eccb42c8..e1d6749234c6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -81,7 +81,10 @@ static struct regulatory_request core_request_world = { .country_ie_env = ENVIRON_ANY, }; -/* Receipt of information from last regulatory request */ +/* + * Receipt of information from last regulatory request, + * protected by RTNL (and can be accessed with RCU protection) + */ static struct regulatory_request __rcu *last_request = (void __rcu *)&core_request_world; @@ -96,39 +99,25 @@ static struct device_type reg_device_type = { * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no * information to give us an alpha2. + * (protected by RTNL, can be read under RCU) */ const struct ieee80211_regdomain __rcu *cfg80211_regdomain; -/* - * Protects static reg.c components: - * - cfg80211_regdomain (if not used with RCU) - * - cfg80211_world_regdom - * - last_request (if not used with RCU) - * - reg_num_devs_support_basehint - */ -static DEFINE_MUTEX(reg_mutex); - /* * Number of devices that registered to the core * that support cellular base station regulatory hints + * (protected by RTNL) */ static int reg_num_devs_support_basehint; -static inline void assert_reg_lock(void) -{ - lockdep_assert_held(®_mutex); -} - static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { - return rcu_dereference_protected(cfg80211_regdomain, - lockdep_is_held(®_mutex)); + return rtnl_dereference(cfg80211_regdomain); } static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { - return rcu_dereference_protected(wiphy->regd, - lockdep_is_held(®_mutex)); + return rtnl_dereference(wiphy->regd); } static void rcu_free_regdom(const struct ieee80211_regdomain *r) @@ -140,8 +129,7 @@ static void rcu_free_regdom(const struct ieee80211_regdomain *r) static struct regulatory_request *get_last_request(void) { - return rcu_dereference_check(last_request, - lockdep_is_held(®_mutex)); + return rcu_dereference_rtnl(last_request); } /* Used to queue up regulatory hints */ @@ -200,6 +188,7 @@ static const struct ieee80211_regdomain world_regdom = { } }; +/* protected by RTNL */ static const struct ieee80211_regdomain *cfg80211_world_regdom = &world_regdom; @@ -215,7 +204,7 @@ static void reset_regdomains(bool full_reset, const struct ieee80211_regdomain *r; struct regulatory_request *lr; - assert_reg_lock(); + ASSERT_RTNL(); r = get_cfg80211_regdom(); @@ -936,13 +925,7 @@ static bool reg_request_cell_base(struct regulatory_request *request) bool reg_last_request_cell_base(void) { - bool val; - - mutex_lock(®_mutex); - val = reg_request_cell_base(get_last_request()); - mutex_unlock(®_mutex); - - return val; + return reg_request_cell_base(get_last_request()); } #ifdef CONFIG_CFG80211_CERTIFICATION_ONUS @@ -1444,8 +1427,6 @@ static void reg_set_request_processed(void) * what it believes should be the current regulatory domain. * * Returns one of the different reg request treatment values. - * - * Caller must hold ®_mutex */ static enum reg_request_treatment __regulatory_hint(struct wiphy *wiphy, @@ -1622,10 +1603,8 @@ static void reg_process_pending_beacon_hints(void) static void reg_todo(struct work_struct *work) { rtnl_lock(); - mutex_lock(®_mutex); reg_process_pending_hints(); reg_process_pending_beacon_hints(); - mutex_unlock(®_mutex); rtnl_unlock(); } @@ -1851,8 +1830,6 @@ static void restore_regulatory_settings(bool reset_user) ASSERT_RTNL(); - mutex_lock(®_mutex); - reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); @@ -1906,8 +1883,6 @@ static void restore_regulatory_settings(bool reset_user) list_splice_tail_init(&tmp_reg_req_list, ®_requests_list); spin_unlock(®_requests_lock); - mutex_unlock(®_mutex); - REG_DBG_PRINT("Kicking the queue\n"); schedule_work(®_work); @@ -2222,7 +2197,6 @@ int set_regdom(const struct ieee80211_regdomain *rd) struct regulatory_request *lr; int r; - mutex_lock(®_mutex); lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ @@ -2232,14 +2206,12 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); kfree(rd); - goto out; + return r; } /* This would make this whole thing pointless */ - if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) { - r = -EINVAL; - goto out; - } + if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) + return -EINVAL; /* update all wiphys now with the new established regulatory domain */ update_all_wiphy_regulatory(lr->initiator); @@ -2250,10 +2222,7 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); - out: - mutex_unlock(®_mutex); - - return r; + return 0; } int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) @@ -2278,14 +2247,10 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) void wiphy_regulatory_register(struct wiphy *wiphy) { - mutex_lock(®_mutex); - if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint++; wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); - - mutex_unlock(®_mutex); } void wiphy_regulatory_deregister(struct wiphy *wiphy) @@ -2293,7 +2258,6 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) struct wiphy *request_wiphy = NULL; struct regulatory_request *lr; - mutex_lock(®_mutex); lr = get_last_request(); if (!reg_dev_ignore_cell_hint(wiphy)) @@ -2306,12 +2270,10 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy || request_wiphy != wiphy) - goto out; + return; lr->wiphy_idx = WIPHY_IDX_INVALID; lr->country_ie_env = ENVIRON_ANY; -out: - mutex_unlock(®_mutex); } static void reg_timeout_work(struct work_struct *work) @@ -2375,9 +2337,9 @@ void regulatory_exit(void) cancel_delayed_work_sync(®_timeout); /* Lock to suppress warnings */ - mutex_lock(®_mutex); + rtnl_lock(); reset_regdomains(true, NULL); - mutex_unlock(®_mutex); + rtnl_unlock(); dev_set_uevent_suppress(®_pdev->dev, true); -- cgit From 91bf9b26fc95c505846bc2f744a73b51f2aaee1d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2013 17:44:01 +0200 Subject: cfg80211: remove some locked wrappers from mlme API By making all the API functions require wdev locking we can clean up the API a bit, getting rid of the locking version of each function. This also decreases the size of cfg80211 by a small amount. Signed-off-by: Johannes Berg --- net/wireless/core.h | 25 ++--------- net/wireless/mlme.c | 120 ++++++++++--------------------------------------- net/wireless/nl80211.c | 34 +++++++++----- net/wireless/sme.c | 40 ++++++++--------- 4 files changed, 70 insertions(+), 149 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index d21a0fc01401..c8f87dfd50d3 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -282,38 +282,21 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev); /* MLME */ -int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, - const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len); int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, const u8 *bssid, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, + const u8 *bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, const u8 *sae_data, int sae_data_len); -int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, const u8 *bssid, const u8 *ssid, int ssid_len, struct cfg80211_assoc_request *req); -int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 80ffb0138919..7bde5d9c0003 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -221,15 +221,15 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, EXPORT_SYMBOL(cfg80211_michael_mic_failure); /* some MLME handling for userspace SME */ -int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, - const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) +int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, + const u8 *bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx, + const u8 *sae_data, int sae_data_len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_auth_request req = { @@ -271,28 +271,6 @@ out: return err; } -int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) -{ - int err; - - ASSERT_RTNL(); - - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len, - key, key_len, key_idx, - sae_data, sae_data_len); - wdev_unlock(dev->ieee80211_ptr); - - return err; -} - /* Do a logical ht_capa &= ht_capa_mask. */ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask) @@ -327,12 +305,12 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, p1[i] &= p2[i]; } -int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req) +int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + const u8 *bssid, + const u8 *ssid, int ssid_len, + struct cfg80211_assoc_request *req) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -382,30 +360,10 @@ out: return err; } -int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - ASSERT_RTNL(); - - wdev_lock(wdev); - err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, - ssid, ssid_len, req); - wdev_unlock(wdev); - - return err; -} - -int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) +int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_deauth_request req = { @@ -425,26 +383,10 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, return rdev_deauth(rdev, dev, &req); } -int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason, - local_state_change); - wdev_unlock(wdev); - - return err; -} - -static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) +int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_disassoc_request req = { @@ -470,22 +412,6 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, return rdev_disassoc(rdev, dev, &req); } -int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason, - local_state_change); - wdev_unlock(wdev); - - return err; -} - void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 74cdb1a0cf31..49c2f2f511dc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5904,10 +5904,13 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (local_state_change) return 0; - return cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len, - key.p.key, key.p.key_len, key.idx, - sae_data, sae_data_len); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, + ssid, ssid_len, ie, ie_len, + key.p.key, key.p.key_len, key.idx, + sae_data, sae_data_len); + wdev_unlock(dev->ieee80211_ptr); + return err; } static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, @@ -6074,9 +6077,12 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); - if (!err) + if (!err) { + wdev_lock(dev->ieee80211_ptr); err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, ssid_len, &req); + wdev_unlock(dev->ieee80211_ptr); + } return err; } @@ -6086,7 +6092,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0; + int ie_len = 0, err; u16 reason_code; bool local_state_change; @@ -6121,8 +6127,11 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); + wdev_unlock(dev->ieee80211_ptr); + return err; } static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) @@ -6130,7 +6139,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0; + int ie_len = 0, err; u16 reason_code; bool local_state_change; @@ -6165,8 +6174,11 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); + wdev_unlock(dev->ieee80211_ptr); + return err; } static bool diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 4dbf31407a56..31d67add7424 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -169,13 +169,13 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_AUTHENTICATE_NEXT: BUG_ON(!rdev->ops->auth); wdev->conn->state = CFG80211_CONN_AUTHENTICATING; - return __cfg80211_mlme_auth(rdev, wdev->netdev, - params->channel, params->auth_type, - params->bssid, - params->ssid, params->ssid_len, - NULL, 0, - params->key, params->key_len, - params->key_idx, NULL, 0); + return cfg80211_mlme_auth(rdev, wdev->netdev, + params->channel, params->auth_type, + params->bssid, + params->ssid, params->ssid_len, + NULL, 0, + params->key, params->key_len, + params->key_idx, NULL, 0); case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -191,19 +191,19 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) req.vht_capa = params->vht_capa; req.vht_capa_mask = params->vht_capa_mask; - err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, - params->bssid, params->ssid, - params->ssid_len, &req); + err = cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, + params->bssid, params->ssid, + params->ssid_len, &req); if (err) - __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, - NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, - false); + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, + false); return err; case CFG80211_CONN_DEAUTH_ASSOC_FAIL: - __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, - NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); /* return an error so that we call __cfg80211_connect_result() */ return -EINVAL; default: @@ -961,7 +961,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, } /* wdev->conn->params.bssid must be set if > SCANNING */ - err = __cfg80211_mlme_deauth(rdev, dev, + err = cfg80211_mlme_deauth(rdev, dev, wdev->conn->params.bssid, NULL, 0, reason, false); if (err) @@ -1018,6 +1018,6 @@ void cfg80211_sme_disassoc(struct net_device *dev, memcpy(bssid, bss->pub.bssid, ETH_ALEN); - __cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); + cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); } -- cgit From 83739b03de97049181d711c95200b94a14d3f693 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2013 17:44:01 +0200 Subject: cfg80211: remove some locked wrappers from sme API By making all the API functions require wdev locking we can clean up the API a bit, getting rid of the locking version of each function. This also decreases the size of cfg80211 by a small amount. Signed-off-by: Johannes Berg --- net/wireless/core.c | 4 ++-- net/wireless/core.h | 11 ++--------- net/wireless/nl80211.c | 10 ++++++++-- net/wireless/sme.c | 41 +++++++---------------------------------- net/wireless/util.c | 2 ++ net/wireless/wext-sme.c | 23 +++++++++++------------ 6 files changed, 32 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index afcb9ec70adb..ee422871fe9e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -755,8 +755,8 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, wdev->wext.ie_len = 0; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; #endif - __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, true); + cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, true); wdev_unlock(wdev); break; case NL80211_IFTYPE_MESH_POINT: diff --git a/net/wireless/core.h b/net/wireless/core.h index c8f87dfd50d3..b4b4a566626b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -328,18 +328,11 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, const struct ieee80211_vht_cap *vht_capa_mask); /* SME */ -int __cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys, - const u8 *prev_bssid); int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys); -int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason, - bool wextev); + struct cfg80211_cached_keys *connkeys, + const u8 *prev_bssid); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 49c2f2f511dc..a09f36bb957c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6683,7 +6683,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) sizeof(connect.vht_capa)); } - err = cfg80211_connect(rdev, dev, &connect, connkeys); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL); + wdev_unlock(dev->ieee80211_ptr); if (err) kfree(connkeys); return err; @@ -6694,6 +6696,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u16 reason; + int ret; if (!info->attrs[NL80211_ATTR_REASON_CODE]) reason = WLAN_REASON_DEAUTH_LEAVING; @@ -6707,7 +6710,10 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - return cfg80211_disconnect(rdev, dev, reason, true); + wdev_lock(dev->ieee80211_ptr); + ret = cfg80211_disconnect(rdev, dev, reason, true); + wdev_unlock(dev->ieee80211_ptr); + return ret; } static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 31d67add7424..81be95f3be74 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -760,11 +760,11 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, } EXPORT_SYMBOL(cfg80211_disconnected); -int __cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys, - const u8 *prev_bssid) +int cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys, + const u8 *prev_bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss = NULL; @@ -911,22 +911,8 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, } } -int cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys) -{ - int err; - - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); - wdev_unlock(dev->ieee80211_ptr); - - return err; -} - -int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason, bool wextev) +int cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, u16 reason, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -983,19 +969,6 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, return 0; } -int cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - u16 reason, bool wextev) -{ - int err; - - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_disconnect(rdev, dev, reason, wextev); - wdev_unlock(dev->ieee80211_ptr); - - return err; -} - void cfg80211_sme_disassoc(struct net_device *dev, struct cfg80211_internal_bss *bss) { diff --git a/net/wireless/util.c b/net/wireless/util.c index 501724257af5..74458b7f61eb 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -858,8 +858,10 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: + wdev_lock(dev->ieee80211_ptr); cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); + wdev_unlock(dev->ieee80211_ptr); break; case NL80211_IFTYPE_MESH_POINT: /* mesh should be handled? */ diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index aeefd6817189..a53f8404f451 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -54,8 +54,8 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, if (wdev->wext.prev_bssid_valid) prev_bssid = wdev->wext.prev_bssid; - err = __cfg80211_connect(rdev, wdev->netdev, - &wdev->wext.connect, ck, prev_bssid); + err = cfg80211_connect(rdev, wdev->netdev, + &wdev->wext.connect, ck, prev_bssid); if (err) kfree(ck); @@ -100,8 +100,8 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, /* if SSID set, we'll try right again, avoid event */ if (wdev->wext.connect.ssid_len) event = false; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, event); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, event); if (err) goto out; } @@ -199,8 +199,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, /* if SSID set now, we'll try to connect, avoid event */ if (len) event = false; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, event); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, event); if (err) goto out; } @@ -288,8 +288,8 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, ether_addr_equal(bssid, wdev->wext.connect.bssid)) goto out; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); if (err) goto out; } @@ -365,8 +365,8 @@ int cfg80211_wext_siwgenie(struct net_device *dev, wdev->wext.ie_len = ie_len; if (wdev->sme_state != CFG80211_SME_IDLE) { - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); if (err) goto out; } @@ -402,8 +402,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev, switch (mlme->cmd) { case IW_MLME_DEAUTH: case IW_MLME_DISASSOC: - err = __cfg80211_disconnect(rdev, dev, mlme->reason_code, - true); + err = cfg80211_disconnect(rdev, dev, mlme->reason_code, true); break; default: err = -EOPNOTSUPP; -- cgit From 079956742452494326081349a66942654498cafa Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Mon, 29 Apr 2013 11:55:10 -0700 Subject: ipvs: change type of netns_ipvs->sysctl_sync_qlen_max This member of struct netns_ipvs is calculated from nr_free_buffer_pages so change its type to unsigned long in case of overflow. Also, type of its related proc var sync_qlen_max and the return type of function sysctl_sync_qlen_max() should be changed to unsigned long, too. Besides, the type of ipvs_master_sync_state->sync_queue_len should be changed to unsigned long accordingly. Signed-off-by: Zhang Yanfei Cc: Julian Anastasov Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5b142fb16480..70146496e73a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1716,9 +1716,9 @@ static struct ctl_table vs_vars[] = { }, { .procname = "sync_qlen_max", - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "sync_sock_size", -- cgit From 6d0bfe22611602f36617bc7aa2ffa1bbb2f54c67 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 22 May 2013 20:17:31 +0000 Subject: net: ipv6: Add IPv6 support to the ping socket. This adds the ability to send ICMPv6 echo requests without a raw socket. The equivalent ability for ICMPv4 was added in 2011. Instead of having separate code paths for IPv4 and IPv6, make most of the code in net/ipv4/ping.c dual-stack and only add a few IPv6-specific bits (like the protocol definition) to a new net/ipv6/ping.c. Hopefully this will reduce divergence and/or duplication of bugs in the future. Caveats: - Setting options via ancillary data (e.g., using IPV6_PKTINFO to specify the outgoing interface) is not yet supported. - There are no separate security settings for IPv4 and IPv6; everything is controlled by /proc/net/ipv4/ping_group_range. - The proc interface does not yet display IPv6 ping sockets properly. Tested with a patched copy of ping6 and using raw socket calls. Compiles and works with all of CONFIG_IPV6={n,m,y}. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 5 +- net/ipv4/ping.c | 557 +++++++++++++++++++++++++++++++++++++--------------- net/ipv6/Makefile | 2 +- net/ipv6/af_inet6.c | 12 ++ net/ipv6/icmp.c | 19 +- net/ipv6/ping.c | 216 ++++++++++++++++++++ 6 files changed, 643 insertions(+), 168 deletions(-) create mode 100644 net/ipv6/ping.c (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 76e10b47e053..562efd91f457 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -939,7 +939,8 @@ error: void icmp_err(struct sk_buff *skb, u32 info) { struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int offset = iph->ihl<<2; + struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset); int type = icmp_hdr(skb)->type; int code = icmp_hdr(skb)->code; struct net *net = dev_net(skb->dev); @@ -949,7 +950,7 @@ void icmp_err(struct sk_buff *skb, u32 info) * triggered by ICMP_ECHOREPLY which sent from kernel. */ if (icmph->type != ICMP_ECHOREPLY) { - ping_err(skb, info); + ping_err(skb, offset, info); return; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7d93d62cd5fd..71f6ad02fa67 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -46,8 +45,18 @@ #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#include +#include +#include +#endif -static struct ping_table ping_table; + +struct ping_table ping_table; +struct pingv6_ops pingv6_ops; +EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; @@ -58,6 +67,7 @@ static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int ma pr_debug("hash(%d) = %d\n", num, res); return res; } +EXPORT_SYMBOL_GPL(ping_hash); static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, struct net *net, unsigned int num) @@ -65,7 +75,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; } -static int ping_v4_get_port(struct sock *sk, unsigned short ident) +int ping_get_port(struct sock *sk, unsigned short ident) { struct hlist_nulls_node *node; struct hlist_nulls_head *hlist; @@ -103,6 +113,10 @@ next_port: ping_portaddr_for_each_entry(sk2, node, hlist) { isk2 = inet_sk(sk2); + /* BUG? Why is this reuse and not reuseaddr? ping.c + * doesn't turn off SO_REUSEADDR, and it doesn't expect + * that other ping processes can steal its packets. + */ if ((isk2->inet_num == ident) && (sk2 != sk) && (!sk2->sk_reuse || !sk->sk_reuse)) @@ -125,17 +139,18 @@ fail: write_unlock_bh(&ping_table.lock); return 1; } +EXPORT_SYMBOL_GPL(ping_get_port); -static void ping_v4_hash(struct sock *sk) +void ping_hash(struct sock *sk) { - pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); + pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); BUG(); /* "Please do not press this button again." */ } -static void ping_v4_unhash(struct sock *sk) +void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); - pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); @@ -146,31 +161,61 @@ static void ping_v4_unhash(struct sock *sk) write_unlock_bh(&ping_table.lock); } } +EXPORT_SYMBOL_GPL(ping_unhash); -static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr, - u16 ident, int dif) +static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; + int dif = skb->dev->ifindex; + + if (skb->protocol == htons(ETH_P_IP)) { + pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", + (int)ident, &ip_hdr(skb)->daddr, dif); +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", + (int)ident, &ipv6_hdr(skb)->daddr, dif); +#endif + } - pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", - (int)ident, &daddr, dif); read_lock_bh(&ping_table.lock); ping_portaddr_for_each_entry(sk, hnode, hslot) { isk = inet_sk(sk); - pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk, - (int)isk->inet_num, &isk->inet_rcv_saddr, - sk->sk_bound_dev_if); - pr_debug("iterate\n"); if (isk->inet_num != ident) continue; - if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr) - continue; + + if (skb->protocol == htons(ETH_P_IP) && + sk->sk_family == AF_INET) { + pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, + (int) isk->inet_num, &isk->inet_rcv_saddr, + sk->sk_bound_dev_if); + + if (isk->inet_rcv_saddr && + isk->inet_rcv_saddr != ip_hdr(skb)->daddr) + continue; +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6) && + sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, + (int) isk->inet_num, + &inet6_sk(sk)->rcv_saddr, + sk->sk_bound_dev_if); + + if (!ipv6_addr_any(&np->rcv_saddr) && + !ipv6_addr_equal(&np->rcv_saddr, + &ipv6_hdr(skb)->daddr)) + continue; +#endif + } + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; @@ -200,7 +245,7 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, } -static int ping_init_sock(struct sock *sk) +int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); kgid_t group = current_egid(); @@ -225,8 +270,9 @@ static int ping_init_sock(struct sock *sk) return -EACCES; } +EXPORT_SYMBOL_GPL(ping_init_sock); -static void ping_close(struct sock *sk, long timeout) +void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), inet_sk(sk)->inet_num); @@ -234,36 +280,122 @@ static void ping_close(struct sock *sk, long timeout) sk_common_release(sk); } +EXPORT_SYMBOL_GPL(ping_close); + +/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ +int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, + struct sockaddr *uaddr, int addr_len) { + struct net *net = sock_net(sk); + if (sk->sk_family == AF_INET) { + struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + int chk_addr_ret; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", + sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); + + chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + + if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) + chk_addr_ret = RTN_LOCAL; + + if ((sysctl_ip_nonlocal_bind == 0 && + isk->freebind == 0 && isk->transparent == 0 && + chk_addr_ret != RTN_LOCAL) || + chk_addr_ret == RTN_MULTICAST || + chk_addr_ret == RTN_BROADCAST) + return -EADDRNOTAVAIL; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; + int addr_type, scoped, has_addr; + struct net_device *dev = NULL; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", + sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); + + addr_type = ipv6_addr_type(&addr->sin6_addr); + scoped = __ipv6_addr_needs_scope_id(addr_type); + if ((addr_type != IPV6_ADDR_ANY && + !(addr_type & IPV6_ADDR_UNICAST)) || + (scoped && !addr->sin6_scope_id)) + return -EINVAL; + + rcu_read_lock(); + if (addr->sin6_scope_id) { + dev = dev_get_by_index_rcu(net, addr->sin6_scope_id); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } + has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, + scoped); + rcu_read_unlock(); + + if (!(isk->freebind || isk->transparent || has_addr || + addr_type == IPV6_ADDR_ANY)) + return -EADDRNOTAVAIL; + + if (scoped) + sk->sk_bound_dev_if = addr->sin6_scope_id; +#endif + } else { + return -EAFNOSUPPORT; + } + return 0; +} +void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) +{ + if (saddr->sa_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + struct sockaddr_in *addr = (struct sockaddr_in *) saddr; + isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (saddr->sa_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; + struct ipv6_pinfo *np = inet6_sk(sk); + np->rcv_saddr = np->saddr = addr->sin6_addr; +#endif + } +} + +void ping_clear_saddr(struct sock *sk, int dif) +{ + sk->sk_bound_dev_if = dif; + if (sk->sk_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + isk->inet_rcv_saddr = isk->inet_saddr = 0; +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&np->saddr, 0, sizeof(np->saddr)); +#endif + } +} /* * We need our own bind because there are no privileged id's == local ports. * Moreover, we don't allow binding to multi- and broadcast addresses. */ -static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct inet_sock *isk = inet_sk(sk); unsigned short snum; - int chk_addr_ret; int err; + int dif = sk->sk_bound_dev_if; - if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; - - pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n", - sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); - - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); - if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) - chk_addr_ret = RTN_LOCAL; - - if ((sysctl_ip_nonlocal_bind == 0 && - isk->freebind == 0 && isk->transparent == 0 && - chk_addr_ret != RTN_LOCAL) || - chk_addr_ret == RTN_MULTICAST || - chk_addr_ret == RTN_BROADCAST) - return -EADDRNOTAVAIL; + err = ping_check_bind_addr(sk, isk, uaddr, addr_len); + if (err) + return err; lock_sock(sk); @@ -272,42 +404,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto out; err = -EADDRINUSE; - isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; - snum = ntohs(addr->sin_port); - if (ping_v4_get_port(sk, snum) != 0) { - isk->inet_saddr = isk->inet_rcv_saddr = 0; + ping_set_saddr(sk, uaddr); + snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port); + if (ping_get_port(sk, snum) != 0) { + ping_clear_saddr(sk, dif); goto out; } - pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n", + pr_debug("after bind(): num = %d, dif = %d\n", (int)isk->inet_num, - &isk->inet_rcv_saddr, (int)sk->sk_bound_dev_if); err = 0; - if (isk->inet_rcv_saddr) + if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) || + (sk->sk_family == AF_INET6 && + !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr))) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; + if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; isk->inet_sport = htons(isk->inet_num); isk->inet_daddr = 0; isk->inet_dport = 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr)); +#endif + sk_dst_reset(sk); out: release_sock(sk); pr_debug("ping_v4_bind -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_bind); /* * Is this a supported type of ICMP message? */ -static inline int ping_supported(int type, int code) +static inline int ping_supported(int family, int type, int code) { - if (type == ICMP_ECHO && code == 0) - return 1; - return 0; + return (family == AF_INET && type == ICMP_ECHO && code == 0) || + (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0); } /* @@ -315,30 +455,42 @@ static inline int ping_supported(int type, int code) * sort of error condition. */ -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); - -void ping_err(struct sk_buff *skb, u32 info) +void ping_err(struct sk_buff *skb, int offset, u32 info) { - struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int family; + struct icmphdr *icmph; struct inet_sock *inet_sock; - int type = icmp_hdr(skb)->type; - int code = icmp_hdr(skb)->code; + int type; + int code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; int err; + if (skb->protocol == htons(ETH_P_IP)) { + family = AF_INET; + type = icmp_hdr(skb)->type; + code = icmp_hdr(skb)->code; + icmph = (struct icmphdr *)(skb->data + offset); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + family = AF_INET6; + type = icmp6_hdr(skb)->icmp6_type; + code = icmp6_hdr(skb)->icmp6_code; + icmph = (struct icmphdr *) (skb->data + offset); + } else { + BUG(); + } + /* We assume the packet has already been checked by icmp_unreach */ - if (!ping_supported(icmph->type, icmph->code)) + if (!ping_supported(family, icmph->type, icmph->code)) return; - pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type, - code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); + pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n", + skb->protocol, type, code, ntohs(icmph->un.echo.id), + ntohs(icmph->un.echo.sequence)); - sk = ping_v4_lookup(net, iph->daddr, iph->saddr, - ntohs(icmph->un.echo.id), skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk == NULL) { pr_debug("no socket, dropping\n"); return; /* No socket for error */ @@ -349,72 +501,83 @@ void ping_err(struct sk_buff *skb, u32 info) harderr = 0; inet_sock = inet_sk(sk); - switch (type) { - default: - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - case ICMP_SOURCE_QUENCH: - /* This is not a real error but ping wants to see it. - * Report it with some fake errno. */ - err = EREMOTEIO; - break; - case ICMP_PARAMETERPROB: - err = EPROTO; - harderr = 1; - break; - case ICMP_DEST_UNREACH: - if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - ipv4_sk_update_pmtu(skb, sk, info); - if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { - err = EMSGSIZE; - harderr = 1; - break; + if (skb->protocol == htons(ETH_P_IP)) { + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + /* This is not a real error but ping wants to see it. + * Report it with some fake errno. + */ + err = EREMOTEIO; + break; + case ICMP_PARAMETERPROB: + err = EPROTO; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); + if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + harderr = 1; + break; + } + goto out; } - goto out; - } - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + case ICMP_REDIRECT: + /* See ICMP_SOURCE_QUENCH */ + ipv4_sk_redirect(skb, sk); + err = EREMOTEIO; + break; } - break; - case ICMP_REDIRECT: - /* See ICMP_SOURCE_QUENCH */ - ipv4_sk_redirect(skb, sk); - err = EREMOTEIO; - break; +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + harderr = pingv6_ops.icmpv6_err_convert(type, code, &err); +#endif } /* * RFC1122: OK. Passes ICMP errors back to application, as per * 4.1.3.3. */ - if (!inet_sock->recverr) { + if ((family == AF_INET && !inet_sock->recverr) || + (family == AF_INET6 && !inet6_sk(sk)->recverr)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { - ip_icmp_error(sk, skb, err, 0 /* no remote port */, - info, (u8 *)icmph); + if (family == AF_INET) { + ip_icmp_error(sk, skb, err, 0 /* no remote port */, + info, (u8 *)icmph); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + pingv6_ops.ipv6_icmp_error(sk, skb, err, 0, + info, (u8 *)icmph); +#endif + } } sk->sk_err = err; sk->sk_error_report(sk); out: sock_put(sk); } +EXPORT_SYMBOL_GPL(ping_err); /* - * Copy and checksum an ICMP Echo packet from user space into a buffer. + * Copy and checksum an ICMP Echo packet from user space into a buffer + * starting from the payload. */ -struct pingfakehdr { - struct icmphdr icmph; - struct iovec *iov; - __wsum wcheck; -}; - -static int ping_getfrag(void *from, char *to, - int offset, int fraglen, int odd, struct sk_buff *skb) +int ping_getfrag(void *from, char *to, + int offset, int fraglen, int odd, struct sk_buff *skb) { struct pingfakehdr *pfh = (struct pingfakehdr *)from; @@ -425,20 +588,33 @@ static int ping_getfrag(void *from, char *to, pfh->iov, 0, fraglen - sizeof(struct icmphdr), &pfh->wcheck)) return -EFAULT; + } else if (offset < sizeof(struct icmphdr)) { + BUG(); + } else { + if (csum_partial_copy_fromiovecend + (to, pfh->iov, offset - sizeof(struct icmphdr), + fraglen, &pfh->wcheck)) + return -EFAULT; + } - return 0; +#if IS_ENABLED(CONFIG_IPV6) + /* For IPv6, checksum each skb as we go along, as expected by + * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in + * wcheck, it will be finalized in ping_v4_push_pending_frames. + */ + if (pfh->family == AF_INET6) { + skb->csum = pfh->wcheck; + skb->ip_summed = CHECKSUM_NONE; + pfh->wcheck = 0; } - if (offset < sizeof(struct icmphdr)) - BUG(); - if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) - return -EFAULT; +#endif + return 0; } +EXPORT_SYMBOL_GPL(ping_getfrag); -static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, - struct flowi4 *fl4) +static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, + struct flowi4 *fl4) { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); @@ -450,24 +626,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, return ip_push_pending_frames(sk, fl4); } -static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) -{ - struct net *net = sock_net(sk); - struct flowi4 fl4; - struct inet_sock *inet = inet_sk(sk); - struct ipcm_cookie ipc; - struct icmphdr user_icmph; - struct pingfakehdr pfh; - struct rtable *rt = NULL; - struct ip_options_data opt_copy; - int free = 0; - __be32 saddr, daddr, faddr; - u8 tos; - int err; - - pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); - +int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, + void *user_icmph, size_t icmph_len) { + u8 type, code; if (len > 0xFFFF) return -EMSGSIZE; @@ -482,15 +643,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* * Fetch the ICMP header provided by the userland. - * iovec is modified! + * iovec is modified! The ICMP header is consumed. */ - - if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov, - sizeof(struct icmphdr))) + if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len)) return -EFAULT; - if (!ping_supported(user_icmph.type, user_icmph.code)) + + if (family == AF_INET) { + type = ((struct icmphdr *) user_icmph)->type; + code = ((struct icmphdr *) user_icmph)->code; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + type = ((struct icmp6hdr *) user_icmph)->icmp6_type; + code = ((struct icmp6hdr *) user_icmph)->icmp6_code; +#endif + } else { + BUG(); + } + + if (!ping_supported(family, type, code)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(ping_common_sendmsg); + +int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct net *net = sock_net(sk); + struct flowi4 fl4; + struct inet_sock *inet = inet_sk(sk); + struct ipcm_cookie ipc; + struct icmphdr user_icmph; + struct pingfakehdr pfh; + struct rtable *rt = NULL; + struct ip_options_data opt_copy; + int free = 0; + __be32 saddr, daddr, faddr; + u8 tos; + int err; + + pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + /* * Get and verify the address. */ @@ -595,13 +794,14 @@ back_from_confirm: pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; pfh.iov = msg->msg_iov; pfh.wcheck = 0; + pfh.family = AF_INET; err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else - err = ping_push_pending_frames(sk, &pfh, &fl4); + err = ping_v4_push_pending_frames(sk, &pfh, &fl4); release_sock(sk); out: @@ -622,11 +822,13 @@ do_confirm: goto out; } -static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + int family = sk->sk_family; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; struct sk_buff *skb; int copied, err; @@ -636,11 +838,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); + if (addr_len) { + if (family == AF_INET) + *addr_len = sizeof(*sin); + else if (family == AF_INET6 && addr_len) + *addr_len = sizeof(*sin6); + } - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + if (flags & MSG_ERRQUEUE) { + if (family == AF_INET) { + return ip_recv_error(sk, msg, len); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + return pingv6_ops.ipv6_recv_error(sk, msg, len); +#endif + } + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -659,15 +872,40 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sock_recv_timestamp(msg, sk, skb); - /* Copy the address. */ - if (sin) { + /* Copy the address and add cmsg data. */ + if (family == AF_INET) { + sin = (struct sockaddr_in *) msg->msg_name; sin->sin_family = AF_INET; sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + + if (isk->cmsg_flags) + ip_cmsg_recv(msg, skb); + +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *ip6 = ipv6_hdr(skb); + sin6 = (struct sockaddr_in6 *) msg->msg_name; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_addr = ip6->saddr; + + if (np->sndflow) + sin6->sin6_flowinfo = ip6_flowinfo(ip6); + + if (__ipv6_addr_needs_scope_id( + ipv6_addr_type(&sin6->sin6_addr))) + sin6->sin6_scope_id = IP6CB(skb)->iif; + + if (inet6_sk(sk)->rxopt.all) + pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); +#endif + } else { + BUG(); } - if (isk->cmsg_flags) - ip_cmsg_recv(msg, skb); + err = copied; done: @@ -676,8 +914,9 @@ out: pr_debug("ping_recvmsg -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_recvmsg); -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); @@ -688,6 +927,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } return 0; } +EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); /* @@ -698,10 +938,7 @@ void ping_rcv(struct sk_buff *skb) { struct sock *sk; struct net *net = dev_net(skb->dev); - struct iphdr *iph = ip_hdr(skb); struct icmphdr *icmph = icmp_hdr(skb); - __be32 saddr = iph->saddr; - __be32 daddr = iph->daddr; /* We assume the packet has already been checked by icmp_rcv */ @@ -711,8 +948,7 @@ void ping_rcv(struct sk_buff *skb) /* Push ICMP header back */ skb_push(skb, skb->data - (u8 *)icmph); - sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id), - skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk != NULL) { pr_debug("rcv on socket %p\n", sk); ping_queue_rcv_skb(sk, skb_get(skb)); @@ -723,6 +959,7 @@ void ping_rcv(struct sk_buff *skb) /* We're called from icmp_rcv(). kfree_skb() is done there. */ } +EXPORT_SYMBOL_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", @@ -733,14 +970,14 @@ struct proto ping_prot = { .disconnect = udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .sendmsg = ping_sendmsg, + .sendmsg = ping_v4_sendmsg, .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, .release_cb = ip4_datagram_release_cb, - .hash = ping_v4_hash, - .unhash = ping_v4_unhash, - .get_port = ping_v4_get_port, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, .obj_size = sizeof(struct inet_sock), }; EXPORT_SYMBOL(ping_prot); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 9af088d2cdaa..470a9c008e9b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ - raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ab5c7ad482cd..a5ac969aeefe 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -840,6 +841,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_udplite_proto; + err = proto_register(&pingv6_prot, 1); + if (err) + goto out_unregister_ping_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. @@ -930,6 +934,10 @@ static int __init inet6_init(void) if (err) goto ipv6_packet_fail; + err = pingv6_init(); + if (err) + goto pingv6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -942,6 +950,8 @@ out: sysctl_fail: ipv6_packet_cleanup(); #endif +pingv6_fail: + pingv6_exit(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: @@ -985,6 +995,8 @@ register_pernet_fail: rtnl_unregister_all(PF_INET6); out_sock_register_fail: rawv6_exit(); +out_unregister_ping_proto: + proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4ff0a42b8c7..1d2902e61786 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -57,6 +57,7 @@ #include #include +#include #include #include #include @@ -84,12 +85,18 @@ static inline struct sock *icmpv6_sk(struct net *net) static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ + struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, 0, 0); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); + + if (!(type & ICMPV6_INFOMSG_MASK)) + if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) + ping_err(skb, offset, info); } static int icmpv6_rcv(struct sk_buff *skb); @@ -224,7 +231,8 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) +int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; @@ -307,8 +315,8 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -697,7 +705,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", + LIMIT_NETDEBUG(KERN_DEBUG + "ICMPv6 checksum failed [%pI6c > %pI6c]\n", saddr, daddr); goto csum_error; } @@ -718,7 +727,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - /* we couldn't care less */ + ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c new file mode 100644 index 000000000000..a6462d657c15 --- /dev/null +++ b/net/ipv6/ping.c @@ -0,0 +1,216 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * "Ping" sockets + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on ipv4/ping.c code. + * + * Authors: Lorenzo Colitti (IPv6 support) + * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6), + * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32) + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_REUSE, +}; + + +/* Compatibility glue so we can support IPv6 when it's compiled as a module */ +int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + return -EAFNOSUPPORT; +} +int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} +int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) +{ + return -EAFNOSUPPORT; +} +void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) {} +int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev, int strict) +{ + return 0; +} + +int __init pingv6_init(void) +{ + pingv6_ops.ipv6_recv_error = ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; + return inet6_register_protosw(&pingv6_protosw); +} + +/* This never gets called because it's not possible to unload the ipv6 module, + * but just in case. + */ +void pingv6_exit(void) +{ + pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; + inet6_unregister_protosw(&pingv6_protosw); +} + +int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct icmp6hdr user_icmph; + int addr_type; + struct in6_addr *daddr; + int iif = 0; + struct flowi6 fl6; + int err; + int hlimit; + struct dst_entry *dst; + struct rt6_info *rt; + struct pingfakehdr pfh; + + pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + + if (msg->msg_name) { + struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; + if (msg->msg_namelen < sizeof(struct sockaddr_in6) || + u->sin6_family != AF_INET6) { + return -EINVAL; + } + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != u->sin6_scope_id) { + return -EINVAL; + } + daddr = &(u->sin6_addr); + iif = u->sin6_scope_id; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = &np->daddr; + } + + if (!iif) + iif = sk->sk_bound_dev_if; + + addr_type = ipv6_addr_type(daddr); + if (__ipv6_addr_needs_scope_id(addr_type) && !iif) + return -EINVAL; + if (addr_type & IPV6_ADDR_MAPPED) + return -EINVAL; + + /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ + + memset(&fl6, 0, sizeof(fl6)); + + fl6.flowi6_proto = IPPROTO_ICMPV6; + fl6.saddr = np->saddr; + fl6.daddr = *daddr; + fl6.fl6_icmp_type = user_icmph.icmp6_type; + fl6.fl6_icmp_code = user_icmph.icmp6_code; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; + + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + if (IS_ERR(dst)) + return PTR_ERR(dst); + rt = (struct rt6_info *) dst; + + np = inet6_sk(sk); + if (!np) + return -EBADF; + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; + + pfh.icmph.type = user_icmph.icmp6_type; + pfh.icmph.code = user_icmph.icmp6_code; + pfh.icmph.checksum = 0; + pfh.icmph.un.echo.id = inet->inet_sport; + pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; + pfh.iov = msg->msg_iov; + pfh.wcheck = 0; + pfh.family = AF_INET6; + + if (ipv6_addr_is_multicast(&fl6.daddr)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = ip6_dst_hoplimit(dst); + + err = ip6_append_data(sk, ping_getfrag, &pfh, len, + 0, hlimit, + np->tclass, NULL, &fl6, rt, + MSG_DONTWAIT, np->dontfrag); + + if (err) { + ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { + err = icmpv6_push_pending_frames(sk, &fl6, + (struct icmp6hdr *) &pfh.icmph, + len); + } + + return err; +} -- cgit From 42e52bf9e3ae80fd44b21ddfcd64c54e6db2ff76 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 25 May 2013 04:12:10 +0000 Subject: net: add netnotifier event for upper device change Now when upper device is changed, event is not propagated via RT Netlink to userspace. Userspace might never now about the change. Fix this by adding upper-device-change notifier event. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7229bc30e509..50c02ded1d69 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4411,7 +4411,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, else list_add_tail_rcu(&upper->list, &dev->upper_dev_list); dev_hold(upper_dev); - + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); return 0; } @@ -4471,6 +4471,7 @@ void netdev_upper_dev_unlink(struct net_device *dev, list_del_rcu(&upper->list); dev_put(upper_dev); kfree_rcu(upper, rcu); + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); } EXPORT_SYMBOL(netdev_upper_dev_unlink); -- cgit From 1f6afc81088a1f5a472b272408730c73b72c68aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 May 2013 15:03:54 +0000 Subject: tcp: remove one indentation level in tcp_rcv_state_process() Remove one level of indentation 'introduced' in commit c3ae62af8e75 (tcp: should drop incoming frames without ACK flag set) if (true) { ... } @acceptable variable is a boolean. This patch is a pure cleanup. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 269 +++++++++++++++++++++++++-------------------------- 1 file changed, 133 insertions(+), 136 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8230cd6243aa..40614257d2c5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5536,6 +5536,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *req; int queued = 0; + bool acceptable; tp->rx_opt.saw_tstamp = 0; @@ -5606,157 +5607,153 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; /* step 5: check the ACK field */ - if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; - - switch (sk->sk_state) { - case TCP_SYN_RECV: - if (acceptable) { - /* Once we leave TCP_SYN_RECV, we no longer - * need req so release it. - */ - if (req) { - tcp_synack_rtt_meas(sk, req); - tp->total_retrans = req->num_retrans; - - reqsk_fastopen_remove(sk, req, false); - } else { - /* Make sure socket is routed, for - * correct metrics. - */ - icsk->icsk_af_ops->rebuild_header(sk); - tcp_init_congestion_control(sk); + acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; - tcp_mtup_init(sk); - tcp_init_buffer_space(sk); - tp->copied_seq = tp->rcv_nxt; - } - smp_mb(); - tcp_set_state(sk, TCP_ESTABLISHED); - sk->sk_state_change(sk); + switch (sk->sk_state) { + case TCP_SYN_RECV: + if (acceptable) { + /* Once we leave TCP_SYN_RECV, we no longer + * need req so release it. + */ + if (req) { + tcp_synack_rtt_meas(sk, req); + tp->total_retrans = req->num_retrans; - /* Note, that this wakeup is only for marginal - * crossed SYN case. Passively open sockets - * are not waked up, because sk->sk_sleep == - * NULL and sk->sk_socket == NULL. + reqsk_fastopen_remove(sk, req, false); + } else { + /* Make sure socket is routed, for + * correct metrics. */ - if (sk->sk_socket) - sk_wake_async(sk, - SOCK_WAKE_IO, POLL_OUT); - - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; - tp->snd_wnd = ntohs(th->window) << - tp->rx_opt.snd_wscale; - tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - - if (tp->rx_opt.tstamp_ok) - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - - if (req) { - /* Re-arm the timer because data may - * have been sent out. This is similar - * to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more - * aggressive and retranmitting any data - * sooner based on when they were sent - * out. - */ - tcp_rearm_rto(sk); - } else - tcp_init_metrics(sk); + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_congestion_control(sk); - /* Prevent spurious tcp_cwnd_restart() on - * first data packet. + tcp_mtup_init(sk); + tcp_init_buffer_space(sk); + tp->copied_seq = tp->rcv_nxt; + } + smp_mb(); + tcp_set_state(sk, TCP_ESTABLISHED); + sk->sk_state_change(sk); + + /* Note, that this wakeup is only for marginal + * crossed SYN case. Passively open sockets + * are not waked up, because sk->sk_sleep == + * NULL and sk->sk_socket == NULL. + */ + if (sk->sk_socket) + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << + tp->rx_opt.snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); + + if (tp->rx_opt.tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + + if (req) { + /* Re-arm the timer because data may + * have been sent out. This is similar + * to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive + * and retransmitting any data sooner based + * on when they are sent out. */ - tp->lsndtime = tcp_time_stamp; + tcp_rearm_rto(sk); + } else + tcp_init_metrics(sk); - tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); - } else { - return 1; - } - break; + /* Prevent spurious tcp_cwnd_restart() on + * first data packet. + */ + tp->lsndtime = tcp_time_stamp; - case TCP_FIN_WAIT1: - /* If we enter the TCP_FIN_WAIT1 state and we are a - * Fast Open socket and this is the first acceptable - * ACK we have received, this would have acknowledged - * our SYNACK so stop the SYNACK timer. + tcp_initialize_rcv_mss(sk); + tcp_fast_path_on(tp); + } else { + return 1; + } + break; + + case TCP_FIN_WAIT1: + /* If we enter the TCP_FIN_WAIT1 state and we are a + * Fast Open socket and this is the first acceptable + * ACK we have received, this would have acknowledged + * our SYNACK so stop the SYNACK timer. + */ + if (req != NULL) { + /* Return RST if ack_seq is invalid. + * Note that RFC793 only says to generate a + * DUPACK for it but for TCP Fast Open it seems + * better to treat this case like TCP_SYN_RECV + * above. */ - if (req != NULL) { - /* Return RST if ack_seq is invalid. - * Note that RFC793 only says to generate a - * DUPACK for it but for TCP Fast Open it seems - * better to treat this case like TCP_SYN_RECV - * above. - */ - if (!acceptable) + if (!acceptable) + return 1; + /* We no longer need the request sock. */ + reqsk_fastopen_remove(sk, req, false); + tcp_rearm_rto(sk); + } + if (tp->snd_una == tp->write_seq) { + struct dst_entry *dst; + + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->sk_shutdown |= SEND_SHUTDOWN; + + dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); + + if (!sock_flag(sk, SOCK_DEAD)) { + /* Wake up lingering close() */ + sk->sk_state_change(sk); + } else { + int tmo; + + if (tp->linger2 < 0 || + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { + tcp_done(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); return 1; - /* We no longer need the request sock. */ - reqsk_fastopen_remove(sk, req, false); - tcp_rearm_rto(sk); - } - if (tp->snd_una == tp->write_seq) { - struct dst_entry *dst; - - tcp_set_state(sk, TCP_FIN_WAIT2); - sk->sk_shutdown |= SEND_SHUTDOWN; - - dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); - - if (!sock_flag(sk, SOCK_DEAD)) - /* Wake up lingering close() */ - sk->sk_state_change(sk); - else { - int tmo; - - if (tp->linger2 < 0 || - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { - tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); - return 1; - } + } - tmo = tcp_fin_time(sk); - if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); - } else if (th->fin || sock_owned_by_user(sk)) { - /* Bad case. We could lose such FIN otherwise. - * It is not a big problem, but it looks confusing - * and not so rare event. We still can lose it now, - * if it spins in bh_lock_sock(), but it is really - * marginal case. - */ - inet_csk_reset_keepalive_timer(sk, tmo); - } else { - tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto discard; - } + tmo = tcp_fin_time(sk); + if (tmo > TCP_TIMEWAIT_LEN) { + inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + } else if (th->fin || sock_owned_by_user(sk)) { + /* Bad case. We could lose such FIN otherwise. + * It is not a big problem, but it looks confusing + * and not so rare event. We still can lose it now, + * if it spins in bh_lock_sock(), but it is really + * marginal case. + */ + inet_csk_reset_keepalive_timer(sk, tmo); + } else { + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto discard; } } - break; + } + break; - case TCP_CLOSING: - if (tp->snd_una == tp->write_seq) { - tcp_time_wait(sk, TCP_TIME_WAIT, 0); - goto discard; - } - break; + case TCP_CLOSING: + if (tp->snd_una == tp->write_seq) { + tcp_time_wait(sk, TCP_TIME_WAIT, 0); + goto discard; + } + break; - case TCP_LAST_ACK: - if (tp->snd_una == tp->write_seq) { - tcp_update_metrics(sk); - tcp_done(sk); - goto discard; - } - break; + case TCP_LAST_ACK: + if (tp->snd_una == tp->write_seq) { + tcp_update_metrics(sk); + tcp_done(sk); + goto discard; } + break; } /* step 6: check the URG bit */ -- cgit From 61eb900352ff731d990d5415ce9f04e4af6a6136 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 24 May 2013 18:36:13 +0000 Subject: tcp: Remove another indentation level in tcp_rcv_state_process case TCP_SYN_RECV: can have another indentation level removed by converting if (acceptable) { ...; } else { return 1; } to if (!acceptable) return 1; ...; Reflow code and comments to fit 80 columns. Another pure cleanup patch. Signed-off-by: Joe Perches Improved-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 110 ++++++++++++++++++++++++--------------------------- 1 file changed, 51 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 40614257d2c5..413b480b9329 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5612,70 +5612,62 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, switch (sk->sk_state) { case TCP_SYN_RECV: - if (acceptable) { - /* Once we leave TCP_SYN_RECV, we no longer - * need req so release it. - */ - if (req) { - tcp_synack_rtt_meas(sk, req); - tp->total_retrans = req->num_retrans; + if (!acceptable) + return 1; - reqsk_fastopen_remove(sk, req, false); - } else { - /* Make sure socket is routed, for - * correct metrics. - */ - icsk->icsk_af_ops->rebuild_header(sk); - tcp_init_congestion_control(sk); + /* Once we leave TCP_SYN_RECV, we no longer need req + * so release it. + */ + if (req) { + tcp_synack_rtt_meas(sk, req); + tp->total_retrans = req->num_retrans; - tcp_mtup_init(sk); - tcp_init_buffer_space(sk); - tp->copied_seq = tp->rcv_nxt; - } - smp_mb(); - tcp_set_state(sk, TCP_ESTABLISHED); - sk->sk_state_change(sk); - - /* Note, that this wakeup is only for marginal - * crossed SYN case. Passively open sockets - * are not waked up, because sk->sk_sleep == - * NULL and sk->sk_socket == NULL. - */ - if (sk->sk_socket) - sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); - - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; - tp->snd_wnd = ntohs(th->window) << - tp->rx_opt.snd_wscale; - tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - - if (tp->rx_opt.tstamp_ok) - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - - if (req) { - /* Re-arm the timer because data may - * have been sent out. This is similar - * to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more aggressive - * and retransmitting any data sooner based - * on when they are sent out. - */ - tcp_rearm_rto(sk); - } else - tcp_init_metrics(sk); + reqsk_fastopen_remove(sk, req, false); + } else { + /* Make sure socket is routed, for correct metrics. */ + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_congestion_control(sk); + + tcp_mtup_init(sk); + tcp_init_buffer_space(sk); + tp->copied_seq = tp->rcv_nxt; + } + smp_mb(); + tcp_set_state(sk, TCP_ESTABLISHED); + sk->sk_state_change(sk); - /* Prevent spurious tcp_cwnd_restart() on - * first data packet. + /* Note, that this wakeup is only for marginal crossed SYN case. + * Passively open sockets are not waked up, because + * sk->sk_sleep == NULL and sk->sk_socket == NULL. + */ + if (sk->sk_socket) + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); + + if (tp->rx_opt.tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + + if (req) { + /* Re-arm the timer because data may have been sent out. + * This is similar to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive and + * retransmitting any data sooner based on when they + * are sent out. */ - tp->lsndtime = tcp_time_stamp; + tcp_rearm_rto(sk); + } else + tcp_init_metrics(sk); - tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); - } else { - return 1; - } + /* Prevent spurious tcp_cwnd_restart() on first data packet */ + tp->lsndtime = tcp_time_stamp; + + tcp_initialize_rcv_mss(sk); + tcp_fast_path_on(tp); break; case TCP_FIN_WAIT1: -- cgit From c48b22daa6062fff9eded311b4d6974c29b40487 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 24 May 2013 18:06:58 +0000 Subject: tcp: Remove 2 indentation levels in tcp_rcv_state_process case TCP_FIN_WAIT1 can also be simplified by reversing tests and adding breaks; Add braces after case and move automatic definitions. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 76 +++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 413b480b9329..9579e1a5a144 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5670,7 +5670,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tcp_fast_path_on(tp); break; - case TCP_FIN_WAIT1: + case TCP_FIN_WAIT1: { + struct dst_entry *dst; + int tmo; + /* If we enter the TCP_FIN_WAIT1 state and we are a * Fast Open socket and this is the first acceptable * ACK we have received, this would have acknowledged @@ -5689,48 +5692,47 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, reqsk_fastopen_remove(sk, req, false); tcp_rearm_rto(sk); } - if (tp->snd_una == tp->write_seq) { - struct dst_entry *dst; + if (tp->snd_una != tp->write_seq) + break; - tcp_set_state(sk, TCP_FIN_WAIT2); - sk->sk_shutdown |= SEND_SHUTDOWN; + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->sk_shutdown |= SEND_SHUTDOWN; - dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); + dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); - if (!sock_flag(sk, SOCK_DEAD)) { - /* Wake up lingering close() */ - sk->sk_state_change(sk); - } else { - int tmo; - - if (tp->linger2 < 0 || - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { - tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); - return 1; - } + if (!sock_flag(sk, SOCK_DEAD)) { + /* Wake up lingering close() */ + sk->sk_state_change(sk); + break; + } - tmo = tcp_fin_time(sk); - if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); - } else if (th->fin || sock_owned_by_user(sk)) { - /* Bad case. We could lose such FIN otherwise. - * It is not a big problem, but it looks confusing - * and not so rare event. We still can lose it now, - * if it spins in bh_lock_sock(), but it is really - * marginal case. - */ - inet_csk_reset_keepalive_timer(sk, tmo); - } else { - tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto discard; - } - } + if (tp->linger2 < 0 || + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { + tcp_done(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + return 1; + } + + tmo = tcp_fin_time(sk); + if (tmo > TCP_TIMEWAIT_LEN) { + inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + } else if (th->fin || sock_owned_by_user(sk)) { + /* Bad case. We could lose such FIN otherwise. + * It is not a big problem, but it looks confusing + * and not so rare event. We still can lose it now, + * if it spins in bh_lock_sock(), but it is really + * marginal case. + */ + inet_csk_reset_keepalive_timer(sk, tmo); + } else { + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto discard; } break; + } case TCP_CLOSING: if (tp->snd_una == tp->write_seq) { -- cgit From 9d5242b19269432ea388d766312ed49f184f83fd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 25 May 2013 01:46:10 +0000 Subject: netfilter: nfnetlink_queue: avoid peer_portid test The portid is set to NETLINK_CB(skb).portid at create time. The run-time check will always be false. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 2e0e835baf72..cff4449f01d2 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -509,10 +509,6 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, } spin_lock_bh(&queue->lock); - if (!queue->peer_portid) { - err = -EINVAL; - goto err_out_free_nskb; - } if (queue->queue_total >= queue->queue_maxlen) { if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { failopen = 1; -- cgit From 4e7dba99c9e606e304f104ce4071d8b5ba93957e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 22 May 2013 14:59:10 +0200 Subject: netfilter: Implement RFC 1123 for FTP conntrack The FTP conntrack code currently only accepts the following format for the 227 response for PASV: 227 Entering Passive Mode (148,100,81,40,31,161). It doesn't accept the following format from an obscure server: 227 Data transfer will passively listen to 67,218,99,134,50,144 From RFC 1123: The format of the 227 reply to a PASV command is not well standardized. In particular, an FTP client cannot assume that the parentheses shown on page 40 of RFC-959 will be present (and in fact, Figure 3 on page 43 omits them). Therefore, a User-FTP program that interprets the PASV reply must scan the reply for the first digit of the host and port numbers. This patch adds support for the RFC 1123 clarification by: - Allowing a search filter to specify NUL as the terminator so that try_number will return successfully if the array of numbers has been filled when an unexpected character is encountered. - Using space as the separator for the 227 reply and then scanning for the first digit of the number sequence. The number sequence is parsed out using the existing try_rfc959 but with a NUL terminator. References: https://bugzilla.novell.com/show_bug.cgi?id=466279 References: http://bugzilla.netfilter.org/show_bug.cgi?id=574 Reported-by: Mark Post Signed-off-by: Jeff Mahoney Signed-off-by: Jiri Slaby Cc: Pablo Neira Ayuso Cc: Patrick McHardy Cc: "David S. Miller" Cc: netfilter-devel@vger.kernel.org Cc: netfilter@vger.kernel.org Cc: coreteam@netfilter.org Cc: netdev@vger.kernel.org Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 73 +++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 6b217074237b..b8a0924064ef 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -55,10 +55,14 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, struct nf_conntrack_expect *exp); EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); -static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); -static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); +static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); +static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); +static int try_eprt(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, - char); + char, unsigned int *); static struct ftp_search { const char *pattern; @@ -66,7 +70,7 @@ static struct ftp_search { char skip; char term; enum nf_ct_ftp_type ftptype; - int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); + int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *); } search[IP_CT_DIR_MAX][2] = { [IP_CT_DIR_ORIGINAL] = { { @@ -90,10 +94,8 @@ static struct ftp_search { { .pattern = "227 ", .plen = sizeof("227 ") - 1, - .skip = '(', - .term = ')', .ftptype = NF_CT_FTP_PASV, - .getnum = try_rfc959, + .getnum = try_rfc1123, }, { .pattern = "229 ", @@ -132,8 +134,9 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[], i++; else { /* Unexpected character; true if it's the - terminator and we're finished. */ - if (*data == term && i == array_size - 1) + terminator (or we don't care about one) + and we're finished. */ + if ((*data == term || !term) && i == array_size - 1) return len; pr_debug("Char %u (got %u nums) `%u' unexpected\n", @@ -148,7 +151,8 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[], /* Returns 0, or length of numbers: 192,168,1,1,5,6 */ static int try_rfc959(const char *data, size_t dlen, - struct nf_conntrack_man *cmd, char term) + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) { int length; u_int32_t array[6]; @@ -163,6 +167,33 @@ static int try_rfc959(const char *data, size_t dlen, return length; } +/* + * From RFC 1123: + * The format of the 227 reply to a PASV command is not + * well standardized. In particular, an FTP client cannot + * assume that the parentheses shown on page 40 of RFC-959 + * will be present (and in fact, Figure 3 on page 43 omits + * them). Therefore, a User-FTP program that interprets + * the PASV reply must scan the reply for the first digit + * of the host and port numbers. + */ +static int try_rfc1123(const char *data, size_t dlen, + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) +{ + int i; + for (i = 0; i < dlen; i++) + if (isdigit(data[i])) + break; + + if (i == dlen) + return 0; + + *offset += i; + + return try_rfc959(data + i, dlen - i, cmd, 0, offset); +} + /* Grab port: number up to delimiter */ static int get_port(const char *data, int start, size_t dlen, char delim, __be16 *port) @@ -191,7 +222,7 @@ static int get_port(const char *data, int start, size_t dlen, char delim, /* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, - char term) + char term, unsigned int *offset) { char delim; int length; @@ -239,7 +270,8 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, /* Returns 0, or length of numbers: |||6446| */ static int try_epsv_response(const char *data, size_t dlen, - struct nf_conntrack_man *cmd, char term) + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) { char delim; @@ -261,9 +293,10 @@ static int find_pattern(const char *data, size_t dlen, unsigned int *numlen, struct nf_conntrack_man *cmd, int (*getnum)(const char *, size_t, - struct nf_conntrack_man *, char)) + struct nf_conntrack_man *, char, + unsigned int *)) { - size_t i; + size_t i = plen; pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); if (dlen == 0) @@ -293,16 +326,18 @@ static int find_pattern(const char *data, size_t dlen, pr_debug("Pattern matches!\n"); /* Now we've found the constant string, try to skip to the 'skip' character */ - for (i = plen; data[i] != skip; i++) - if (i == dlen - 1) return -1; + if (skip) { + for (i = plen; data[i] != skip; i++) + if (i == dlen - 1) return -1; - /* Skip over the last character */ - i++; + /* Skip over the last character */ + i++; + } pr_debug("Skipped up to `%c'!\n", skip); *numoff = i; - *numlen = getnum(data + i, dlen - i, cmd, term); + *numlen = getnum(data + i, dlen - i, cmd, term, numoff); if (!*numlen) return -1; -- cgit From 6abb9cb99f33b20c2f32f18a3ae9cc7543e46edb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2013 09:30:07 +0200 Subject: cfg80211: make WoWLAN configuration available to drivers Make the current WoWLAN configuration available to drivers at runtime. This isn't really useful for the normal WoWLAN behaviour and accessing it can also be racy, but drivers may use it for testing the WoWLAN device behaviour while the host stays up & running to observe the device. Signed-off-by: Johannes Berg --- net/wireless/core.c | 4 +++- net/wireless/core.h | 21 ++++++++++---------- net/wireless/nl80211.c | 53 ++++++++++++++++++++++++++------------------------ net/wireless/sysfs.c | 8 ++++++-- 4 files changed, 48 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index ee422871fe9e..41cec1776f4f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -663,8 +663,10 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); - if (rdev->wowlan && rdev->ops->set_wakeup) +#ifdef CONFIG_PM + if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); +#endif cfg80211_rdev_free_wowlan(rdev); } EXPORT_SYMBOL(wiphy_unregister); diff --git a/net/wireless/core.h b/net/wireless/core.h index b4b4a566626b..a65eaf8a84c1 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -74,8 +74,6 @@ struct cfg80211_registered_device { struct work_struct conn_work; struct work_struct event_work; - struct cfg80211_wowlan *wowlan; - struct delayed_work dfs_update_channels_wk; /* netlink port which started critical protocol (0 means not started) */ @@ -96,17 +94,20 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { +#ifdef CONFIG_PM int i; - if (!rdev->wowlan) + if (!rdev->wiphy.wowlan_config) return; - for (i = 0; i < rdev->wowlan->n_patterns; i++) - kfree(rdev->wowlan->patterns[i].mask); - kfree(rdev->wowlan->patterns); - if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) - sock_release(rdev->wowlan->tcp->sock); - kfree(rdev->wowlan->tcp); - kfree(rdev->wowlan); + for (i = 0; i < rdev->wiphy.wowlan_config->n_patterns; i++) + kfree(rdev->wiphy.wowlan_config->patterns[i].mask); + kfree(rdev->wiphy.wowlan_config->patterns); + if (rdev->wiphy.wowlan_config->tcp && + rdev->wiphy.wowlan_config->tcp->sock) + sock_release(rdev->wiphy.wowlan_config->tcp->sock); + kfree(rdev->wiphy.wowlan_config->tcp); + kfree(rdev->wiphy.wowlan_config); +#endif } extern struct workqueue_struct *cfg80211_wq; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a09f36bb957c..fb6abcb359a1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7489,28 +7489,29 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) static int nl80211_send_wowlan_patterns(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { + struct cfg80211_wowlan *wowlan = rdev->wiphy.wowlan_config; struct nlattr *nl_pats, *nl_pat; int i, pat_len; - if (!rdev->wowlan->n_patterns) + if (!wowlan->n_patterns) return 0; nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); if (!nl_pats) return -ENOBUFS; - for (i = 0; i < rdev->wowlan->n_patterns; i++) { + for (i = 0; i < wowlan->n_patterns; i++) { nl_pat = nla_nest_start(msg, i + 1); if (!nl_pat) return -ENOBUFS; - pat_len = rdev->wowlan->patterns[i].pattern_len; + pat_len = wowlan->patterns[i].pattern_len; if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, DIV_ROUND_UP(pat_len, 8), - rdev->wowlan->patterns[i].mask) || + wowlan->patterns[i].mask) || nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, - pat_len, rdev->wowlan->patterns[i].pattern) || + pat_len, wowlan->patterns[i].pattern) || nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, - rdev->wowlan->patterns[i].pkt_offset)) + wowlan->patterns[i].pkt_offset)) return -ENOBUFS; nla_nest_end(msg, nl_pat); } @@ -7573,12 +7574,12 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; - if (rdev->wowlan && rdev->wowlan->tcp) { + if (rdev->wiphy.wowlan_config && rdev->wiphy.wowlan_config->tcp) { /* adjust size to have room for all the data */ - size += rdev->wowlan->tcp->tokens_size + - rdev->wowlan->tcp->payload_len + - rdev->wowlan->tcp->wake_len + - rdev->wowlan->tcp->wake_len / 8; + size += rdev->wiphy.wowlan_config->tcp->tokens_size + + rdev->wiphy.wowlan_config->tcp->payload_len + + rdev->wiphy.wowlan_config->tcp->wake_len + + rdev->wiphy.wowlan_config->tcp->wake_len / 8; } msg = nlmsg_new(size, GFP_KERNEL); @@ -7590,33 +7591,34 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (!hdr) goto nla_put_failure; - if (rdev->wowlan) { + if (rdev->wiphy.wowlan_config) { struct nlattr *nl_wowlan; nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); if (!nl_wowlan) goto nla_put_failure; - if ((rdev->wowlan->any && + if ((rdev->wiphy.wowlan_config->any && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - (rdev->wowlan->disconnect && + (rdev->wiphy.wowlan_config->disconnect && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - (rdev->wowlan->magic_pkt && + (rdev->wiphy.wowlan_config->magic_pkt && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - (rdev->wowlan->gtk_rekey_failure && + (rdev->wiphy.wowlan_config->gtk_rekey_failure && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - (rdev->wowlan->eap_identity_req && + (rdev->wiphy.wowlan_config->eap_identity_req && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - (rdev->wowlan->four_way_handshake && + (rdev->wiphy.wowlan_config->four_way_handshake && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - (rdev->wowlan->rfkill_release && + (rdev->wiphy.wowlan_config->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; if (nl80211_send_wowlan_patterns(msg, rdev)) goto nla_put_failure; - if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp)) + if (nl80211_send_wowlan_tcp(msg, + rdev->wiphy.wowlan_config->tcp)) goto nla_put_failure; nla_nest_end(msg, nl_wowlan); @@ -7783,7 +7785,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) struct cfg80211_wowlan *ntrig; struct wiphy_wowlan_support *wowlan = &rdev->wiphy.wowlan; int err, i; - bool prev_enabled = rdev->wowlan; + bool prev_enabled = rdev->wiphy.wowlan_config; if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && !rdev->wiphy.wowlan.tcp) @@ -7791,7 +7793,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { cfg80211_rdev_free_wowlan(rdev); - rdev->wowlan = NULL; + rdev->wiphy.wowlan_config = NULL; goto set_wakeup; } @@ -7927,11 +7929,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; } cfg80211_rdev_free_wowlan(rdev); - rdev->wowlan = ntrig; + rdev->wiphy.wowlan_config = ntrig; set_wakeup: - if (rdev->ops->set_wakeup && prev_enabled != !!rdev->wowlan) - rdev_set_wakeup(rdev, rdev->wowlan); + if (rdev->ops->set_wakeup && + prev_enabled != !!rdev->wiphy.wowlan_config) + rdev_set_wakeup(rdev, rdev->wiphy.wowlan_config); return 0; error: diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 8f28b9f798d8..360a42c6f694 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -91,6 +91,7 @@ static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) cfg80211_leave(rdev, wdev); } +#ifdef CONFIG_PM static int wiphy_suspend(struct device *dev, pm_message_t state) { struct cfg80211_registered_device *rdev = dev_to_rdev(dev); @@ -100,10 +101,10 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) rtnl_lock(); if (rdev->wiphy.registered) { - if (!rdev->wowlan) + if (!rdev->wiphy.wowlan_config) cfg80211_leave_all(rdev); if (rdev->ops->suspend) - ret = rdev_suspend(rdev, rdev->wowlan); + ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); if (ret == 1) { /* Driver refuse to configure wowlan */ cfg80211_leave_all(rdev); @@ -132,6 +133,7 @@ static int wiphy_resume(struct device *dev) return ret; } +#endif static const void *wiphy_namespace(struct device *d) { @@ -146,8 +148,10 @@ struct class ieee80211_class = { .dev_release = wiphy_dev_release, .dev_attrs = ieee80211_dev_attrs, .dev_uevent = wiphy_uevent, +#ifdef CONFIG_PM .suspend = wiphy_suspend, .resume = wiphy_resume, +#endif .ns_type = &net_ns_type_operations, .namespace = wiphy_namespace, }; -- cgit From 0d89d2035fe063461a5ddb609b2c12e7fb006e44 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 23 May 2013 21:02:52 +0000 Subject: MPLS: Add limited GSO support In the case where a non-MPLS packet is received and an MPLS stack is added it may well be the case that the original skb is GSO but the NIC used for transmit does not support GSO of MPLS packets. The aim of this code is to provide GSO in software for MPLS packets whose skbs are GSO. SKB Usage: When an implementation adds an MPLS stack to a non-MPLS packet it should do the following to skb metadata: * Set skb->inner_protocol to the old non-MPLS ethertype of the packet. skb->inner_protocol is added by this patch. * Set skb->protocol to the new MPLS ethertype of the packet. * Set skb->network_header to correspond to the end of the L3 header, including the MPLS label stack. I have posted a patch, "[PATCH v3.29] datapath: Add basic MPLS support to kernel" which adds MPLS support to the kernel datapath of Open vSwtich. That patch sets the above requirements in datapath/actions.c:push_mpls() and was used to exercise this code. The datapath patch is against the Open vSwtich tree but it is intended that it be added to the Open vSwtich code present in the mainline Linux kernel at some point. Features: I believe that the approach that I have taken is at least partially consistent with the handling of other protocols. Jesse, I understand that you have some ideas here. I am more than happy to change my implementation. This patch adds dev->mpls_features which may be used by devices to advertise features supported for MPLS packets. A new NETIF_F_MPLS_GSO feature is added for devices which support hardware MPLS GSO offload. Currently no devices support this and MPLS GSO always falls back to software. Alternate Implementation: One possible alternate implementation is to teach netif_skb_features() and skb_network_protocol() about MPLS, in a similar way to their understanding of VLANs. I believe this would avoid the need for net/mpls/mpls_gso.c and in particular the calls to __skb_push() and __skb_push() in mpls_gso_segment(). I have decided on the implementation in this patch as it should not introduce any overhead in the case where mpls_gso is not compiled into the kernel or inserted as a module. MPLS GSO suggested by Jesse Gross. Based in part on "v4 GRE: Add TCP segmentation offload for GRE" by Pravin B Shelar. Cc: Jesse Gross Cc: Pravin B Shelar Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/Kconfig | 1 + net/Makefile | 1 + net/core/dev.c | 4 ++ net/core/ethtool.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/tcp.c | 1 + net/ipv4/udp.c | 2 +- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 3 +- net/mpls/Kconfig | 9 +++++ net/mpls/Makefile | 4 ++ net/mpls/mpls_gso.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 134 insertions(+), 2 deletions(-) create mode 100644 net/mpls/Kconfig create mode 100644 net/mpls/Makefile create mode 100644 net/mpls/mpls_gso.c (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 08de901415ee..523e43e6da1b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -218,6 +218,7 @@ source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" source "net/netlink/Kconfig" +source "net/mpls/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index 091e7b04f301..9492e8cb64e9 100644 --- a/net/Makefile +++ b/net/Makefile @@ -70,3 +70,4 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ +obj-$(CONFIG_NET_MPLS_GSO) += mpls/ diff --git a/net/core/dev.c b/net/core/dev.c index 50c02ded1d69..2f09cb29cc95 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5277,6 +5277,10 @@ int register_netdevice(struct net_device *dev) */ dev->hw_enc_features |= NETIF_F_SG; + /* Make NETIF_F_SG inheritable to MPLS. + */ + dev->mpls_features |= NETIF_F_SG; + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 22efdaa76ebf..4e6f63ade741 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", + [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d01be2a3ae53..b05ae96aec44 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1295,6 +1295,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_GRE | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | 0))) goto out; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d87ce72ca8aa..ba4186e1dca9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2917,6 +2917,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0bf5d399a03c..aa5eff46d137 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2381,7 +2381,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE) || + SKB_GSO_GRE | SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 71b766ee821d..a263b990ee11 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -98,6 +98,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 3bb3a891a424..76d401a93c7a 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -63,7 +63,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE) || + SKB_GSO_GRE | + SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig new file mode 100644 index 000000000000..37421db88965 --- /dev/null +++ b/net/mpls/Kconfig @@ -0,0 +1,9 @@ +# +# MPLS configuration +# +config NET_MPLS_GSO + tristate "MPLS: GSO support" + help + This is helper module to allow segmentation of non-MPLS GSO packets + that have had MPLS stack entries pushed onto them and thus + become MPLS GSO packets. diff --git a/net/mpls/Makefile b/net/mpls/Makefile new file mode 100644 index 000000000000..0a3c171be537 --- /dev/null +++ b/net/mpls/Makefile @@ -0,0 +1,4 @@ +# +# Makefile for MPLS. +# +obj-y += mpls_gso.o diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c new file mode 100644 index 000000000000..1bec1219ab81 --- /dev/null +++ b/net/mpls/mpls_gso.c @@ -0,0 +1,108 @@ +/* + * MPLS GSO Support + * + * Authors: Simon Horman (horms@verge.net.au) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on: GSO portions of net/ipv4/gre.c + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t mpls_features; + __be16 mpls_protocol; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE | + SKB_GSO_MPLS))) + goto out; + + /* Setup inner SKB. */ + mpls_protocol = skb->protocol; + skb->protocol = skb->inner_protocol; + + /* Push back the mac header that skb_mac_gso_segment() has pulled. + * It will be re-pulled by the call to skb_mac_gso_segment() below + */ + __skb_push(skb, skb->mac_len); + + /* Segment inner packet. */ + mpls_features = skb->dev->mpls_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, mpls_features); + + + /* Restore outer protocol. */ + skb->protocol = mpls_protocol; + + /* Re-pull the mac header that the call to skb_mac_gso_segment() + * above pulled. It will be re-pushed after returning + * skb_mac_gso_segment(), an indirect caller of this function. + */ + __skb_push(skb, skb->data - skb_mac_header(skb)); + +out: + return segs; +} + +static int mpls_gso_send_check(struct sk_buff *skb) +{ + return 0; +} + +static struct packet_offload mpls_mc_offload = { + .type = cpu_to_be16(ETH_P_MPLS_MC), + .callbacks = { + .gso_send_check = mpls_gso_send_check, + .gso_segment = mpls_gso_segment, + }, +}; + +static struct packet_offload mpls_uc_offload = { + .type = cpu_to_be16(ETH_P_MPLS_UC), + .callbacks = { + .gso_send_check = mpls_gso_send_check, + .gso_segment = mpls_gso_segment, + }, +}; + +static int __init mpls_gso_init(void) +{ + pr_info("MPLS GSO support\n"); + + dev_add_offload(&mpls_uc_offload); + dev_add_offload(&mpls_mc_offload); + + return 0; +} + +static void __exit mpls_gso_exit(void) +{ + dev_remove_offload(&mpls_uc_offload); + dev_remove_offload(&mpls_mc_offload); +} + +module_init(mpls_gso_init); +module_exit(mpls_gso_exit); + +MODULE_DESCRIPTION("MPLS GSO support"); +MODULE_AUTHOR("Simon Horman (horms@verge.net.au)"); +MODULE_LICENSE("GPL"); -- cgit From da6e378ba918cd0feeb90eeb84d8b42148bb0c82 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Mon, 27 May 2013 19:53:31 +0000 Subject: netpoll: remove return value from netpoll_rx_disable() The netpoll_rx_disable() will always return 0, it is no use and looks wordy, so remove the unnecessary code and get rid of it in _dev_open and _dev_close. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- net/core/dev.c | 15 ++++----------- net/core/netpoll.c | 3 +-- 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2f09cb29cc95..5f747974ac58 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1198,9 +1198,7 @@ static int __dev_open(struct net_device *dev) * If we don't do this there is a chance ndo_poll_controller * or ndo_poll may be running while we open the device */ - ret = netpoll_rx_disable(dev); - if (ret) - return ret; + netpoll_rx_disable(dev); ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); @@ -1309,9 +1307,7 @@ static int __dev_close(struct net_device *dev) LIST_HEAD(single); /* Temporarily disable netpoll until the interface is down */ - retval = netpoll_rx_disable(dev); - if (retval) - return retval; + netpoll_rx_disable(dev); list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); @@ -1353,14 +1349,11 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { - int ret = 0; if (dev->flags & IFF_UP) { LIST_HEAD(single); /* Block netpoll rx while the interface is going down */ - ret = netpoll_rx_disable(dev); - if (ret) - return ret; + netpoll_rx_disable(dev); list_add(&dev->unreg_list, &single); dev_close_many(&single); @@ -1368,7 +1361,7 @@ int dev_close(struct net_device *dev) netpoll_rx_enable(dev); } - return ret; + return 0; } EXPORT_SYMBOL(dev_close); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cec074be8c43..37deedd48bcc 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -247,7 +247,7 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } -int netpoll_rx_disable(struct net_device *dev) +void netpoll_rx_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; @@ -257,7 +257,6 @@ int netpoll_rx_disable(struct net_device *dev) if (ni) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); - return 0; } EXPORT_SYMBOL(netpoll_rx_disable); -- cgit From 351638e7deeed2ec8ce451b53d33921b3da68f83 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 28 May 2013 01:30:21 +0000 Subject: net: pass info struct via netdevice notifier So far, only net_device * could be passed along with netdevice notifier event. This patch provides a possibility to pass custom structure able to provide info that event listener needs to know. Signed-off-by: Jiri Pirko v2->v3: fix typo on simeth shortened dev_getter shortened notifier_info struct name v1->v2: fix notifier_call parameter in call_netdevice_notifier() Signed-off-by: David S. Miller --- net/8021q/vlan.c | 2 +- net/appletalk/aarp.c | 2 +- net/appletalk/ddp.c | 2 +- net/atm/clip.c | 4 +-- net/atm/mpc.c | 6 ++-- net/ax25/af_ax25.c | 6 ++-- net/batman-adv/hard-interface.c | 2 +- net/bridge/br_notify.c | 2 +- net/caif/caif_dev.c | 4 +-- net/caif/caif_usb.c | 4 +-- net/can/af_can.c | 4 +-- net/can/bcm.c | 4 +-- net/can/gw.c | 4 +-- net/can/raw.c | 4 +-- net/core/dev.c | 56 +++++++++++++++++++++++++++++------- net/core/drop_monitor.c | 4 +-- net/core/dst.c | 2 +- net/core/fib_rules.c | 4 +-- net/core/netprio_cgroup.c | 2 +- net/core/pktgen.c | 2 +- net/core/rtnetlink.c | 2 +- net/decnet/af_decnet.c | 4 +-- net/ieee802154/6lowpan.c | 5 ++-- net/ipv4/arp.c | 2 +- net/ipv4/devinet.c | 2 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv6/addrconf.c | 4 +-- net/ipv6/ip6mr.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/netfilter/ip6t_MASQUERADE.c | 2 +- net/ipv6/route.c | 4 +-- net/ipx/af_ipx.c | 2 +- net/iucv/af_iucv.c | 2 +- net/mac80211/iface.c | 5 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 4 +-- net/netfilter/nfnetlink_queue_core.c | 2 +- net/netfilter/xt_TEE.c | 2 +- net/netlabel/netlabel_unlabeled.c | 7 ++--- net/netrom/af_netrom.c | 2 +- net/openvswitch/dp_notify.c | 2 +- net/packet/af_packet.c | 5 ++-- net/phonet/pn_dev.c | 4 +-- net/rose/af_rose.c | 6 ++-- net/sched/act_mirred.c | 2 +- net/tipc/eth_media.c | 4 +-- net/tipc/ib_media.c | 4 +-- net/wireless/core.c | 5 ++-- net/x25/af_x25.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 51 files changed, 124 insertions(+), 93 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9424f3718ea7..2fb2d88e8c2e 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -341,7 +341,7 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event) static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vlan_group *grp; struct vlan_info *vlan_info; int i, flgs; diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 173a2e82f486..690356fa52b9 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -332,7 +332,7 @@ static void aarp_expire_timeout(unsigned long unused) static int aarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); int ct; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ef12839a7cfe..7fee50d637f9 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -644,7 +644,7 @@ static inline void atalk_dev_down(struct net_device *dev) static int ddp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/atm/clip.c b/net/atm/clip.c index 8ae3a7879335..cce241eb01d9 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -539,9 +539,9 @@ static int clip_create(int number) } static int clip_device_event(struct notifier_block *this, unsigned long event, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/atm/mpc.c b/net/atm/mpc.c index d4cc1be5c364..3af12755cd04 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -998,14 +998,12 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc) } static int mpoa_event_listener(struct notifier_block *mpoa_notifier, - unsigned long event, void *dev_ptr) + unsigned long event, void *ptr) { - struct net_device *dev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpoa_client *mpc; struct lec_priv *priv; - dev = dev_ptr; - if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index e277e38f736b..4b4d2b779ec1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -111,9 +111,9 @@ again: * Handle device status changes. */ static int ax25_device_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -1974,7 +1974,7 @@ static struct packet_type ax25_packet_type __read_mostly = { }; static struct notifier_block ax25_dev_notifier = { - .notifier_call =ax25_device_event, + .notifier_call = ax25_device_event, }; static int __init ax25_init(void) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 522243aff2f3..b6504eac0ed8 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -595,7 +595,7 @@ void batadv_hardif_remove_interfaces(void) static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *net_dev = ptr; + struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); struct batadv_hard_iface *hard_iface; struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 1644b3e1f947..3a3f371b2841 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -31,7 +31,7 @@ struct notifier_block br_device_notifier = { */ static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; bool changed_addr; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 1f9ece1a9c34..4dca159435cf 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -352,9 +352,9 @@ EXPORT_SYMBOL(caif_enroll_dev); /* notify Caif of device events */ static int caif_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct caif_device_entry *caifd = NULL; struct caif_dev_common *caifdev; struct cfcnfg *cfg; diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 942e00a425fd..75ed04b78fa4 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -121,9 +121,9 @@ static struct packet_type caif_usb_type __read_mostly = { }; static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct caif_dev_common common; struct cflayer *layer, *link_support; struct usbnet *usbnet; diff --git a/net/can/af_can.c b/net/can/af_can.c index c4e50852c9f4..3ab8dd2e1282 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -794,9 +794,9 @@ EXPORT_SYMBOL(can_proto_unregister); * af_can notifier to create/remove CAN netdevice specific structs */ static int can_notifier(struct notifier_block *nb, unsigned long msg, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dev_rcv_lists *d; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/can/bcm.c b/net/can/bcm.c index 8f113e6ff327..46f20bfafc0e 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1350,9 +1350,9 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, * notification handler for netdevice status changes */ static int bcm_notifier(struct notifier_block *nb, unsigned long msg, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier); struct sock *sk = &bo->sk; struct bcm_op *op; diff --git a/net/can/gw.c b/net/can/gw.c index 3ee690e8c7d3..2f291f961a17 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -445,9 +445,9 @@ static inline void cgw_unregister_filter(struct cgw_job *gwj) } static int cgw_notifier(struct notifier_block *nb, - unsigned long msg, void *data) + unsigned long msg, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/can/raw.c b/net/can/raw.c index 1085e65f848e..641e1c895123 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -239,9 +239,9 @@ static int raw_enable_allfilters(struct net_device *dev, struct sock *sk) } static int raw_notifier(struct notifier_block *nb, - unsigned long msg, void *data) + unsigned long msg, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; diff --git a/net/core/dev.c b/net/core/dev.c index 5f747974ac58..54fce6006a83 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1391,6 +1391,20 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +static void netdev_notifier_info_init(struct netdev_notifier_info *info, + struct net_device *dev) +{ + info->dev = dev; +} + +static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, + struct net_device *dev) +{ + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return nb->notifier_call(nb, val, &info); +} static int dev_boot_phase = 1; @@ -1423,7 +1437,7 @@ int register_netdevice_notifier(struct notifier_block *nb) goto unlock; for_each_net(net) { for_each_netdev(net, dev) { - err = nb->notifier_call(nb, NETDEV_REGISTER, dev); + err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); err = notifier_to_errno(err); if (err) goto rollback; @@ -1431,7 +1445,7 @@ int register_netdevice_notifier(struct notifier_block *nb) if (!(dev->flags & IFF_UP)) continue; - nb->notifier_call(nb, NETDEV_UP, dev); + call_netdevice_notifier(nb, NETDEV_UP, dev); } } @@ -1447,10 +1461,11 @@ rollback: goto outroll; if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } @@ -1488,10 +1503,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb) for_each_net(net) { for_each_netdev(net, dev) { if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } unlock: @@ -1500,6 +1516,25 @@ unlock: } EXPORT_SYMBOL(unregister_netdevice_notifier); +/** + * call_netdevice_notifiers_info - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @info: notifier information data + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ + +int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, + struct netdev_notifier_info *info) +{ + ASSERT_RTNL(); + netdev_notifier_info_init(info, dev); + return raw_notifier_call_chain(&netdev_chain, val, info); +} +EXPORT_SYMBOL(call_netdevice_notifiers_info); + /** * call_netdevice_notifiers - call all network notifier blocks * @val: value passed unmodified to notifier function @@ -1511,8 +1546,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - ASSERT_RTNL(); - return raw_notifier_call_chain(&netdev_chain, val, dev); + struct netdev_notifier_info info; + + return call_netdevice_notifiers_info(val, dev, &info); } EXPORT_SYMBOL(call_netdevice_notifiers); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d23b6682f4e9..5e78d44333b9 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -295,9 +295,9 @@ static int net_dm_cmd_trace(struct sk_buff *skb, } static int dropmon_net_event(struct notifier_block *ev_block, - unsigned long event, void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dm_hw_stat_delta *new_stat = NULL; struct dm_hw_stat_delta *tmp; diff --git a/net/core/dst.c b/net/core/dst.c index df9cc810ec8e..ca4231ec7347 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -372,7 +372,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dst_entry *dst, *last = NULL; switch (event) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d5a9f8ead0d8..21735440c44a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -705,9 +705,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) static int fib_rules_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct fib_rules_ops *ops; diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 0777d0aa18c3..e533259dce3c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -261,7 +261,7 @@ struct cgroup_subsys net_prio_subsys = { static int netprio_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netprio_map *old; /* diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 11f2704c3810..795498fd4587 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1921,7 +1921,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); if (pn->pktgen_exiting) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a08bd2b7fe3f..49c14451d8ab 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2667,7 +2667,7 @@ static void rtnetlink_rcv(struct sk_buff *skb) static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_UP: diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index c21f200eed93..dd4d506ef923 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2078,9 +2078,9 @@ out_err: } static int dn_device_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 55e1fd5b3e56..3b9d5f20bd1c 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1352,10 +1352,9 @@ static inline void lowpan_netlink_fini(void) } static int lowpan_device_event(struct notifier_block *unused, - unsigned long event, - void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); LIST_HEAD(del_list); struct lowpan_dev_record *entry, *tmp; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 247ec1951c35..bf574029a183 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1234,7 +1234,7 @@ out: static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_CHANGEADDR: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dfc39d4d48b7..b047e2d8a614 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1333,7 +1333,7 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c7629a209f9d..05a4888dede9 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1038,7 +1038,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev; struct net *net = dev_net(dev); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d9610ae7855..f975399f3522 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1609,7 +1609,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr_table *mrt; struct vif_device *v; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 5d5d4d1be9c2..dd5508bde799 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -108,7 +108,7 @@ static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 432e084b6b62..bce073b4bbd4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2826,9 +2826,9 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) } static int addrconf_notify(struct notifier_block *this, unsigned long event, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *) data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; int err; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 241fb8ad9fcf..583e8d435f9a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1319,7 +1319,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, static int ip6mr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr6_table *mrt; struct mif_device *v; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2712ab22a174..a0962697a257 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1568,7 +1568,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct inet6_dev *idev; diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 60e9053bab05..b76257cd7e1e 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -71,7 +71,7 @@ static int device_cmp(struct nf_conn *ct, void *ifindex) static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad0aa6b0b86a..194c3cde1536 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2681,9 +2681,9 @@ errout: } static int ip6_route_dev_notify(struct notifier_block *this, - unsigned long event, void *data) + unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f547a47d381c..7a1e0fc1bd4d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -330,7 +330,7 @@ static __inline__ void __ipxitf_put(struct ipx_interface *intrfc) static int ipxitf_device_event(struct notifier_block *notifier, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ipx_interface *i, *tmp; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ae691651b721..168aff5e60de 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2293,7 +2293,7 @@ out_unlock: static int afiucv_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *event_dev = (struct net_device *)ptr; + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); struct sock *sk; struct iucv_sock *iucv; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 60f1ce5e5e52..d2c3fd178dbe 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1717,10 +1717,9 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } static int netdev_notify(struct notifier_block *nb, - unsigned long state, - void *ndev) + unsigned long state, void *ptr) { - struct net_device *dev = ndev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ieee80211_sub_if_data *sdata; if (state != NETDEV_CHANGENAME) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5b142fb16480..7c3ed429789e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1487,9 +1487,9 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) * Currently only NETDEV_DOWN is handled to release refs to cached dsts */ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_service *svc; diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 4e27fa035814..0f2ac8f2e7b7 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -800,7 +800,7 @@ static int nfqnl_rcv_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index bd93e51d30ac..292934d23482 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -200,7 +200,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct xt_tee_priv *priv; priv = container_of(this, struct xt_tee_priv, notifier); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 8a6c6ea466d8..af3531926ee0 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -708,7 +708,7 @@ unlhsh_remove_return: * netlbl_unlhsh_netdev_handler - Network device notification handler * @this: notifier block * @event: the event - * @ptr: the network device (cast to void) + * @ptr: the netdevice notifier info (cast to void) * * Description: * Handle network device events, although at present all we care about is a @@ -717,10 +717,9 @@ unlhsh_remove_return: * */ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, - unsigned long event, - void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netlbl_unlhsh_iface *iface = NULL; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ec0c80fde69f..698814bfa7ad 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -117,7 +117,7 @@ static void nr_kill_by_device(struct net_device *dev) */ static int nr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index ef4feec6cd84..c3235675f359 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -78,7 +78,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct ovs_net *ovs_net; - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vport *vport = NULL; if (!ovs_is_internal_dev(dev)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8ec1bca7f859..79fe63246b27 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3331,10 +3331,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, } -static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) +static int packet_notifier(struct notifier_block *this, + unsigned long msg, void *ptr) { struct sock *sk; - struct net_device *dev = data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); rcu_read_lock(); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 45a7df6575de..56a6146ac94b 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -292,9 +292,9 @@ static void phonet_route_autodel(struct net_device *dev) /* notify Phonet of device events */ static int phonet_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (what) { case NETDEV_REGISTER: diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9c8347451597..e98fcfbe6007 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -202,10 +202,10 @@ static void rose_kill_by_device(struct net_device *dev) /* * Handle device status changes. */ -static int rose_device_event(struct notifier_block *this, unsigned long event, - void *ptr) +static int rose_device_event(struct notifier_block *this, + unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5d676edc22a6..977c10e0631b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -243,7 +243,7 @@ nla_put_failure: static int mirred_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tcf_mirred *m; if (event == NETDEV_UNREGISTER) diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 120a676a3360..fc60bea63169 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -251,9 +251,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr) * specified device. */ static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv) + void *ptr) { - struct net_device *dev = (struct net_device *)dv; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 2a2864c25e15..baa9df4327d9 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -244,9 +244,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr) * specified device. */ static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv) + void *ptr) { - struct net_device *dev = (struct net_device *)dv; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ib_bearer *ib_ptr = &ib_bearers[0]; struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; diff --git a/net/wireless/core.c b/net/wireless/core.c index 73405e00c800..01e41191f1bf 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -886,10 +886,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, } static int cfg80211_netdev_notifier_call(struct notifier_block *nb, - unsigned long state, - void *ndev) + unsigned long state, void *ptr) { - struct net_device *dev = ndev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; int ret; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 37ca9694aabe..1d964e23853f 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -224,7 +224,7 @@ static void x25_kill_by_device(struct net_device *dev) static int x25_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct x25_neigh *nb; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 23cea0f74336..536ccc95de89 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2784,7 +2784,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net) static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_DOWN: -- cgit From be9efd3653284f2827fd82861e8e9db9a8f726e1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 28 May 2013 01:30:22 +0000 Subject: net: pass changed flags along with NETDEV_CHANGE event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use new netdevice notifier infrastructure to pass along changed flags. Signed-off-by: Timo Teräs Signed-off-by: Jiri Pirko v2->v3: shortened notifier_info struct name Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 54fce6006a83..6eb621cc3b81 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4771,8 +4771,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) } if (dev->flags & IFF_UP && - (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) - call_netdevice_notifiers(NETDEV_CHANGE, dev); + (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { + struct netdev_notifier_change_info change_info; + + change_info.flags_changed = changes; + call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + &change_info.info); + } } /** -- cgit From 6c8b4e3ff81b82fc153625e81e60af1d89de2c32 Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Tue, 28 May 2013 01:30:23 +0000 Subject: arp: flush arp cache on IFF_NOARP change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IFF_NOARP affects what kind of neighbor entries are created (nud NOARP or nud INCOMPLETE). If the flag changes, flush the arp cache to refresh all entries. Signed-off-by: Timo Teräs Signed-off-by: Jiri Pirko v2->v3: shortened notifier_info struct name Signed-off-by: David S. Miller --- net/ipv4/arp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index bf574029a183..4429b013f269 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1235,12 +1235,18 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); rt_cache_flush(dev_net(dev)); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&arp_tbl, dev); + break; default: break; } -- cgit From 06ecf24bdf2b7afc6c8fd13de6dba2a96dd331b6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 28 May 2013 13:15:50 -0700 Subject: net: Fix build warnings after mac_header and transport_header became __u16. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/core/skbuff.c: In function ‘__alloc_skb_head’: net/core/skbuff.c:203:2: warning: large integer implicitly truncated to unsigned type [-Woverflow] net/core/skbuff.c: In function ‘__alloc_skb’: net/core/skbuff.c:279:2: warning: large integer implicitly truncated to unsigned type [-Woverflow] net/core/skbuff.c:280:2: warning: large integer implicitly truncated to unsigned type [-Woverflow] net/core/skbuff.c: In function ‘build_skb’: net/core/skbuff.c:348:2: warning: large integer implicitly truncated to unsigned type [-Woverflow] net/core/skbuff.c:349:2: warning: large integer implicitly truncated to unsigned type [-Woverflow] Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d6298914f4e7..f45de077ab9e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -200,7 +200,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) atomic_set(&skb->users, 1); #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; + skb->mac_header = (__u16) ~0U; #endif out: return skb; @@ -276,8 +276,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb_reset_tail_pointer(skb); skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; - skb->transport_header = ~0U; + skb->mac_header = (__u16) ~0U; + skb->transport_header = (__u16) ~0U; #endif /* make sure we initialize shinfo sequentially */ @@ -345,8 +345,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb_reset_tail_pointer(skb); skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; - skb->transport_header = ~0U; + skb->mac_header = (__u16) ~0U; + skb->transport_header = (__u16) ~0U; #endif /* make sure we initialize shinfo sequentially */ -- cgit From de68d1003d9eb0a5f7d4714315614e4bc956f68e Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sat, 9 Mar 2013 23:14:22 +0100 Subject: batman-adv: split batadv_is_wifi_iface() into two functions Previously batadv_is_wifi_iface() did two things at once: looking up a net_device from an interface index, and determining if it is a wifi device. The second part is useful itself when the caller already has a net_device reference. Signed-off-by: Matthias Schiffer Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/hard-interface.c | 84 +++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index b6504eac0ed8..d5ec67b63253 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -117,6 +117,58 @@ static int batadv_is_valid_iface(const struct net_device *net_dev) return 1; } +/** + * batadv_is_wifi_netdev - check if the given net_device struct is a wifi + * interface + * @net_device: the device to check + * + * Returns true if the net device is a 802.11 wireless device, false otherwise. + */ +static bool batadv_is_wifi_netdev(struct net_device *net_device) +{ +#ifdef CONFIG_WIRELESS_EXT + /* pre-cfg80211 drivers have to implement WEXT, so it is possible to + * check for wireless_handlers != NULL + */ + if (net_device->wireless_handlers) + return true; +#endif + + /* cfg80211 drivers have to set ieee80211_ptr */ + if (net_device->ieee80211_ptr) + return true; + + return false; +} + +/** + * batadv_is_wifi_iface - check if the given interface represented by ifindex + * is a wifi interface + * @ifindex: interface index to check + * + * Returns true if the interface represented by ifindex is a 802.11 wireless + * device, false otherwise. + */ +bool batadv_is_wifi_iface(int ifindex) +{ + struct net_device *net_device = NULL; + bool ret = false; + + if (ifindex == BATADV_NULL_IFINDEX) + goto out; + + net_device = dev_get_by_index(&init_net, ifindex); + if (!net_device) + goto out; + + ret = batadv_is_wifi_netdev(net_device); + +out: + if (net_device) + dev_put(net_device); + return ret; +} + static struct batadv_hard_iface * batadv_hardif_get_active(const struct net_device *soft_iface) { @@ -657,38 +709,6 @@ out: return NOTIFY_DONE; } -/* This function returns true if the interface represented by ifindex is a - * 802.11 wireless device - */ -bool batadv_is_wifi_iface(int ifindex) -{ - struct net_device *net_device = NULL; - bool ret = false; - - if (ifindex == BATADV_NULL_IFINDEX) - goto out; - - net_device = dev_get_by_index(&init_net, ifindex); - if (!net_device) - goto out; - -#ifdef CONFIG_WIRELESS_EXT - /* pre-cfg80211 drivers have to implement WEXT, so it is possible to - * check for wireless_handlers != NULL - */ - if (net_device->wireless_handlers) - ret = true; - else -#endif - /* cfg80211 drivers have to set ieee80211_ptr */ - if (net_device->ieee80211_ptr) - ret = true; -out: - if (net_device) - dev_put(net_device); - return ret; -} - struct notifier_block batadv_hard_if_notifier = { .notifier_call = batadv_hard_if_event, }; -- cgit From caf65bfcc5dbabd7222fa45fdcd42ce0783d7a42 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sat, 9 Mar 2013 23:14:23 +0100 Subject: batman-adv: send each broadcast only once on non-wireless interfaces While it makes sense to send each broadcast thrice on 802.11 (WLAN) interfaces as broadcasts are often unreliable on these, there is no reason to do so on other interface types. The increased the overhead can be harmful on low-bandwidth links like VPN connections over slow internet lines, therefore it is better to reduce the number of broadcast packets sent on non-wireless links to one. Signed-off-by: Matthias Schiffer Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/hard-interface.c | 4 ++++ net/batman-adv/main.h | 5 +++++ net/batman-adv/send.c | 5 ++++- net/batman-adv/types.h | 2 ++ 4 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index d5ec67b63253..4a76ed654c92 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -593,6 +593,10 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_WORK(&hard_iface->cleanup_work, batadv_hardif_remove_interface_finish); + hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; + if (batadv_is_wifi_netdev(net_dev)) + hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; + /* extra reference for return */ atomic_set(&hard_iface->refcount, 2); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 59a0d6af15c8..ea1a3bafe9c3 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -76,6 +76,11 @@ #define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ +/* number of packets to send for broadcasts on different interface types */ +#define BATADV_NUM_BCASTS_DEFAULT 1 +#define BATADV_NUM_BCASTS_WIRELESS 3 +#define BATADV_NUM_BCASTS_MAX 3 + /* msecs after which an ARP_REQUEST is sent in broadcast as fallback */ #define ARP_REQ_DELAY 250 /* numbers of originator to contact for any PUT/GET DHT operation */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 263cfd1ccee7..eb16b04d4bee 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -260,6 +260,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (hard_iface->soft_iface != soft_iface) continue; + if (forw_packet->num_packets >= hard_iface->num_bcasts) + continue; + /* send a copy of the saved skb */ skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb1) @@ -271,7 +274,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) forw_packet->num_packets++; /* if we still have some more bcasts to send */ - if (forw_packet->num_packets < 3) { + if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) { _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, msecs_to_jiffies(5)); return; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index aba8364c3689..5f542bdd9a4d 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -61,6 +61,7 @@ struct batadv_hard_iface_bat_iv { * @if_status: status of the interface for batman-adv * @net_dev: pointer to the net_device * @frag_seqno: last fragment sequence number sent by this interface + * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) * @hardif_obj: kobject of the per interface sysfs "mesh" directory * @refcount: number of contexts the object is used * @batman_adv_ptype: packet type describing packets that should be processed by @@ -76,6 +77,7 @@ struct batadv_hard_iface { char if_status; struct net_device *net_dev; atomic_t frag_seqno; + uint8_t num_bcasts; struct kobject *hardif_obj; atomic_t refcount; struct packet_type batman_adv_ptype; -- cgit From 93178018eb35aaa6ebcb5a136dce7eb3add011ab Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Sun, 10 Mar 2013 19:29:15 +0800 Subject: batman-adv: fix typos in kernel doc & comments Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/distributed-arp-table.c | 82 ++++++++++++++++------------------ 1 file changed, 39 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 239992021b1d..06345d401588 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -45,9 +45,9 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly + * batadv_dat_entry_free_ref - decrement the dat_entry refcounter and possibly * free it - * @dat_entry: the oentry to free + * @dat_entry: the entry to free */ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) { @@ -56,10 +56,10 @@ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) } /** - * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not + * batadv_dat_to_purge - check whether a dat_entry has to be purged or not * @dat_entry: the entry to check * - * Returns true if the entry has to be purged now, false otherwise + * Returns true if the entry has to be purged now, false otherwise. */ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) { @@ -75,8 +75,8 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) * returns a boolean value: true is the entry has to be deleted, * false otherwise * - * Loops over each entry in the DAT local storage and delete it if and only if - * the to_purge function passed as argument returns true + * Loops over each entry in the DAT local storage and deletes it if and only if + * the to_purge function passed as argument returns true. */ static void __batadv_dat_purge(struct batadv_priv *bat_priv, bool (*to_purge)(struct batadv_dat_entry *)) @@ -97,7 +97,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, spin_lock_bh(list_lock); hlist_for_each_entry_safe(dat_entry, node_tmp, head, hash_entry) { - /* if an helper function has been passed as parameter, + /* if a helper function has been passed as parameter, * ask it if the entry has to be purged or not */ if (to_purge && !to_purge(dat_entry)) @@ -134,7 +134,7 @@ static void batadv_dat_purge(struct work_struct *work) * @node: node in the local table * @data2: second object to compare the node to * - * Returns 1 if the two entry are the same, 0 otherwise + * Returns 1 if the two entries are the same, 0 otherwise. */ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) { @@ -149,7 +149,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_src field in the ARP packet + * Returns the value of the hw_src field in the ARP packet. */ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) { @@ -166,7 +166,7 @@ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_src field in the ARP packet + * Returns the value of the ip_src field in the ARP packet. */ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) { @@ -178,7 +178,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_dst field in the ARP packet + * Returns the value of the hw_dst field in the ARP packet. */ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) { @@ -190,7 +190,7 @@ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_dst field in the ARP packet + * Returns the value of the ip_dst field in the ARP packet. */ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) { @@ -202,7 +202,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) * @data: data to hash * @size: size of the hash table * - * Returns the selected index in the hash table for the given data + * Returns the selected index in the hash table for the given data. */ static uint32_t batadv_hash_dat(const void *data, uint32_t size) { @@ -224,12 +224,12 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size) } /** - * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash + * batadv_dat_entry_hash_find - look for a given dat_entry in the local hash * table * @bat_priv: the bat priv with all the soft interface information * @ip: search key * - * Returns the dat_entry if found, NULL otherwise + * Returns the dat_entry if found, NULL otherwise. */ static struct batadv_dat_entry * batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) @@ -343,9 +343,6 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, if (hdr_size == 0) return; - /* if the ARP packet is encapsulated in a batman packet, let's print - * some debug messages - */ unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; switch (unicast_4addr_packet->u.header.packet_type) { @@ -409,7 +406,8 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, * @candidate: orig_node under evaluation * @max_orig_node: last selected candidate * - * Returns true if the node has been elected as next candidate or false othrwise + * Returns true if the node has been elected as next candidate or false + * otherwise. */ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, int select, batadv_dat_addr_t tmp_max, @@ -472,7 +470,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, */ cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND; - /* iterate over the originator list and find the node with closest + /* iterate over the originator list and find the node with the closest * dat_address which has not been selected yet */ for (i = 0; i < hash->size; i++) { @@ -480,7 +478,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - /* the dht space is a ring and addresses are unsigned */ + /* the dht space is a ring using unsigned addresses */ tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + ip_key; @@ -512,7 +510,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, } /** - * batadv_dat_select_candidates - selects the nodes which the DHT message has to + * batadv_dat_select_candidates - select the nodes which the DHT message has to * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT @@ -521,7 +519,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * closest values (from the LEFT, with wrap around if needed) then the hash * value of the key. ip_dst is the key. * - * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM + * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. */ static struct batadv_dat_candidate * batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) @@ -558,10 +556,11 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @ip: the DHT key * @packet_subtype: unicast4addr packet subtype to use * - * In this function the skb is copied by means of pskb_copy() and is sent as - * unicast packet to each of the selected candidates + * This function copies the skb with pskb_copy() and is sent as unicast packet + * to each of the selected candidates. * - * Returns true if the packet is sent to at least one candidate, false otherwise + * Returns true if the packet is sent to at least one candidate, false + * otherwise. */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, @@ -727,7 +726,7 @@ out: * @skb: packet to analyse * @hdr_size: size of the possible header before the ARP packet in the skb * - * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise + * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise. */ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -754,9 +753,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN); - /* Check whether the ARP packet carries a valid - * IP information - */ + /* check whether the ARP packet carries a valid IP information */ if (arphdr->ar_hrd != htons(ARPHRD_ETHER)) goto out; @@ -784,7 +781,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src)) goto out; - /* we don't care about the destination MAC address in ARP requests */ + /* don't care about the destination MAC address in ARP requests */ if (arphdr->ar_op != htons(ARPOP_REQUEST)) { hw_dst = batadv_arp_hw_dst(skb, hdr_size); if (is_zero_ether_addr(hw_dst) || @@ -804,8 +801,8 @@ out: * @skb: packet to check * * Returns true if the message has been sent to the dht candidates, false - * otherwise. In case of true the message has to be enqueued to permit the - * fallback + * otherwise. In case of a positive return value the message has to be enqueued + * to permit the fallback. */ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb) @@ -867,7 +864,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); ret = true; } else { - /* Send the request on the DHT */ + /* Send the request to the DHT */ ret = batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_GET); } @@ -884,7 +881,7 @@ out: * @skb: packet to check * @hdr_size: size of the encapsulation header * - * Returns true if the request has been answered, false otherwise + * Returns true if the request has been answered, false otherwise. */ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -924,10 +921,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!skb_new) goto out; - /* to preserve backwards compatibility, here the node has to answer - * using the same packet type it received for the request. This is due - * to that if a node is not using the 4addr packet format it may not - * support it. + /* To preserve backwards compatibility, the node has choose the outgoing + * format based on the incoming request packet type. The assumption is + * that a node not using the 4addr packet format doesn't support it. */ if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) err = batadv_unicast_4addr_send_skb(bat_priv, skb_new, @@ -977,7 +973,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); /* Send the ARP reply to the candidates for both the IP addresses that - * the node got within the ARP reply + * the node obtained from the ARP reply */ batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); @@ -987,7 +983,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, * DAT storage only * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check - * @hdr_size: siaze of the encapsulation header + * @hdr_size: size of the encapsulation header */ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -1031,11 +1027,11 @@ out: /** * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped - * (because the node has already got the reply via DAT) or not + * (because the node has already obtained the reply via DAT) or not * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the broadcast packet * - * Returns true if the node can drop the packet, false otherwise + * Returns true if the node can drop the packet, false otherwise. */ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet) -- cgit From 863dd7a82a2fba3e1a094c3d10a9cc8d1afd10d6 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 25 Mar 2013 13:49:46 +0100 Subject: batman-adv: drop useless argument seqno in neighbor creation the sequence number is not stored in struct neigh_node, therefore there is no need to pass such value to the neigh_node creation procedure. At the moment the value is only used by a debug message, but given the fact that the seqno is not related to the neighbor object, it is better to print it elsewhere. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 11 ++++------- net/batman-adv/originator.c | 5 ++--- net/batman-adv/originator.h | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 071f288b77a8..da239c5424b8 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -33,12 +33,11 @@ static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh, __be32 seqno) + struct batadv_orig_node *orig_neigh) { struct batadv_neigh_node *neigh_node; - neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, - ntohl(seqno)); + neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr); if (!neigh_node) goto out; @@ -696,8 +695,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, neigh_node = batadv_iv_ogm_neigh_new(if_incoming, ethhdr->h_source, - orig_node, orig_tmp, - batadv_ogm_packet->seqno); + orig_node, orig_tmp); batadv_orig_node_free_ref(orig_tmp); if (!neigh_node) @@ -829,8 +827,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, neigh_node = batadv_iv_ogm_neigh_new(if_incoming, orig_neigh_node->orig, orig_neigh_node, - orig_neigh_node, - batadv_ogm_packet->seqno); + orig_neigh_node); if (!neigh_node) goto out; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index fad1a2093e15..ddf563484798 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -92,7 +92,7 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node) struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, uint32_t seqno) + const uint8_t *neigh_addr) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_neigh_node *neigh_node; @@ -110,8 +110,7 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, atomic_set(&neigh_node->refcount, 2); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM, initial seqno %d\n", - neigh_addr, seqno); + "Creating new neighbor %pM\n", neigh_addr); out: return neigh_node; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 734e5a3d8a5b..7887b84a9af4 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -31,7 +31,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, uint32_t seqno); + const uint8_t *neigh_addr); void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); struct batadv_neigh_node * batadv_orig_node_get_router(struct batadv_orig_node *orig_node); -- cgit From d1dc30739c587fe65f4120c045258ab01c79db1b Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 25 Mar 2013 13:54:45 +0100 Subject: batman-adv: slightly improve neighbor creation debug message print the interface along with the new neighbor mac address Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/originator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ddf563484798..f50553a7de62 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -110,7 +110,8 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, atomic_set(&neigh_node->refcount, 2); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM\n", neigh_addr); + "Creating new neighbor %pM on interface %s\n", neigh_addr, + hard_iface->net_dev->name); out: return neigh_node; -- cgit From 281581d3e79eaacfcdc0827e9bf990422252ba5c Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 25 Mar 2013 22:27:00 +0100 Subject: batman-adv: don't check the source address twice The source address has already been checked in batadv_check_management_packet() upon packet reception and therefore it does not need to be checked again in ogm_process() Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index da239c5424b8..6d62e2992dfa 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -988,7 +988,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, struct batadv_neigh_node *orig_neigh_router = NULL; int has_directlink_flag; int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - int is_broadcast = 0, is_bidirect; + int is_bidirect; bool is_single_hop_neigh = false; bool is_from_best_next_hop = false; int is_duplicate, sameseq, simlar_ttl; @@ -1051,9 +1051,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if (batadv_compare_eth(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr)) is_my_oldorig = 1; - - if (is_broadcast_ether_addr(ethhdr->h_source)) - is_broadcast = 1; } rcu_read_unlock(); @@ -1071,13 +1068,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, return; } - if (is_broadcast) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: ignoring all packets with broadcast source addr (sender: %pM)\n", - ethhdr->h_source); - return; - } - if (is_my_orig) { unsigned long *word; int offset; -- cgit From a3b81b67de6ae0475c0c34c552c059f7bea2f520 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 28 Mar 2013 14:21:12 +0100 Subject: batman-adv: don't check compat version twice Compatibility version is checked upon packet reception before calling any handler. For this reason it does need to be checked once more in the handler itself. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 6d62e2992dfa..38183dc6965d 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1054,13 +1054,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, } rcu_read_unlock(); - if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: incompatible batman version (%i)\n", - batadv_ogm_packet->header.version); - return; - } - if (is_my_addr) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: received my own broadcast (sender: %pM)\n", -- cgit From 38dc40ef52d882e08b2af71fc1b5413ac7009952 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sat, 30 Mar 2013 17:22:00 +0100 Subject: batman-adv: do not silently ignore wrong condition Only one neigh_node per orig_node should match a given neighbor address, therefore, if more than one matching neigh_node is found, a WARNING has to be triggered to let the user know that something is wrong in the originator state instead of silently skipping the error. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 38183dc6965d..bd50e0d76aea 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -669,7 +669,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && tmp_neigh_node->if_incoming == if_incoming && atomic_inc_not_zero(&tmp_neigh_node->refcount)) { - if (neigh_node) + if (WARN(neigh_node, "too many matching neigh_nodes")) batadv_neigh_node_free_ref(neigh_node); neigh_node = tmp_neigh_node; continue; -- cgit From 7db3fc291bb22bf43667b009dd0e701ed4eb7c96 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 2 Apr 2013 12:16:53 +0200 Subject: batman-adv: don't initialise batman_iv private members in hard-interface.c hard-interface.c has to do not contain any routing algorithm specific code. Allocate the hard-interface with kzalloc() and remove any useless and algorithm specific member initialisation Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/hard-interface.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 4a76ed654c92..c478e6bcf89b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -577,7 +577,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); - hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); + hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) goto release_dev; @@ -603,12 +603,6 @@ batadv_hardif_add_interface(struct net_device *net_dev) batadv_check_known_mac_addr(hard_iface->net_dev); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); - /* This can't be called via a bat_priv callback because - * we have no bat_priv yet. - */ - atomic_set(&hard_iface->bat_iv.ogm_seqno, 1); - hard_iface->bat_iv.ogm_buff = NULL; - return hard_iface; free_if: -- cgit From 7ed4be9523455a061e62236dc3caa9211cd7edda Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 8 Apr 2013 15:08:18 +0200 Subject: batman-adv: use eth_hdr() when it makes sense Instead of casting the result of skb_mac_header() to "struct ethhdr *" every time, the eth_hdr inline function can be use to beautify the code and improve its readability. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 2 +- net/batman-adv/bridge_loop_avoidance.c | 8 ++++---- net/batman-adv/network-coding.c | 10 +++++----- net/batman-adv/routing.c | 12 ++++++------ net/batman-adv/send.c | 2 +- net/batman-adv/soft-interface.c | 2 +- 6 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index bd50e0d76aea..ef41be49b314 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1268,7 +1268,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, skb->len + ETH_HLEN); packet_len = skb_headlen(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); packet_buff = skb->data; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 379061c72549..082189e2e40d 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -864,7 +864,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, short vid = -1; int ret; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { vhdr = (struct vlan_ethhdr *)ethhdr; @@ -885,7 +885,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, return 0; /* pskb_may_pull() may have modified the pointers, get ethhdr again */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); arphdr = (struct arphdr *)((uint8_t *)ethhdr + headlen); /* Check whether the ARP frame carries a valid @@ -1432,7 +1432,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, struct batadv_hard_iface *primary_if; int ret; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) @@ -1539,7 +1539,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) if (batadv_bla_process_claim(bat_priv, primary_if, skb)) goto handled; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index e84629ece9b7..0787a34609b9 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1245,7 +1245,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, return; /* Set the mac header as if we actually sent the packet uncoded */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); @@ -1423,7 +1423,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, { struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; @@ -1482,7 +1482,7 @@ out: void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb) { - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return; @@ -1533,7 +1533,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, skb_reset_network_header(skb); /* Reconstruct original mac header */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr, ðhdr_tmp, sizeof(*ethhdr)); /* Select the correct unicast header information based on the location @@ -1677,7 +1677,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, return NET_RX_DROP; coded_packet = (struct batadv_coded_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* Verify frame is destined for us */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index b27a4d792d15..beeab2e8cd66 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -256,7 +256,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, header_len))) return false; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) @@ -392,7 +392,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) @@ -569,7 +569,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, if (unlikely(!pskb_may_pull(skb, hdr_size))) return -ENODATA; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) @@ -803,7 +803,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL; struct batadv_neigh_node *neigh_node = NULL; struct batadv_unicast_packet *unicast_packet; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); int ret = NET_RX_DROP; struct sk_buff *new_skb; @@ -1165,7 +1165,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) @@ -1265,7 +1265,7 @@ int batadv_recv_vis_packet(struct sk_buff *skb, return NET_RX_DROP; vis_packet = (struct batadv_vis_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* not for me */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index eb16b04d4bee..ed7072ad84e8 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -61,7 +61,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, skb_reset_mac_header(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 819dfb006cdf..b26a6cdb934c 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -316,7 +316,7 @@ void batadv_interface_rx(struct net_device *soft_iface, skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: -- cgit From 24a5deeb8a198f0a26ae04485d9976c5e414f723 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 8 Apr 2013 09:38:12 +0200 Subject: batman-adv: move ring_buffer helper functions in bat_iv_ogm the two lonely ring_buffer helper functions are used by the bat_iv_ogm module only and therefore they can be moved inside it. Reported-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/Makefile | 1 - net/batman-adv/bat_iv_ogm.c | 43 ++++++++++++++++++++++++++++++++++++- net/batman-adv/ring_buffer.c | 51 -------------------------------------------- net/batman-adv/ring_buffer.h | 27 ----------------------- 4 files changed, 42 insertions(+), 80 deletions(-) delete mode 100644 net/batman-adv/ring_buffer.c delete mode 100644 net/batman-adv/ring_buffer.h (limited to 'net') diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index acbac2a9c62f..489bb36f1b94 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -32,7 +32,6 @@ batman-adv-y += icmp_socket.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o -batman-adv-y += ring_buffer.o batman-adv-y += routing.o batman-adv-y += send.o batman-adv-y += soft-interface.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index ef41be49b314..31c2891c2cd0 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -19,7 +19,6 @@ #include "main.h" #include "translation-table.h" -#include "ring_buffer.h" #include "originator.h" #include "routing.h" #include "gateway_common.h" @@ -29,6 +28,48 @@ #include "bat_algo.h" #include "network-coding.h" +/** + * batadv_ring_buffer_set - update the ring buffer with the given value + * @lq_recv: pointer to the ring buffer + * @lq_index: index to store the value at + * @value: value to store in the ring buffer + */ +static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, + uint8_t value) +{ + lq_recv[*lq_index] = value; + *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; +} + +/** + * batadv_ring_buffer_set - compute the average of all non-zero values stored + * in the given ring buffer + * @lq_recv: pointer to the ring buffer + * + * Returns computed average value. + */ +static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) +{ + const uint8_t *ptr; + uint16_t count = 0, i = 0, sum = 0; + + ptr = lq_recv; + + while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { + if (*ptr != 0) { + count++; + sum += *ptr; + } + + i++; + ptr++; + } + + if (count == 0) + return 0; + + return (uint8_t)(sum / count); +} static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c deleted file mode 100644 index ccab0bbdbb59..000000000000 --- a/net/batman-adv/ring_buffer.c +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: - * - * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#include "main.h" -#include "ring_buffer.h" - -void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value) -{ - lq_recv[*lq_index] = value; - *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; -} - -uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) -{ - const uint8_t *ptr; - uint16_t count = 0, i = 0, sum = 0; - - ptr = lq_recv; - - while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { - if (*ptr != 0) { - count++; - sum += *ptr; - } - - i++; - ptr++; - } - - if (count == 0) - return 0; - - return (uint8_t)(sum / count); -} diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h deleted file mode 100644 index 3f92ae248e83..000000000000 --- a/net/batman-adv/ring_buffer.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: - * - * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#ifndef _NET_BATMAN_ADV_RING_BUFFER_H_ -#define _NET_BATMAN_ADV_RING_BUFFER_H_ - -void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value); -uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]); - -#endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */ -- cgit From d98966173213704873864c4e5057d823996ae95d Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 17 Apr 2013 17:44:43 +0200 Subject: batman-adv: move batadv_slide_own_bcast_window to bat_iv_ogm.c batadv_slide_own_bcast_window() is used only in bat_iv_ogm.c and it is currently touching only batman_iv specific attributes. Move it into bat_iv_ogm.c and make it static. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 37 ++++++++++++++++++++++++++++++++++++- net/batman-adv/routing.c | 29 ----------------------------- net/batman-adv/routing.h | 1 - 3 files changed, 36 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 31c2891c2cd0..42b7a94d61b3 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -630,6 +630,41 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, if_incoming, 0, batadv_iv_ogm_fwd_send_time()); } +/** + * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for + * the given interface + * @hard_iface: the interface for which the windows have to be shifted + */ +static void +batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + unsigned long *word; + uint32_t i; + size_t word_index; + uint8_t *w; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + spin_lock_bh(&orig_node->ogm_cnt_lock); + word_index = hard_iface->if_num * BATADV_NUM_WORDS; + word = &(orig_node->bcast_own[word_index]); + + batadv_bit_get_packet(bat_priv, word, 1, 0); + w = &orig_node->bcast_own_sum[hard_iface->if_num]; + *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); + spin_unlock_bh(&orig_node->ogm_cnt_lock); + } + rcu_read_unlock(); + } +} + static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -674,7 +709,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS; } - batadv_slide_own_bcast_window(hard_iface); + batadv_iv_ogm_slide_own_bcast_window(hard_iface); batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff, hard_iface->bat_iv.ogm_buff_len, hard_iface, 1, batadv_iv_ogm_emit_send_time(bat_priv)); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index beeab2e8cd66..fad08469767d 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -34,35 +34,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); -void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) -{ - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_head *head; - struct batadv_orig_node *orig_node; - unsigned long *word; - uint32_t i; - size_t word_index; - uint8_t *w; - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - spin_lock_bh(&orig_node->ogm_cnt_lock); - word_index = hard_iface->if_num * BATADV_NUM_WORDS; - word = &(orig_node->bcast_own[word_index]); - - batadv_bit_get_packet(bat_priv, word, 1, 0); - w = &orig_node->bcast_own_sum[hard_iface->if_num]; - *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); - spin_unlock_bh(&orig_node->ogm_cnt_lock); - } - rcu_read_unlock(); - } -} - static void _batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 99eeafaba407..72a29bde2010 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -20,7 +20,6 @@ #ifndef _NET_BATMAN_ADV_ROUTING_H_ #define _NET_BATMAN_ADV_ROUTING_H_ -void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface); bool batadv_check_management_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, int header_len); -- cgit From 3abe4adbfb293e37d2d6f4fe22366534dc2675d9 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 3 Apr 2013 11:15:33 +0200 Subject: batman-adv: refactor batadv_tt_local_event() Instead of passing a generic combination of flags as argument, it is easier to pass the entire tt_common structure (containing the flags already set) plus a bitfield of event flags that will be unified with the already existing ones before inserting the client in the event queue. In this way invocations of the modified function can be simplified. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/translation-table.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 9e8748575845..d35b73904e04 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -163,10 +163,19 @@ batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } +/** + * batadv_tt_local_event - store a local TT event (ADD/DEL) + * @bat_priv: the bat priv with all the soft interface information + * @tt_local_entry: the TT entry involved in the event + * @event_flags: flags to store in the event structure + */ static void batadv_tt_local_event(struct batadv_priv *bat_priv, - const uint8_t *addr, uint8_t flags) + struct batadv_tt_local_entry *tt_local_entry, + uint8_t event_flags) { struct batadv_tt_change_node *tt_change_node, *entry, *safe; + struct batadv_tt_common_entry *common = &tt_local_entry->common; + uint8_t flags = common->flags | event_flags; bool event_removed = false; bool del_op_requested, del_op_entry; @@ -176,7 +185,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, return; tt_change_node->change.flags = flags; - memcpy(tt_change_node->change.addr, addr, ETH_ALEN); + memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); del_op_requested = flags & BATADV_TT_CLIENT_DEL; @@ -184,7 +193,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->tt.changes_list_lock); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { - if (!batadv_compare_eth(entry->change.addr, addr)) + if (!batadv_compare_eth(entry->change.addr, common->addr)) continue; /* DEL+ADD in the same orig interval have no effect and can be @@ -332,7 +341,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, } add_event: - batadv_tt_local_event(bat_priv, addr, tt_local->common.flags); + batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS); check_roaming: /* Check whether it is a roaming, but don't do anything if the roaming @@ -529,8 +538,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, uint16_t flags, const char *message) { - batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, - tt_local_entry->common.flags | flags); + batadv_tt_local_event(bat_priv, tt_local_entry, flags); /* The local client has to be marked as "pending to be removed" but has * to be kept in the table in order to send it in a full table @@ -584,8 +592,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, /* if this client has been added right now, it is possible to * immediately purge it */ - batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, - curr_flags | BATADV_TT_CLIENT_DEL); + batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); hlist_del_rcu(&tt_local_entry->common.hash_entry); batadv_tt_local_entry_free_ref(tt_local_entry); -- cgit From 41ab6c4891ed4cdd855ae569924acb1da424a614 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 2 Apr 2013 22:28:44 +0200 Subject: batman-adv: don't deal with NET_IP_ALIGN manually Instead of dealing with NET_IP_ALIGN during allocation and headroom reservation, it is possible to use netdev_alloc_skb_ip_align() which transparently allocate and reserve the correct amount of data Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 6 +++--- net/batman-adv/icmp_socket.c | 4 ++-- net/batman-adv/translation-table.c | 20 ++++++++++---------- net/batman-adv/vis.c | 12 ++++++------ 4 files changed, 21 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 42b7a94d61b3..5b0a043c47c8 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -453,16 +453,16 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, else skb_size = packet_len; - skb_size += ETH_HLEN + NET_IP_ALIGN; + skb_size += ETH_HLEN; - forw_packet_aggr->skb = dev_alloc_skb(skb_size); + forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!forw_packet_aggr->skb) { if (!own_packet) atomic_inc(&bat_priv->batman_queue_left); kfree(forw_packet_aggr); goto out; } - skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(forw_packet_aggr->skb, ETH_HLEN); INIT_HLIST_NODE(&forw_packet_aggr->list); diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 0ba6c899b2d3..b27508b8085c 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -177,13 +177,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (len >= sizeof(struct batadv_icmp_packet_rr)) packet_len = sizeof(struct batadv_icmp_packet_rr); - skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN); if (!skb) { len = -ENOMEM; goto out; } - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); if (copy_from_user(icmp_packet, buff, packet_len)) { diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d35b73904e04..52808c4ae081 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1607,11 +1607,11 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = tt_query_size + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len); tt_response->ttvn = ttvn; @@ -1672,11 +1672,11 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, sizeof(*tt_request) + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); tt_req_len = sizeof(*tt_request); tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len); @@ -1769,11 +1769,11 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -1885,11 +1885,11 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -2219,11 +2219,11 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, if (!batadv_tt_check_roam_count(bat_priv, client)) goto out; - skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 1625e5793a89..94eaeb521c17 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -392,12 +392,12 @@ batadv_add_packet(struct batadv_priv *bat_priv, return NULL; len = sizeof(*packet) + vis_info_len; - info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + info->skb_packet = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!info->skb_packet) { kfree(info); return NULL; } - skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(info->skb_packet, ETH_HLEN); packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); kref_init(&info->refcount); @@ -854,13 +854,13 @@ int batadv_vis_init(struct batadv_priv *bat_priv) if (!bat_priv->vis.my_info) goto err; - len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE; - len += ETH_HLEN + NET_IP_ALIGN; - bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); + len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; + bat_priv->vis.my_info->skb_packet = netdev_alloc_skb_ip_align(NULL, + len); if (!bat_priv->vis.my_info->skb_packet) goto free_info; - skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); tmp_skb = bat_priv->vis.my_info->skb_packet; packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); -- cgit From d4ff40f683221d46c351cd9ab61f37a6ea5d2444 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 18 Apr 2013 15:13:01 +0200 Subject: batman-adv: pass a 16bit long flag argument to tt_global_add() it may be the case that we want to store some local TT client flags in a global entry, therefore the tt_global_add needs to get a proper argument for this Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/translation-table.c | 19 +++++++++++++++++-- net/batman-adv/translation-table.h | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 52808c4ae081..e272f68e1b08 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -798,10 +798,25 @@ out: batadv_tt_orig_list_entry_free_ref(orig_entry); } -/* caller must hold orig_node refcount */ +/** + * batadv_tt_global_add - add a new TT global entry or update an existing one + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the originator announcing the client + * @tt_addr: the mac address of the non-mesh client + * @flags: TT flags that have to be set for this non-mesh client + * @ttvn: the tt version number ever announcing this non-mesh client + * + * Add a new TT global entry for the given originator. If the entry already + * exists add a new reference to the given originator (a global entry can have + * references to multiple originators) and adjust the flags attribute to reflect + * the function argument. + * If a TT local entry exists for this non-mesh client remove it. + * + * The caller must hold orig_node refcount. + */ int batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *tt_addr, uint8_t flags, + const unsigned char *tt_addr, uint16_t flags, uint8_t ttvn) { struct batadv_tt_global_entry *tt_global_entry; diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index ab8e683b402f..659a3bb759ce 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -33,7 +33,7 @@ void batadv_tt_global_add_orig(struct batadv_priv *bat_priv, const unsigned char *tt_buff, int tt_buff_len); int batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *addr, uint8_t flags, + const unsigned char *addr, uint16_t flags, uint8_t ttvn); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, -- cgit From e54c77f08ec62434ac8b24e402aa7b787cf42198 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Fri, 19 Apr 2013 12:06:56 +0200 Subject: batman-adv: Remove unnecessary INIT_HLIST_NODE() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no need to for an explicit hlist_node initialization if it is added to a list right away, like it's the case with the hlist_add_head()s here. Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 2 -- net/batman-adv/send.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 5b0a043c47c8..d07323b3e9b8 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -464,8 +464,6 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, } skb_reserve(forw_packet_aggr->skb, ETH_HLEN); - INIT_HLIST_NODE(&forw_packet_aggr->list); - skb_buff = skb_put(forw_packet_aggr->skb, packet_len); forw_packet_aggr->packet_len = packet_len; memcpy(skb_buff, packet_buff, packet_len); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index ed7072ad84e8..ce69f458a754 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -152,8 +152,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time) { - INIT_HLIST_NODE(&forw_packet->list); - /* add new packet to packet list */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list); -- cgit From aa27c31265f111ff73d948a5846a3f193376491e Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Thu, 18 Apr 2013 04:56:03 +0800 Subject: batman-adv: do not print orig nodes without nc neighbors on nc table print Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/network-coding.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 0787a34609b9..22cd51fea730 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1763,6 +1763,13 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) /* For each orig_node in this bin */ rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + /* no need to print the orig node if it does not have + * network coding neighbors + */ + if (list_empty(&orig_node->in_coding_list) && + list_empty(&orig_node->out_coding_list)) + continue; + seq_printf(seq, "Node: %pM\n", orig_node->orig); seq_puts(seq, " Ingoing: "); -- cgit From eb2deb6b39b1597577c1635e9ebf319f1ae02213 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 19 Apr 2013 18:07:00 +0200 Subject: batman-adv: change VID semantic in the BLA code In order to make batman-adv fully vlan aware later, the semantic used for variables storing the VLAN ID values has to be changed in order to be adapted to the new one which will be used batman-adv wide. In particular, the VID has to be an "_unsigned_ short int" and its 4 MSB will be used as a flag bitfield, while the remaining 12 bits are used to store the real VID value Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner Acked-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 51 ++++++++++++++++++---------------- net/batman-adv/bridge_loop_avoidance.h | 12 ++++---- net/batman-adv/main.h | 11 ++++++++ net/batman-adv/soft-interface.c | 4 +-- net/batman-adv/types.h | 4 +-- 5 files changed, 49 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 082189e2e40d..7354063567bd 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -180,7 +180,7 @@ static struct batadv_bla_claim */ static struct batadv_bla_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, - uint8_t *addr, short vid) + uint8_t *addr, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; @@ -257,7 +257,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, - short vid, int claimtype) + unsigned short vid, int claimtype) { struct sk_buff *skb; struct ethhdr *ethhdr; @@ -335,13 +335,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); memcpy(ethhdr->h_dest, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n", + "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", ethhdr->h_source, ethhdr->h_dest, vid); break; } - if (vid != -1) - skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid); + if (vid & BATADV_VLAN_HAS_TAG) + skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), + vid & VLAN_VID_MASK); skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, soft_iface); @@ -367,7 +368,7 @@ out: */ static struct batadv_bla_backbone_gw * batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, - short vid, bool own_backbone) + unsigned short vid, bool own_backbone) { struct batadv_bla_backbone_gw *entry; struct batadv_orig_node *orig_node; @@ -434,7 +435,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, static void batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -456,7 +457,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, */ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - short vid) + unsigned short vid) { struct hlist_head *head; struct batadv_hashtable *hash; @@ -547,7 +548,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, * @backbone_gw: the backbone gateway which claims it */ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const short vid, + const uint8_t *mac, const unsigned short vid, struct batadv_bla_backbone_gw *backbone_gw) { struct batadv_bla_claim *claim; @@ -611,7 +612,7 @@ claim_free_ref: * given mac address and vid. */ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const short vid) + const uint8_t *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; @@ -637,7 +638,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, /* check for ANNOUNCE frame, return 1 if handled */ static int batadv_handle_announce(struct batadv_priv *bat_priv, uint8_t *an_addr, uint8_t *backbone_addr, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; uint16_t crc; @@ -685,7 +686,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, static int batadv_handle_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, - struct ethhdr *ethhdr, short vid) + struct ethhdr *ethhdr, unsigned short vid) { /* check for REQUEST frame */ if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest)) @@ -709,7 +710,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, static int batadv_handle_unclaim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, - uint8_t *claim_addr, short vid) + uint8_t *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -738,7 +739,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, static int batadv_handle_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, uint8_t *claim_addr, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -861,7 +862,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct batadv_bla_claim_dst *bla_dst; uint16_t proto; int headlen; - short vid = -1; + unsigned short vid = BATADV_NO_FLAGS; int ret; ethhdr = eth_hdr(skb); @@ -869,6 +870,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { vhdr = (struct vlan_ethhdr *)ethhdr; vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; proto = ntohs(vhdr->h_vlan_encapsulated_proto); headlen = sizeof(*vhdr); } else { @@ -1358,7 +1360,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; struct batadv_bla_backbone_gw *backbone_gw; - short vid = -1; + unsigned short vid = BATADV_NO_FLAGS; if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) return 0; @@ -1375,6 +1377,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size); vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; } /* see if this originator is a backbone gw for this VLAN */ @@ -1424,8 +1427,8 @@ void batadv_bla_free(struct batadv_priv *bat_priv) * returns 1, otherwise it returns 0 and the caller shall further * process the skb. */ -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, - bool is_bcast) +int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; @@ -1519,7 +1522,8 @@ out: * returns 1, otherwise it returns 0 and the caller shall further * process the skb. */ -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) +int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; @@ -1623,7 +1627,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); - seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n", + seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", claim->addr, claim->vid, claim->backbone_gw->orig, (is_own ? 'x' : ' '), @@ -1676,10 +1680,9 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) if (is_own) continue; - seq_printf(seq, - " * %pM on % 5d % 4i.%03is (%#.4x)\n", - backbone_gw->orig, backbone_gw->vid, - secs, msecs, backbone_gw->crc); + seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", + backbone_gw->orig, backbone_gw->vid, secs, + msecs, backbone_gw->crc); } rcu_read_unlock(); } diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index dea2fbc5d98d..4b102e71e5bd 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -21,9 +21,10 @@ #define _NET_BATMAN_ADV_BLA_H_ #ifdef CONFIG_BATMAN_ADV_BLA -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, - bool is_bcast); -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid); +int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast); +int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid); int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); @@ -42,13 +43,14 @@ void batadv_bla_free(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_BLA */ static inline int batadv_bla_rx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid, bool is_bcast) + struct sk_buff *skb, unsigned short vid, + bool is_bcast) { return 0; } static inline int batadv_bla_tx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid) + struct sk_buff *skb, unsigned short vid) { return 0; } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index ea1a3bafe9c3..6f25ef29f305 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -162,6 +162,17 @@ enum batadv_uev_type { #include #include "types.h" +/** + * batadv_vlan_flags - flags for the four MSB of any vlan ID field + * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not + */ +enum batadv_vlan_flags { + BATADV_VLAN_HAS_TAG = BIT(15), +}; + +#define BATADV_PRINT_VID(vid) (vid & BATADV_VLAN_HAS_TAG ? \ + (int)(vid & VLAN_VID_MASK) : -1) + extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index b26a6cdb934c..700d0b49742d 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -154,7 +154,7 @@ static int batadv_interface_tx(struct sk_buff *skb, 0x00, 0x00}; unsigned int header_len = 0; int data_len = skb->len, ret; - short vid __maybe_unused = -1; + unsigned short vid __maybe_unused = BATADV_NO_FLAGS; bool do_bcast = false; uint32_t seqno; unsigned long brd_delay = 1; @@ -303,7 +303,7 @@ void batadv_interface_rx(struct net_device *soft_iface, struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; struct batadv_header *batadv_header = (struct batadv_header *)skb->data; - short vid __maybe_unused = -1; + unsigned short vid __maybe_unused = BATADV_NO_FLAGS; __be16 ethertype = __constant_htons(ETH_P_BATMAN); bool is_bcast; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 5f542bdd9a4d..b2c94e139319 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -642,7 +642,7 @@ struct batadv_socket_packet { #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bla_backbone_gw { uint8_t orig[ETH_ALEN]; - short vid; + unsigned short vid; struct hlist_node hash_entry; struct batadv_priv *bat_priv; unsigned long lasttime; @@ -665,7 +665,7 @@ struct batadv_bla_backbone_gw { */ struct batadv_bla_claim { uint8_t addr[ETH_ALEN]; - short vid; + unsigned short vid; struct batadv_bla_backbone_gw *backbone_gw; unsigned long lasttime; struct hlist_node hash_entry; -- cgit From 5f80df6705fcd8153f93bd0e82109dbeb7ff535b Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 19 Apr 2013 18:07:01 +0200 Subject: batman-adv: print the VID properly Since the MSB bits of any vid variable are now used for storing flags, print the vid properly by taking the flags away and printing -1 in case of VID representing no real VLAN. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bridge_loop_avoidance.c | 37 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 7354063567bd..e9d8e0b3c3d0 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -307,7 +307,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, */ memcpy(ethhdr->h_source, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): CLAIM %pM on vid %d\n", mac, vid); + "bla_send_claim(): CLAIM %pM on vid %d\n", mac, + BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_UNCLAIM: /* unclaim frame @@ -316,7 +317,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac, - vid); + BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_ANNOUNCE: /* announcement frame @@ -325,7 +326,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): ANNOUNCE of %pM on vid %d\n", - ethhdr->h_source, vid); + ethhdr->h_source, BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_REQUEST: /* request frame @@ -336,7 +337,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(ethhdr->h_dest, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", - ethhdr->h_source, ethhdr->h_dest, vid); + ethhdr->h_source, ethhdr->h_dest, + BATADV_PRINT_VID(vid)); break; } @@ -381,7 +383,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n", - orig, vid); + orig, BATADV_PRINT_VID(vid)); entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -573,7 +575,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, atomic_set(&claim->refcount, 2); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim, @@ -592,7 +594,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): changing ownership for %pM, vid %d\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); batadv_backbone_gw_free_ref(claim->backbone_gw); @@ -623,7 +625,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, return; batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim); @@ -659,12 +661,13 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", - vid, backbone_gw->orig, crc); + BATADV_PRINT_VID(vid), backbone_gw->orig, crc); if (backbone_gw->crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", - backbone_gw->orig, backbone_gw->vid, + backbone_gw->orig, + BATADV_PRINT_VID(backbone_gw->vid), backbone_gw->crc, crc); batadv_bla_send_request(backbone_gw); @@ -700,7 +703,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_request(): REQUEST vid %d (sent by %pM)...\n", - vid, ethhdr->h_source); + BATADV_PRINT_VID(vid), ethhdr->h_source); batadv_bla_answer_request(bat_priv, primary_if, vid); return 1; @@ -728,7 +731,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, /* this must be an UNCLAIM frame */ batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n", - claim_addr, vid, backbone_gw->orig); + claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig); batadv_bla_del_claim(bat_priv, claim_addr, vid); batadv_backbone_gw_free_ref(backbone_gw); @@ -912,7 +915,8 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, if (ret == 1) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, vid, hw_src, hw_dst); + ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, + hw_dst); if (ret < 2) return ret; @@ -947,7 +951,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, vid, hw_src, hw_dst); + ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst); return 1; } @@ -1628,7 +1632,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", - claim->addr, claim->vid, + claim->addr, BATADV_PRINT_VID(claim->vid), claim->backbone_gw->orig, (is_own ? 'x' : ' '), claim->backbone_gw->crc); @@ -1681,7 +1685,8 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) continue; seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", - backbone_gw->orig, backbone_gw->vid, secs, + backbone_gw->orig, + BATADV_PRINT_VID(backbone_gw->vid), secs, msecs, backbone_gw->crc); } rcu_read_unlock(); -- cgit From e91ecfc64ad691176be119e627e36cec8564f44b Mon Sep 17 00:00:00 2001 From: Martin Hundebøll Date: Sat, 20 Apr 2013 13:54:39 +0200 Subject: batman-adv: Move call to batadv_nc_skb_forward() from routing.c to send.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to batadv_nc_skb_forward() fits better in batadv_send_skb_to_orig(), as this is where the actual next hop is looked up. To let the caller of batadv_send_skb_to_orig() know wether the skb is transmitted, buffered or failed, the return value is changed from boolean to int. Signed-off-by: Martin Hundebøll Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/network-coding.c | 5 ++--- net/batman-adv/network-coding.h | 6 ++---- net/batman-adv/routing.c | 23 +++++++++++++---------- net/batman-adv/send.c | 27 +++++++++++++++++++-------- net/batman-adv/send.h | 6 +++--- net/batman-adv/translation-table.c | 12 +++++++----- net/batman-adv/unicast.c | 2 +- net/batman-adv/vis.c | 7 ++++--- 8 files changed, 51 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 22cd51fea730..a487d46e0aec 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1359,18 +1359,17 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, * buffer * @skb: data skb to forward * @neigh_node: next hop to forward packet to - * @ethhdr: pointer to the ethernet header inside the skb * * Returns true if the skb was consumed (encoded packet sent) or false otherwise */ bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr) + struct batadv_neigh_node *neigh_node) { const struct net_device *netdev = neigh_node->if_incoming->soft_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; + struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 4fa6d0caddbd..85a4ec81ad50 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -36,8 +36,7 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv, void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv); void batadv_nc_init_orig(struct batadv_orig_node *orig_node); bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr); + struct batadv_neigh_node *neigh_node); void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, struct sk_buff *skb); void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, @@ -87,8 +86,7 @@ static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node) } static inline bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr) + struct batadv_neigh_node *neigh_node) { return false; } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index fad08469767d..2f0bd3ffe6e8 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -285,7 +285,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_ECHO_REPLY; icmp_packet->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -333,7 +333,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_TTL_EXCEEDED; icmp_packet->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -410,7 +410,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmp_packet->header.ttl--; /* route it */ - if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) + if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -775,7 +775,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_neigh_node *neigh_node = NULL; struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr = eth_hdr(skb); - int ret = NET_RX_DROP; + int res, ret = NET_RX_DROP; struct sk_buff *new_skb; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -835,16 +835,19 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet->header.ttl--; - /* network code packet if possible */ - if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) { - ret = NET_RX_SUCCESS; - } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) { - ret = NET_RX_SUCCESS; + res = batadv_send_skb_to_orig(skb, orig_node, recv_if); - /* Update stats counter */ + /* translate transmit result into receive result */ + if (res == NET_XMIT_SUCCESS) { + /* skb was transmitted and consumed */ batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, skb->len + ETH_HLEN); + + ret = NET_RX_SUCCESS; + } else if (res == NET_XMIT_POLICED) { + /* skb was buffered and consumed */ + ret = NET_RX_SUCCESS; } out: diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index ce69f458a754..e9ff8d801201 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -96,26 +96,37 @@ send_skb_err: * host, NULL can be passed as recv_if and no interface alternating is * attempted. * - * Returns TRUE on success; FALSE otherwise. + * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or + * NET_XMIT_POLICED if the skb is buffered for later transmit. */ -bool batadv_send_skb_to_orig(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_hard_iface *recv_if) +int batadv_send_skb_to_orig(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if) { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; + int ret = NET_XMIT_DROP; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); if (!neigh_node) - return false; + return ret; - /* route it */ - batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + /* try to network code the packet, if it is received on an interface + * (i.e. being forwarded). If the packet originates from this node or if + * network coding fails, then send the packet as usual. + */ + if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) { + ret = NET_XMIT_POLICED; + } else { + batadv_send_skb_packet(skb, neigh_node->if_incoming, + neigh_node->addr); + ret = NET_XMIT_SUCCESS; + } batadv_neigh_node_free_ref(neigh_node); - return true; + return ret; } void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 38e662f619ac..e7b17880fca4 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -23,9 +23,9 @@ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, const uint8_t *dst_addr); -bool batadv_send_skb_to_orig(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_hard_iface *recv_if); +int batadv_send_skb_to_orig(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if); void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface); int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index e272f68e1b08..429aeef3d8b2 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1713,7 +1713,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX); - if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: @@ -1737,7 +1737,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; uint8_t orig_ttvn, req_ttvn, ttvn; - int ret = false; + int res, ret = false; unsigned char *tt_buff; bool full_table; uint16_t tt_len, tt_tot; @@ -1832,8 +1832,10 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if (batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL)) + res = batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL); + if (res != NET_XMIT_DROP) ret = true; + goto out; unlock: @@ -1947,7 +1949,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = true; goto out; @@ -2260,7 +2262,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 0bb3b5982f94..dc8b5d4dd636 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -464,7 +464,7 @@ find_router: goto out; } - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 94eaeb521c17..4983340f1943 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -697,7 +697,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; struct sk_buff *skb; - uint32_t i; + uint32_t i, res; packet = (struct batadv_vis_packet *)info->skb_packet->data; @@ -724,7 +724,8 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, if (!skb) continue; - if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res == NET_XMIT_DROP) kfree_skb(skb); } rcu_read_unlock(); @@ -748,7 +749,7 @@ static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv, if (!skb) goto out; - if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) kfree_skb(skb); out: -- cgit From 6715fd3f0538e805b6a769d66823ec16b8b647ac Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sat, 20 Apr 2013 17:15:09 +0200 Subject: batman-adv: Start new development cycle Signed-off-by: Simon Wunderlich Signed-off-by: Antonio Quartulli --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 6f25ef29f305..5e9aebb7d56b 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2013.2.0" +#define BATADV_SOURCE_VERSION "2013.3.0" #endif /* B.A.T.M.A.N. parameters */ -- cgit From 75538c2b85cf22eb9af6adfaf26ed7219025adeb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 29 May 2013 11:30:50 +0800 Subject: net: always pass struct netdev_notifier_info to netdevice notifiers commit 351638e7deeed2ec8ce451b53d3 (net: pass info struct via netdevice notifier) breaks booting of my KVM guest, this is due to we still forget to pass struct netdev_notifier_info in several places. This patch completes it. Cc: Jiri Pirko Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/atm/clip.c | 4 +++- net/core/dev.c | 6 ------ net/ipv4/netfilter/ipt_MASQUERADE.c | 5 ++++- net/ipv6/addrconf.c | 7 +++++-- net/ipv6/netfilter/ip6t_MASQUERADE.c | 4 +++- 5 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index cce241eb01d9..8215f7cb170b 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -575,6 +575,7 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, void *ifa) { struct in_device *in_dev; + struct netdev_notifier_info info; in_dev = ((struct in_ifaddr *)ifa)->ifa_dev; /* @@ -583,7 +584,8 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, */ if (event != NETDEV_UP) return NOTIFY_DONE; - return clip_device_event(this, NETDEV_CHANGE, in_dev->dev); + netdev_notifier_info_init(&info, in_dev->dev); + return clip_device_event(this, NETDEV_CHANGE, &info); } static struct notifier_block clip_dev_notifier = { diff --git a/net/core/dev.c b/net/core/dev.c index 6eb621cc3b81..b2e9057be3bf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1391,12 +1391,6 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); -static void netdev_notifier_info_init(struct netdev_notifier_info *info, - struct net_device *dev) -{ - info->dev = dev; -} - static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, struct net_device *dev) { diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index dd5508bde799..30e4de940567 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -129,7 +129,10 @@ static int masq_inet_event(struct notifier_block *this, void *ptr) { struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; - return masq_device_event(this, event, dev); + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return masq_device_event(this, event, &info); } static struct notifier_block masq_dev_notifier = { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bce073b4bbd4..7b34f06af344 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4645,13 +4645,16 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, static void dev_disable_change(struct inet6_dev *idev) { + struct netdev_notifier_info info; + if (!idev || !idev->dev) return; + netdev_notifier_info_init(&info, idev->dev); if (idev->cnf.disable_ipv6) - addrconf_notify(NULL, NETDEV_DOWN, idev->dev); + addrconf_notify(NULL, NETDEV_DOWN, &info); else - addrconf_notify(NULL, NETDEV_UP, idev->dev); + addrconf_notify(NULL, NETDEV_UP, &info); } static void addrconf_disable_change(struct net *net, __s32 newf) diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index b76257cd7e1e..47bff6107519 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -89,8 +89,10 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; + struct netdev_notifier_info info; - return masq_device_event(this, event, ifa->idev->dev); + netdev_notifier_info_init(&info, ifa->idev->dev); + return masq_device_event(this, event, &info); } static struct notifier_block masq_inet_notifier = { -- cgit From ced14f6804a979d1972415bc23f2f8ddb18595dd Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:25 +0000 Subject: net: Correct comparisons and calculations using skb->tail and skb-transport_header This corrects an regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer whereas skb->transport_header will be an offset from head. This is corrected by using wrappers that ensure that comparisons and calculations are always made using pointers. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b2e9057be3bf..d4d874a25e45 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1724,7 +1724,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb2); if (skb_network_header(skb2) < skb2->data || - skb2->network_header > skb2->tail) { + skb_network_header(skb2) > skb_tail_pointer(skb2)) { net_crit_ratelimited("protocol %04x is buggy, dev %s\n", ntohs(skb2->protocol), dev->name); @@ -3892,7 +3892,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; - if (skb->mac_header == skb->tail && + if (skb_mac_header(skb) == skb_tail_pointer(skb) && pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); -- cgit From 29a3cad5c6ae9e7fbf1509d01d39c3c3c38f11f9 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:26 +0000 Subject: ipv6: Correct comparisons and calculations using skb->tail and skb-transport_header This corrects an regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer whereas skb->transport_header will be an offset from head. This is corrected by using wrappers that ensure that comparisons and calculations are always made using pointers. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv6/exthdrs_core.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/mcast.c | 5 +++-- net/ipv6/mip6.c | 6 ++++-- net/ipv6/ndisc.c | 9 +++++---- net/ipv6/output_core.c | 3 ++- net/ipv6/raw.c | 3 ++- net/ipv6/route.c | 2 +- 8 files changed, 19 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index c5e83fae4df4..140748debc4a 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL(ipv6_skip_exthdr); int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) { const unsigned char *nh = skb_network_header(skb); - int packet_len = skb->tail - skb->network_header; + int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); struct ipv6_opt_hdr *hdr; int len; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1d2902e61786..4b4890bbe16d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -399,7 +399,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int err = 0; if ((u8 *)hdr < skb->head || - (skb->network_header + sizeof(*hdr)) > skb->tail) + (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) return; /* diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bfa6cc36ef2a..72c8bfe06bb4 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1409,8 +1409,9 @@ static void mld_sendpack(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); - mldlen = skb->tail - skb->transport_header; + payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) - + sizeof(*pip6); + mldlen = skb_tail_pointer(skb) - skb_transport_header(skb); pip6->payload_len = htons(payload_len); pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 0f9bdc5ee9f3..9ac01dc9402e 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -268,7 +268,8 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; @@ -404,7 +405,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a0962697a257..781dd3c99680 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -693,7 +693,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; @@ -853,7 +853,7 @@ static void ndisc_recv_na(struct sk_buff *skb) const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; @@ -1069,7 +1069,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) __u8 * opt = (__u8 *)(ra_msg + 1); - optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg); + optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - + sizeof(struct ra_msg); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); @@ -1346,7 +1347,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) u8 *hdr; struct ndisc_options ndopts; struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct rd_msg, opt)); #ifdef CONFIG_IPV6_NDISC_NODETYPE diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index c2e73e647e44..ab92a3673fbb 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -40,7 +40,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) u16 offset = sizeof(struct ipv6hdr); struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index eedff8ccded5..4f8886aa8429 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1132,7 +1132,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) - amount = skb->tail - skb->transport_header; + amount = skb_tail_pointer(skb) - + skb_transport_header(skb); spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 194c3cde1536..2b874185ebb2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1649,7 +1649,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu int optlen, on_link; u8 *lladdr; - optlen = skb->tail - skb->transport_header; + optlen = skb_tail_pointer(skb) - skb_transport_header(skb); optlen -= sizeof(*msg); if (optlen < 0) { -- cgit From f7c0c2ae843b74f8dba55820cb0a3de19c976703 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:27 +0000 Subject: ipv4: Correct comparisons and calculations using skb->tail and skb-transport_header This corrects an regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer whereas skb->transport_header will be an offset from head. This is corrected by using wrappers that ensure that comparisons and calculations are always made using pointers. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 3 ++- net/ipv4/igmp.c | 2 +- net/ipv4/tcp.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 562efd91f457..5d0d379b0152 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -503,7 +503,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph = ip_hdr(skb_in); if ((u8 *)iph < skb_in->head || - (skb_in->network_header + sizeof(*iph)) > skb_in->tail) + (skb_network_header(skb_in) + sizeof(*iph)) > + skb_tail_pointer(skb_in)) goto out; /* diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d8c232794bcb..450f625361e4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -363,7 +363,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) static int igmpv3_sendpack(struct sk_buff *skb) { struct igmphdr *pig = igmp_hdr(skb); - const int igmplen = skb->tail - skb->transport_header; + const int igmplen = skb_tail_pointer(skb) - skb_transport_header(skb); pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ba4186e1dca9..1f58594d5a85 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2989,7 +2989,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, swap(gso_skb->truesize, skb->truesize); } - delta = htonl(oldlen + (skb->tail - skb->transport_header) + + delta = htonl(oldlen + (skb_tail_pointer(skb) - + skb_transport_header(skb)) + skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); -- cgit From 158874cac61245b84e939c92c53db7000122b7b0 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:28 +0000 Subject: sctp: Correct access to skb->{network, transport}_header This corrects an regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case sk_buff_data_t will be a pointer, however, skb->{network,transport}_header is now __u16. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/sctp/input.c | 2 +- net/sctp/ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index 4b2c83146aa7..e328fe8f93f4 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct sctp_association *asoc = NULL; struct sctp_transport *transport; struct inet_sock *inet; - sk_buff_data_t saveip, savesctp; + __be16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 391a245d5203..8ee553b499ce 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -153,7 +153,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sctp_association *asoc; struct sctp_transport *transport; struct ipv6_pinfo *np; - sk_buff_data_t saveip, savesctp; + __be16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); -- cgit From 7cc461900549fc480eb133948649a1edb7eaaa6f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:29 +0000 Subject: net, ipv4, ipv6: Correct assignment of skb->network_header to skb->tail This corrects an regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer however skb->network_header is now an offset. This patch corrects the problem by adding a wrapper to return skb tail as an offset regardless of the value of NET_SKBUFF_DATA_USES_OFFSET. It seems that skb->tail that this offset may be more than 64k and some care has been taken to treat such cases as an error. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/core/netpoll.c | 9 ++++++++- net/core/pktgen.c | 16 ++++++++++++++-- net/ipv4/ipmr.c | 8 +++++++- 3 files changed, 29 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 37deedd48bcc..688517c7ff17 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -676,6 +676,8 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo spin_lock_irqsave(&npinfo->rx_lock, flags); list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + unsigned long tail_offset; + if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) continue; @@ -700,7 +702,12 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo hdr->saddr = *saddr; hdr->daddr = *daddr; - send_skb->transport_header = send_skb->tail; + tail_offset = skb_tail_offset(skb); + if (tail_offset > 0xffff) { + kfree_skb(send_skb); + continue; + } + skb_set_network_header(send_skb, tail_offset); skb_put(send_skb, size); icmp6h = (struct icmp6hdr *)skb_transport_header(skb); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 795498fd4587..d2ede89662be 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2642,6 +2642,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ u16 queue_map; + unsigned long tail_offset; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2708,7 +2709,12 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IP); } - skb->network_header = skb->tail; + tail_offset = skb_tail_offset(skb); + if (tail_offset > 0xffff) { + kfree_skb(skb); + return NULL; + } + skb_set_network_header(skb, tail_offset); skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); @@ -2775,6 +2781,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ u16 queue_map; + unsigned long tail_offset; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2822,7 +2829,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IPV6); } - skb->network_header = skb->tail; + tail_offset = skb_tail_offset(skb); + if (tail_offset > 0xffff) { + kfree_skb(skb); + return NULL; + } + skb_set_network_header(skb, tail_offset); skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index f975399f3522..df97f0ac1a1c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -945,6 +945,7 @@ static int ipmr_cache_report(struct mr_table *mrt, struct igmpmsg *msg; struct sock *mroute_sk; int ret; + unsigned long tail_offset; #ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) @@ -980,7 +981,12 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Copy the IP header */ - skb->network_header = skb->tail; + tail_offset = skb_tail_offset(skb); + if (tail_offset > 0xffff) { + kfree_skb(skb); + return -EINVAL; + } + skb_set_network_header(skb, tail_offset); skb_put(skb, ihl); skb_copy_to_linear_data(skb, pkt->data, ihl); ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ -- cgit From e057d3c31bdf87616b415c4b2cbf7310f54b9219 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 28 May 2013 13:01:52 +0200 Subject: cfg80211: support an active monitor interface flag An active monitor interface is one that is used for communication (via injection). It is expected to ACK incoming unicast packets. This is useful for running various 802.11 testing utilities that associate to an AP via injection and manage the state in user space. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fb6abcb359a1..31d265f36d2c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2227,6 +2227,7 @@ static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = { [NL80211_MNTR_FLAG_CONTROL] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG }, + [NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG }, }; static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) @@ -2338,6 +2339,10 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) change = true; } + if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) && + !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + if (change) err = cfg80211_change_iface(rdev, dev, ntype, flags, ¶ms); else @@ -2395,6 +2400,11 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); + + if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) && + !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, ¶ms); -- cgit From 31eba5bc56a9324f056d28569a4f89f39c1c3f70 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 28 May 2013 13:01:53 +0200 Subject: mac80211: support active monitor interfaces Support them only if the driver advertises support for them via IEEE80211_HW_SUPPORTS_ACTIVE_MONITOR. Unlike normal monitor interfaces, they are added to the driver, along with their MAC address. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 11 +++++++---- net/mac80211/driver-ops.h | 3 ++- net/mac80211/iface.c | 29 +++++++++++++++++++++++------ net/mac80211/util.c | 6 ++++++ 4 files changed, 38 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 232edf78d5a9..9034da16cf1b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -73,16 +73,19 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct ieee80211_local *local = sdata->local; if (ieee80211_sdata_running(sdata)) { + u32 mask = MONITOR_FLAG_COOK_FRAMES | + MONITOR_FLAG_ACTIVE; + /* - * Prohibit MONITOR_FLAG_COOK_FRAMES to be - * changed while the interface is up. + * Prohibit MONITOR_FLAG_COOK_FRAMES and + * MONITOR_FLAG_ACTIVE to be changed while the + * interface is up. * Else we would need to add a lot of cruft * to update everything: * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ - if ((*flags & MONITOR_FLAG_COOK_FRAMES) != - (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)) + if ((*flags & mask) != (sdata->u.mntr_flags & mask)) return -EBUSY; ieee80211_adjust_monitor_flags(sdata, -1); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 169664c122e2..b931c96a596f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -146,7 +146,8 @@ static inline int drv_add_interface(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)))) + !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF) && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)))) return -EINVAL; trace_drv_add_interface(local, sdata); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ceef64426a8d..7cabaf261fed 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -159,7 +159,8 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr) +static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr, + bool check_dup) { struct ieee80211_sub_if_data *sdata; u64 new, mask, tmp; @@ -179,10 +180,13 @@ static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr) ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); + if (!check_dup) + return ret; mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) continue; m = sdata->vif.addr; @@ -204,12 +208,17 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sockaddr *sa = addr; + bool check_dup = true; int ret; if (ieee80211_sdata_running(sdata)) return -EBUSY; - ret = ieee80211_verify_mac(sdata->local, sa->sa_data); + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + check_dup = false; + + ret = ieee80211_verify_mac(sdata->local, sa->sa_data, check_dup); if (ret) return ret; @@ -541,7 +550,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; } - if (local->monitors == 0 && local->open_count == 0) { + if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) { + res = drv_add_interface(local, sdata); + if (res) + goto err_stop; + } else if (local->monitors == 0 && local->open_count == 0) { res = ieee80211_add_virtual_monitor(local); if (res) goto err_stop; @@ -919,7 +932,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->mtx); ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - break; + + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + break; + + /* fall through */ default: if (going_down) drv_remove_interface(local, sdata); @@ -1068,7 +1085,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_start_xmit = ieee80211_monitor_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_monitor_select_queue, }; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 2a8d759324c2..89a83770d152 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -560,6 +560,9 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + continue; + break; case NL80211_IFTYPE_AP_VLAN: continue; default: @@ -598,6 +601,9 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + continue; + break; case NL80211_IFTYPE_AP_VLAN: continue; default: -- cgit From f4d57941bf89997bad3294f94987caebf2771a33 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 28 May 2013 17:24:15 +0200 Subject: mac80211: always send multicast on CAB queue If the driver advertised support for a CAB queue, then we should put all multicast frames there, otherwise sending them can be racy with clients going to sleep while we TX a frame. To avoid this, always TX multicast frames on the multicast queue. It seems like even drivers not using the queue framework might want to do this which would mean also moving the IEEE80211_TX_CTL_SEND_AFTER_DTIM flag assignment, but it also seems that drivers behave differently here so that just moving it wouldn't be a good idea. It'd be better to modify those drivers to use the queue framework. Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9972e07a2f96..34be9336b5d1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -398,13 +398,14 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (ieee80211_has_order(hdr->frame_control)) return TX_CONTINUE; + if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + info->hw_queue = tx->sdata->vif.cab_queue; + /* no stations in PS mode */ if (!atomic_read(&ps->num_sta_ps)) return TX_CONTINUE; info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; - if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) - info->hw_queue = tx->sdata->vif.cab_queue; /* device releases frame after DTIM beacon */ if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)) -- cgit From 6804973ffb4288bba14d53223e2fbb2bbd1d2e1b Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 29 May 2013 14:20:11 +0000 Subject: tcp: consolidate PRR packet accounting This patch series fixes an undo bug in fast recovery: the sender mistakenly undos the cwnd too early but continues fast retransmits until all pending data are acked. This also multiplies the SNMP stat PARTIALUNDO events by the degree of the network reordering. The first patch prepares the fix by consolidating the accounting of newly_acked_sacked in tcp_cwnd_reduction(), instead of updating newly_acked_sacked everytime sacked_out is adjusted. Also pass acked and prior_unsacked as const type because they are readonly in the rest of recovery processing. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9579e1a5a144..86b5fa72ff9e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2430,12 +2430,14 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) TCP_ECN_queue_cwr(tp); } -static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, +static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, int fast_rexmit) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + int newly_acked_sacked = prior_unsacked - + (tp->packets_out - tp->sacked_out); tp->prr_delivered += newly_acked_sacked; if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { @@ -2492,7 +2494,7 @@ static void tcp_try_keep_open(struct sock *sk) } } -static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) +static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) { struct tcp_sock *tp = tcp_sk(sk); @@ -2509,7 +2511,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) tcp_moderate_cwnd(tp); } else { - tcp_cwnd_reduction(sk, newly_acked_sacked, 0); + tcp_cwnd_reduction(sk, prior_unsacked, 0); } } @@ -2678,15 +2680,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int prior_sacked, int prior_packets, +static void tcp_fastretrans_alert(struct sock *sk, const int acked, + const int prior_unsacked, bool is_dupack, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int newly_acked_sacked = 0; int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) @@ -2739,9 +2740,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); } else - do_lost = tcp_try_undo_partial(sk, pkts_acked); - newly_acked_sacked = prior_packets - tp->packets_out + - tp->sacked_out - prior_sacked; + do_lost = tcp_try_undo_partial(sk, acked); break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack); @@ -2755,14 +2754,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (is_dupack) tcp_add_reno_sack(sk); } - newly_acked_sacked = prior_packets - tp->packets_out + - tp->sacked_out - prior_sacked; if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk, flag)) { - tcp_try_to_open(sk, flag, newly_acked_sacked); + tcp_try_to_open(sk, flag, prior_unsacked); return; } @@ -2784,7 +2781,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); + tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit); tcp_xmit_retransmit_queue(sk); } @@ -3268,9 +3265,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets = tp->packets_out; - int prior_sacked = tp->sacked_out; - int pkts_acked = 0; - int previous_packets_out = 0; + const int prior_unsacked = tp->packets_out - tp->sacked_out; + int acked = 0; /* Number of packets newly acked */ /* If the ack is older than previous acks * then we can probably ignore it. @@ -3345,18 +3341,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - previous_packets_out = tp->packets_out; + acked = tp->packets_out; flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); - - pkts_acked = previous_packets_out - tp->packets_out; + acked -= tp->packets_out; if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); } else { if (flag & FLAG_DATA_ACKED) tcp_cong_avoid(sk, ack, prior_in_flight); @@ -3378,8 +3373,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3401,8 +3396,8 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); } SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); -- cgit From 6a63df46a7363833a0dc0c431027f522b3487972 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 29 May 2013 14:20:12 +0000 Subject: tcp: refactor undo functions Refactor and relocate various functions or variables to prepare the undo fix. Remove some unused function arguments. Rename tcp_undo_cwr to tcp_undo_cwnd_reduction to be consistent with the rest of CWR related function names. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 97 +++++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 86b5fa72ff9e..fcb668d1860d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2243,10 +2243,23 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) +static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh, + bool unmark_loss) { struct tcp_sock *tp = tcp_sk(sk); + if (unmark_loss) { + struct sk_buff *skb; + + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; + } + tp->lost_out = 0; + tcp_clear_all_retrans_hints(tp); + } + if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2263,6 +2276,9 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } tp->snd_cwnd_stamp = tcp_time_stamp; + + if (undo_ssthresh) + tp->undo_marker = 0; } static inline bool tcp_may_undo(const struct tcp_sock *tp) @@ -2282,14 +2298,13 @@ static bool tcp_try_undo_recovery(struct sock *sk) * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwr(sk, true); + tcp_undo_cwnd_reduction(sk, true, false); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) mib_idx = LINUX_MIB_TCPLOSSUNDO; else mib_idx = LINUX_MIB_TCPFULLUNDO; NET_INC_STATS_BH(sock_net(sk), mib_idx); - tp->undo_marker = 0; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2309,8 +2324,7 @@ static void tcp_try_undo_dsack(struct sock *sk) if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); - tcp_undo_cwr(sk, true); - tp->undo_marker = 0; + tcp_undo_cwnd_reduction(sk, true, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); } } @@ -2344,60 +2358,20 @@ static bool tcp_any_retrans_done(const struct sock *sk) return false; } -/* Undo during fast recovery after partial ACK. */ - -static int tcp_try_undo_partial(struct sock *sk, int acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - /* Partial ACK arrived. Force Hoe's retransmit. */ - int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering); - - if (tcp_may_undo(tp)) { - /* Plain luck! Hole if filled with delayed - * packet, rather than with a retransmit. - */ - if (!tcp_any_retrans_done(sk)) - tp->retrans_stamp = 0; - - tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); - - DBGUNDO(sk, "Hoe"); - tcp_undo_cwr(sk, false); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); - - /* So... Do not make Hoe's retransmit yet. - * If the first packet was delayed, the rest - * ones are most probably delayed as well. - */ - failed = 0; - } - return failed; -} - /* Undo during loss recovery after partial ACK or using F-RTO. */ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); if (frto_undo || tcp_may_undo(tp)) { - struct sk_buff *skb; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - } - - tcp_clear_all_retrans_hints(tp); + tcp_undo_cwnd_reduction(sk, true, true); DBGUNDO(sk, "partial loss"); - tp->lost_out = 0; - tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); if (frto_undo) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - tp->undo_marker = 0; if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return true; @@ -2669,6 +2643,35 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) tcp_xmit_retransmit_queue(sk); } +/* Undo during fast recovery after partial ACK. */ +static bool tcp_try_undo_partial(struct sock *sk, int acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + /* Partial ACK arrived. Force Hoe's retransmit. */ + bool failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering); + + if (tcp_may_undo(tp)) { + /* Plain luck! Hole if filled with delayed + * packet, rather than with a retransmit. + */ + if (!tcp_any_retrans_done(sk)) + tp->retrans_stamp = 0; + + tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); + + DBGUNDO(sk, "Hoe"); + tcp_undo_cwnd_reduction(sk, false, false); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); + + /* So... Do not make Hoe's retransmit yet. + * If the first packet was delayed, the rest + * ones are most probably delayed as well. + */ + failed = false; + } + return failed; +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2686,7 +2689,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && + bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); int fast_rexmit = 0; -- cgit From 7026b912f97d912476dff5465ed9a127be094208 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 29 May 2013 14:20:13 +0000 Subject: tcp: fix undo on partial ack in recovery Upon detecting spurious fast retransmit via timestamps during recovery, use PRR to clock out new data packet instead of retransmission. Once all retransmission are proven spurious, the sender then reverts the cwnd reduction and congestion state to open or disorder. The current code does the opposite: it undoes cwnd as soon as any retransmission is spurious and continues to retransmit until all data are acked. This nullifies the point to undo the cwnd because the sender is still retransmistting spuriously. This patch fixes it. The undo_ssthresh argument of tcp_undo_cwnd_reductiuon() is no longer needed and is removed. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 59 +++++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fcb668d1860d..c35b22751982 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2243,8 +2243,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh, - bool unmark_loss) +static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) { struct tcp_sock *tp = tcp_sk(sk); @@ -2268,7 +2267,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh, else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); - if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) { + if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; TCP_ECN_withdraw_cwr(tp); } @@ -2276,9 +2275,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh, tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } tp->snd_cwnd_stamp = tcp_time_stamp; - - if (undo_ssthresh) - tp->undo_marker = 0; + tp->undo_marker = 0; } static inline bool tcp_may_undo(const struct tcp_sock *tp) @@ -2298,7 +2295,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwnd_reduction(sk, true, false); + tcp_undo_cwnd_reduction(sk, false); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) mib_idx = LINUX_MIB_TCPLOSSUNDO; else @@ -2324,7 +2321,7 @@ static void tcp_try_undo_dsack(struct sock *sk) if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); - tcp_undo_cwnd_reduction(sk, true, false); + tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); } } @@ -2364,7 +2361,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) struct tcp_sock *tp = tcp_sk(sk); if (frto_undo || tcp_may_undo(tp)) { - tcp_undo_cwnd_reduction(sk, true, true); + tcp_undo_cwnd_reduction(sk, true); DBGUNDO(sk, "partial loss"); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); @@ -2644,32 +2641,37 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) } /* Undo during fast recovery after partial ACK. */ -static bool tcp_try_undo_partial(struct sock *sk, int acked) +static bool tcp_try_undo_partial(struct sock *sk, const int acked, + const int prior_unsacked) { struct tcp_sock *tp = tcp_sk(sk); - /* Partial ACK arrived. Force Hoe's retransmit. */ - bool failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering); - if (tcp_may_undo(tp)) { + if (tp->undo_marker && tcp_packet_delayed(tp)) { /* Plain luck! Hole if filled with delayed * packet, rather than with a retransmit. */ + tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); + + /* We are getting evidence that the reordering degree is higher + * than we realized. If there are no retransmits out then we + * can undo. Otherwise we clock out new packets but do not + * mark more packets lost or retransmit more. + */ + if (tp->retrans_out) { + tcp_cwnd_reduction(sk, prior_unsacked, 0); + return true; + } + if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; - tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); - - DBGUNDO(sk, "Hoe"); - tcp_undo_cwnd_reduction(sk, false, false); + DBGUNDO(sk, "partial recovery"); + tcp_undo_cwnd_reduction(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); - - /* So... Do not make Hoe's retransmit yet. - * If the first packet was delayed, the rest - * ones are most probably delayed as well. - */ - failed = false; + tcp_try_keep_open(sk); + return true; } - return failed; + return false; } /* Process an event, which can update packets-in-flight not trivially. @@ -2742,8 +2744,13 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); - } else - do_lost = tcp_try_undo_partial(sk, acked); + } else { + if (tcp_try_undo_partial(sk, acked, prior_unsacked)) + return; + /* Partial ACK arrived. Force fast retransmit. */ + do_lost = tcp_is_reno(tp) || + tcp_fackets_out(tp) > tp->reordering; + } break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack); -- cgit From c7d9d6a185a7ea383b719b79c428d34ec1470275 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 29 May 2013 14:20:14 +0000 Subject: tcp: undo on DSACK during recovery If the receiver supports DSACK, sender can detect false recoveries and revert cwnd reductions triggered by either severe network reordering or concurrent reordering and loss event. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c35b22751982..907311c9a012 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2315,7 +2315,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) } /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ -static void tcp_try_undo_dsack(struct sock *sk) +static bool tcp_try_undo_dsack(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2323,7 +2323,9 @@ static void tcp_try_undo_dsack(struct sock *sk) DBGUNDO(sk, "D-SACK"); tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); + return true; } + return false; } /* We can clear retrans_stamp when there are no retransmissions in the @@ -2751,6 +2753,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, do_lost = tcp_is_reno(tp) || tcp_fackets_out(tp) > tp->reordering; } + if (tcp_try_undo_dsack(sk)) { + tcp_try_keep_open(sk); + return; + } break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack); -- cgit From aef6de511a2aea78098e0d507ad7280d38b6b019 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 30 May 2013 20:36:11 +0000 Subject: sctp: Correct byte order of access to skb->{network, transport}_header Corrects an byte order conflict introduced by "sctp: Correct access to skb->{network, transport}_header". All the values in question are host byte order. Reported-by: Ben Hutchings Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index e328fe8f93f4..6533d81a638d 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct sctp_association *asoc = NULL; struct sctp_transport *transport; struct inet_sock *inet; - __be16 saveip, savesctp; + __u16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); -- cgit From c3f1dbaf6e281642848b78fe101764170c15f168 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Fri, 31 May 2013 13:15:38 +0000 Subject: net: Update RFS target at poll for tcp/udp The current state of affairs is that read()/write() will setup RFS (Receive Flow Steering) for internet protocol sockets while poll()/epoll() does not. When poll() gets called with a TCP or UDP socket, we should update the flow target. This permits to RFS (if enabled) to select the appropriate CPU for following incoming packets. Note: Only connected UDP sockets can benefit from RFS. Signed-off-by: David Majnemer Signed-off-by: Eric Dumazet Cc: Paul Turner Cc: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 ++ net/ipv4/udp.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1f58594d5a85..b5d4ad988053 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -436,6 +436,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); + sock_rps_record_flow(sk); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index aa5eff46d137..c7338ec79cc0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1967,6 +1967,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; + sock_rps_record_flow(sk); + /* Check for false positives due to checksum errors */ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) -- cgit From db8caf3dbc77599dc90f4ea0a803cd1d97116f30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 May 2013 11:18:10 +0000 Subject: gro: should aggregate frames without DF GRO on IPv4 doesn't aggregate frames if they don't have DF bit set. Some servers use IP_MTU_DISCOVER/IP_PMTUDISC_PROBE, so linux receivers are unable to aggregate this kind of traffic. The right thing to do is to allow aggregation as long as the DF bit has same value on all segments. bnx2x LRO does this correctly. Signed-off-by: Eric Dumazet Cc: Jerry Chu Cc: Herbert Xu Cc: Ben Hutchings Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b05ae96aec44..9c090c7daea1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1385,7 +1385,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, goto out_unlock; id = ntohl(*(__be32 *)&iph->id); - flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); id >>= 16; for (p = *head; p; p = p->next) { @@ -1407,6 +1407,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | + ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; -- cgit From 938177e9f3e0238c1712210f7bb6def38a5c8d7f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 30 May 2013 16:39:29 +0000 Subject: netfilter: Correct calculation using skb->tail and skb-network_header This corrects an regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer whereas skb->network_header will be an offset from head. This is corrected by using wrappers that ensure that calculations are always made using pointers. Reported-by: Stephen Rothwell Reported-by: Chen Gang Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/netfilter/nf_nat_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 5fea563afe30..85e20a919081 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -104,7 +104,7 @@ static void mangle_contents(struct sk_buff *skb, /* move post-replacement */ memmove(data + match_offset + rep_len, data + match_offset + match_len, - skb->tail - (skb->network_header + dataoff + + skb_tail_pointer(skb) - (skb_network_header(skb) + dataoff + match_offset + match_len)); /* insert data from buffer */ -- cgit From 35d0461061f27eeb62de63174959edbbb9e434de Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 29 May 2013 15:16:05 +0800 Subject: net: clean up skb headers code commit 1a37e412a0225fcba5587 (net: Use 16bits for *_headers fields of struct skbuff) converts skb->*_header to u16, some #if NET_SKBUFF_DATA_USES_OFFSET are now useless, and to be safe, we could just use "X = (typeof(X)) ~0U;" as suggested by David. Cc: David S. Miller Cc: Simon Horman Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/skbuff.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f45de077ab9e..6b1b52c5593b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -199,9 +199,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) skb->truesize = sizeof(struct sk_buff); atomic_set(&skb->users, 1); -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = (__u16) ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; out: return skb; } @@ -275,10 +273,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = (__u16) ~0U; - skb->transport_header = (__u16) ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); @@ -344,10 +340,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = (__u16) ~0U; - skb->transport_header = (__u16) ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); -- cgit From bf3d6a8f791b2a81279b9ce3201b4970f6fbe51a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 27 May 2013 23:48:15 +0000 Subject: iptunnel: specify protocol outside IP header Before this patch, ip_tunnel_xmit() was using the field protocol from the IP header passed into argument. There is no functional change, this patch prepares the support of IPv4 over IPv4 for module sit. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_tunnel.c | 4 ++-- net/ipv4/ipip.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a83591492dd..a982657d05e7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -429,7 +429,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, return; } - ip_tunnel_xmit(skb, dev, tnl_params); + ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } static netdev_tx_t ipgre_xmit(struct sk_buff *skb, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e4147ec1665a..b89095c1518f 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -487,7 +487,7 @@ drop: EXPORT_SYMBOL_GPL(ip_tunnel_rcv); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, - const struct iphdr *tnl_params) + const struct iphdr *tnl_params, const u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *inner_iph; @@ -670,7 +670,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = df; - iph->protocol = tnl_params->protocol; + iph->protocol = protocol; iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 77bfcce64fe5..9df7ecd393f2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -222,7 +222,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->encapsulation = 1; } - ip_tunnel_xmit(skb, dev, tiph); + ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); return NETDEV_TX_OK; tx_error: -- cgit From 32b8a8e59c9c8fa56051d6e9ab2924e469ac4d92 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 27 May 2013 23:48:16 +0000 Subject: sit: add IPv4 over IPv4 support This patch adds the support of IPv4 over Ipv4 for the module sit. The gain of this feature is to be able to have 4in4 and 6in4 over the same interface instead of having one interface for 6in4 and another for 4in4 even if encapsulation addresses are the same. To avoid conflicting with ipip module, sit IPv4 over IPv4 protocol is registered with a smaller priority. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/xfrm4_tunnel.c | 2 +- net/ipv6/sit.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 117 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 05a5df2febc9..06347dbd32c1 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -63,7 +63,7 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, - .priority = 2, + .priority = 3, }; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 335363478bbf..6b9c1f128eaf 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -577,6 +577,10 @@ static int ipip6_rcv(struct sk_buff *skb) if (tunnel != NULL) { struct pcpu_tstats *tstats; + if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && + tunnel->parms.iph.protocol != 0) + goto out; + secpath_reset(skb); skb->mac_header = skb->network_header; skb_reset_network_header(skb); @@ -629,6 +633,35 @@ out: return 0; } +static const struct tnl_ptk_info tpi = { + /* no tunnel info required for ipip. */ + .proto = htons(ETH_P_IP), +}; + +static int ipip_rcv(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + struct ip_tunnel *tunnel; + + tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, + iph->saddr, iph->daddr); + if (tunnel != NULL) { + if (tunnel->parms.iph.protocol != IPPROTO_IPIP && + tunnel->parms.iph.protocol != 0) + goto drop; + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); + } + + return 1; + +drop: + kfree_skb(skb); + return 0; +} + /* * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function * stores the embedded IPv4 address in v4dst and returns true. @@ -877,6 +910,43 @@ tx_error: return NETDEV_TX_OK; } +static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tiph = &tunnel->parms.iph; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); + return NETDEV_TX_OK; +} + +static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + ipip_tunnel_xmit(skb, dev); + break; + case htons(ETH_P_IPV6): + ipip6_tunnel_xmit(skb, dev); + break; + default: + goto tx_err; + } + + return NETDEV_TX_OK; + +tx_err: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + +} + static void ipip6_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; @@ -1027,7 +1097,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || + if (p.iph.protocol != IPPROTO_IPV6 && + p.iph.protocol != IPPROTO_IPIP && + p.iph.protocol != 0) + goto done; + if (p.iph.version != 4 || p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) goto done; if (p.iph.ttl) @@ -1164,7 +1238,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ipip6_netdev_ops = { .ndo_uninit = ipip6_tunnel_uninit, - .ndo_start_xmit = ipip6_tunnel_xmit, + .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, @@ -1232,6 +1306,22 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) return 0; } +static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + u8 proto; + + if (!data) + return 0; + + proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + if (proto != IPPROTO_IPV6 && + proto != IPPROTO_IPIP && + proto != 0) + return -EINVAL; + + return 0; +} + static void ipip6_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm *parms) { @@ -1268,6 +1358,10 @@ static void ipip6_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_FLAGS]) parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + + if (data[IFLA_IPTUN_PROTO]) + parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); + } #ifdef CONFIG_IPV6_SIT_6RD @@ -1391,6 +1485,8 @@ static size_t ipip6_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + + /* IFLA_IPTUN_PROTO */ + nla_total_size(1) + #ifdef CONFIG_IPV6_SIT_6RD /* IFLA_IPTUN_6RD_PREFIX */ nla_total_size(sizeof(struct in6_addr)) + @@ -1416,6 +1512,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; @@ -1445,6 +1542,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, #ifdef CONFIG_IPV6_SIT_6RD [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, @@ -1459,6 +1557,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .policy = ipip6_policy, .priv_size = sizeof(struct ip_tunnel), .setup = ipip6_tunnel_setup, + .validate = ipip6_validate, .newlink = ipip6_newlink, .changelink = ipip6_changelink, .get_size = ipip6_get_size, @@ -1471,6 +1570,12 @@ static struct xfrm_tunnel sit_handler __read_mostly = { .priority = 1, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = ipip_rcv, + .err_handler = ipip6_err, + .priority = 2, +}; + static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { int prio; @@ -1553,6 +1658,7 @@ static void __exit sit_cleanup(void) { rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); unregister_pernet_device(&sit_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ @@ -1569,9 +1675,14 @@ static int __init sit_init(void) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { - pr_info("%s: can't add protocol\n", __func__); + pr_info("%s: can't register ip6ip4\n", __func__); goto xfrm_tunnel_failed; } + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: can't register ip4ip4\n", __func__); + goto xfrm_tunnel4_failed; + } err = rtnl_link_register(&sit_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1580,6 +1691,8 @@ out: return err; rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel4_failed: xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm_tunnel_failed: unregister_pernet_device(&sit_net_ops); -- cgit From 387aa65a89434abe3128d36d1a6fc3842c94905d Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Mon, 27 May 2013 20:46:31 +0000 Subject: ipv4: properly refresh rtable entries on pmtu/redirect events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 05ab86c5 (xfrm4: Invalidate all ipv4 routes on IPsec pmtu events). Flushing all cached entries is not needed. Instead, invalidate only the related next hop dsts to recheck for the added next hop exception where needed. This also fixes a subtle race due to bumping generation id's before updating the pmtu. Cc: Steffen Klassert Signed-off-by: Timo Teräs Signed-off-by: David S. Miller --- net/ipv4/ah4.c | 7 ++----- net/ipv4/esp4.c | 7 ++----- net/ipv4/ipcomp.c | 7 ++----- net/ipv4/route.c | 63 ++++++++++++++++++++++++++++++++----------------------- 4 files changed, 43 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 2e7f1948216f..717902669d2f 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -419,12 +419,9 @@ static void ah4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4cfe34d4cc96..ab3d814bc80a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,12 +502,9 @@ static void esp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 59cb8c769056..826be4cb482a 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -47,12 +47,9 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 550781a17b34..561a37833d86 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -594,11 +594,25 @@ static inline u32 fnhe_hashfun(__be32 daddr) return hval & (FNHE_HASH_SIZE - 1); } +static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) +{ + rt->rt_pmtu = fnhe->fnhe_pmtu; + rt->dst.expires = fnhe->fnhe_expires; + + if (fnhe->fnhe_gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = fnhe->fnhe_gw; + rt->rt_uses_gateway = 1; + } +} + static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, u32 pmtu, unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; + struct rtable *rt; + unsigned int i; int depth; u32 hval = fnhe_hashfun(daddr); @@ -627,8 +641,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_gw = gw; if (pmtu) { fnhe->fnhe_pmtu = pmtu; - fnhe->fnhe_expires = expires; + fnhe->fnhe_expires = max(1UL, expires); } + /* Update all cached dsts too */ + rt = rcu_dereference(fnhe->fnhe_rth); + if (rt) + fill_route_from_fnhe(rt, fnhe); } else { if (depth > FNHE_RECLAIM_DEPTH) fnhe = fnhe_oldest(hash); @@ -644,6 +662,18 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; fnhe->fnhe_expires = expires; + + /* Exception created; mark the cached routes for the nexthop + * stale, so anyone caching it rechecks if this exception + * applies to them. + */ + for_each_possible_cpu(i) { + struct rtable __rcu **prt; + prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); + rt = rcu_dereference(*prt); + if (rt) + rt->dst.obsolete = DST_OBSOLETE_KILL; + } } fnhe->fnhe_stamp = jiffies; @@ -917,13 +947,6 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - if (!rt->rt_pmtu) { - dst->obsolete = DST_OBSOLETE_KILL; - } else { - rt->rt_pmtu = mtu; - dst->expires = max(1UL, jiffies + ip_rt_mtu_expires); - } - rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); @@ -1063,11 +1086,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. * - * When a PMTU/redirect information update invalidates a - * route, this is indicated by setting obsolete to - * DST_OBSOLETE_KILL. + * When a PMTU/redirect information update invalidates a route, + * this is indicated by setting obsolete to DST_OBSOLETE_KILL or + * DST_OBSOLETE_DEAD by dst_free(). */ - if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt)) + if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) return NULL; return dst; } @@ -1215,20 +1238,8 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; } - if (fnhe->fnhe_pmtu) { - unsigned long expires = fnhe->fnhe_expires; - unsigned long diff = expires - jiffies; - - if (time_before(jiffies, expires)) { - rt->rt_pmtu = fnhe->fnhe_pmtu; - dst_set_expires(&rt->dst, diff); - } - } - if (fnhe->fnhe_gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = fnhe->fnhe_gw; - rt->rt_uses_gateway = 1; - } else if (!rt->rt_gateway) + fill_route_from_fnhe(rt, fnhe); + if (!rt->rt_gateway) rt->rt_gateway = daddr; rcu_assign_pointer(fnhe->fnhe_rth, rt); -- cgit From f016229e303c294afac721de4cd4427e634950ea Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Mon, 27 May 2013 20:46:32 +0000 Subject: ipv4: rate limit updating of next hop exceptions with same pmtu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tunnel devices call update_pmtu for each packet sent, this causes contention on the fnhe_lock. Ignore the pmtu update if pmtu is not actually changed, and there is still plenty of time before the entry expires. Signed-off-by: Timo Teräs Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 561a37833d86..a4082be1b9b4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -947,6 +947,10 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + if (rt->rt_pmtu == mtu && + time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) + return; + rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); -- cgit From 5aad1de5ea2c260b4cd2f70b70e146d55dbbc528 Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Mon, 27 May 2013 20:46:33 +0000 Subject: ipv4: use separate genid for next hop exceptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 13d82bf5 (ipv4: Fix flushing of cached routing informations) added the support to flush learned pmtu information. However, using rt_genid is quite heavy as it is bumped on route add/change and multicast events amongst other places. These can happen quite often, especially if using dynamic routing protocols. While this is ok with routes (as they are just recreated locally), the pmtu information is learned from remote systems and the icmp notification can come with long delays. It is worthy to have separate genid to avoid excessive pmtu resets. Cc: Steffen Klassert Signed-off-by: Timo Teräs Signed-off-by: David S. Miller --- net/ipv4/route.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a4082be1b9b4..403e28302869 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -658,6 +658,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_next = hash->chain; rcu_assign_pointer(hash->chain, fnhe); } + fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; @@ -1236,8 +1237,11 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { + int genid = fnhe_genid(dev_net(rt->dst.dev)); struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); - if (orig && rt_is_expired(orig)) { + + if (fnhe->fnhe_genid != genid) { + fnhe->fnhe_genid = genid; fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; @@ -2443,8 +2447,11 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = (struct net *)__ctl->extra1; + if (write) { - rt_cache_flush((struct net *)__ctl->extra1); + rt_cache_flush(net); + fnhe_genid_bump(net); return 0; } @@ -2619,6 +2626,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { atomic_set(&net->rt_genid, 0); + atomic_set(&net->fnhe_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; -- cgit From 08578d8d4eb76b7afe314fa03abe167761462fe4 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Mon, 3 Jun 2013 00:23:25 +0000 Subject: ] icmp: fix icmp_unreach() comment. ICMP_PARAMETERPROB is handled by icmp_unreach(); This patch adds ICMP_PARAMETERPROB to the list of ICMP message types handled by icmp_unreach(). Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5d0d379b0152..2864ca33bedc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -658,7 +658,8 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) } /* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and + * ICMP_PARAMETERPROB. */ static void icmp_unreach(struct sk_buff *skb) -- cgit From 9a99d4a50cb8ce516adf0f2436138d4c8e6e4535 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Jun 2013 15:00:52 +0000 Subject: icmp: avoid allocating large struct on stack struct icmp_bxm is a large struct, reduce stack usage by allocating it on heap. Cc: Eric Dumazet Cc: Joe Perches Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2864ca33bedc..5f7d11a45871 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -482,7 +482,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct iphdr *iph; int room; - struct icmp_bxm icmp_param; + struct icmp_bxm *icmp_param; struct rtable *rt = skb_rtable(skb_in); struct ipcm_cookie ipc; struct flowi4 fl4; @@ -558,9 +558,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } + icmp_param = kmalloc(sizeof(*icmp_param), GFP_ATOMIC); + if (!icmp_param) + return; + sk = icmp_xmit_lock(net); if (sk == NULL) - return; + goto out_free; /* * Construct source address and options. @@ -586,7 +590,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) IPTOS_PREC_INTERNETCONTROL) : iph->tos; - if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) + if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) goto out_unlock; @@ -594,19 +598,19 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) * Prepare data for ICMP header. */ - icmp_param.data.icmph.type = type; - icmp_param.data.icmph.code = code; - icmp_param.data.icmph.un.gateway = info; - icmp_param.data.icmph.checksum = 0; - icmp_param.skb = skb_in; - icmp_param.offset = skb_network_offset(skb_in); + icmp_param->data.icmph.type = type; + icmp_param->data.icmph.code = code; + icmp_param->data.icmph.un.gateway = info; + icmp_param->data.icmph.checksum = 0; + icmp_param->skb = skb_in; + icmp_param->offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; - ipc.opt = &icmp_param.replyopts.opt; + ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, - type, code, &icmp_param); + type, code, icmp_param); if (IS_ERR(rt)) goto out_unlock; @@ -618,19 +622,21 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) room = dst_mtu(&rt->dst); if (room > 576) room = 576; - room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; + room -= sizeof(struct iphdr) + icmp_param->replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); - icmp_param.data_len = skb_in->len - icmp_param.offset; - if (icmp_param.data_len > room) - icmp_param.data_len = room; - icmp_param.head_len = sizeof(struct icmphdr); + icmp_param->data_len = skb_in->len - icmp_param->offset; + if (icmp_param->data_len > room) + icmp_param->data_len = room; + icmp_param->head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); + icmp_push_reply(icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); +out_free: + kfree(icmp_param); out:; } EXPORT_SYMBOL(icmp_send); -- cgit From ed405be5cb5e8e494e8dcd83b6eb1add34dc752a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Jun 2013 13:51:59 +0200 Subject: mac80211: fix sdata locking around __ieee80211_request_smps My cfg80211/mac80211 locking unification broke the sdata locking in ieee80211_set_power_mgmt, it needs to acquire the lock for __ieee80211_request_smps(). Add the locking. Reported-by: Jakub Kicinski Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9034da16cf1b..30622101d3b5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2378,7 +2378,9 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ + sdata_lock(sdata); __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps); + sdata_unlock(sdata); if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); -- cgit From 866403a7bdd3941cbb4e2085d8ac368dcabe800c Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Fri, 31 May 2013 17:41:47 -0700 Subject: mac80211: don't check local mesh TTL on TX nl80211 has already verified the mesh TTL on setting the mesh config, so no need to check it again in mac80211. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 34be9336b5d1..4105d0ca963e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1790,12 +1790,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - if (!sdata->u.mesh.mshcfg.dot11MeshTTL) { - /* Do not send frames with mesh_ttl == 0 */ - sdata->u.mesh.mshstats.dropped_frames_ttl++; - goto fail_rcu; - } - if (!is_multicast_ether_addr(skb->data)) { struct sta_info *next_hop; bool mpp_lookup = true; -- cgit From e05ecccdf752122a439b03c3190458d2c8f0bac6 Mon Sep 17 00:00:00 2001 From: Jacob Minshall Date: Wed, 29 May 2013 14:32:36 -0700 Subject: mac80211: set mesh formation field properly Cap max peerings at 63 in accordance with IEEE-2012 8.4.2.100.7. Triggers a beacon regeneration every time the number of peerings changes. Previously this would only happen if the "accepting peerings" bit changed. Signed-off-by: Jacob Minshall Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 3 +-- net/mac80211/mesh.h | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index b3d1fdd46368..73a597bad6e0 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -274,8 +274,7 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, *pos++ = ifmsh->mesh_auth_id; /* Mesh Formation Info - number of neighbors */ neighbors = atomic_read(&ifmsh->estab_plinks); - /* Number of neighbor mesh STAs or 15 whichever is smaller */ - neighbors = (neighbors > 15) ? 15 : neighbors; + neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS); *pos++ = neighbors << 1; /* Mesh capability */ *pos = IEEE80211_MESHCONF_CAPAB_FORWARDING; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index da158774eebb..8b4d9a3e9eee 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -324,14 +324,14 @@ static inline u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_inc(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON; } static inline u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_dec(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON; } static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata) -- cgit From 964dc9e2c3aaccacacd40640964a58544fb5769a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Jun 2013 17:25:34 +0200 Subject: cfg80211: take WoWLAN support information out of wiphy struct There's no need to take up the space for devices that don't support WoWLAN, and most drivers can even make the support data static const (except where it's modified at runtime.) Signed-off-by: Johannes Berg --- net/mac80211/main.c | 3 +-- net/wireless/core.c | 20 ++++++++++-------- net/wireless/nl80211.c | 56 ++++++++++++++++++++++++-------------------------- 3 files changed, 40 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1998f1475267..626c83c042d7 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -686,8 +686,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -EINVAL; #ifdef CONFIG_PM - if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns) && - (!local->ops->suspend || !local->ops->resume)) + if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume)) return -EINVAL; #endif diff --git a/net/wireless/core.c b/net/wireless/core.c index 41cec1776f4f..f553b9484c1e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -449,8 +449,13 @@ int wiphy_register(struct wiphy *wiphy) u16 ifmodes = wiphy->interface_modes; #ifdef CONFIG_PM - if (WARN_ON((wiphy->wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && - !(wiphy->wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) + if (WARN_ON(wiphy->wowlan && + (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + !(wiphy->wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) + return -EINVAL; + if (WARN_ON(wiphy->wowlan && + !wiphy->wowlan->flags && !wiphy->wowlan->n_patterns && + !wiphy->wowlan->tcp)) return -EINVAL; #endif @@ -540,12 +545,11 @@ int wiphy_register(struct wiphy *wiphy) } #ifdef CONFIG_PM - if (rdev->wiphy.wowlan.n_patterns) { - if (WARN_ON(!rdev->wiphy.wowlan.pattern_min_len || - rdev->wiphy.wowlan.pattern_min_len > - rdev->wiphy.wowlan.pattern_max_len)) - return -EINVAL; - } + if (WARN_ON(rdev->wiphy.wowlan && rdev->wiphy.wowlan->n_patterns && + (!rdev->wiphy.wowlan->pattern_min_len || + rdev->wiphy.wowlan->pattern_min_len > + rdev->wiphy.wowlan->pattern_max_len))) + return -EINVAL; #endif /* check and set up bitrates */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 31d265f36d2c..7ee9af3283a8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -908,7 +908,7 @@ nla_put_failure: static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { - const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; + const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan->tcp; struct nlattr *nl_tcp; if (!tcp) @@ -951,37 +951,37 @@ static int nl80211_send_wowlan(struct sk_buff *msg, { struct nlattr *nl_wowlan; - if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns) + if (!dev->wiphy.wowlan) return 0; nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); if (!nl_wowlan) return -ENOBUFS; - if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && + if (((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) return -ENOBUFS; - if (dev->wiphy.wowlan.n_patterns) { + if (dev->wiphy.wowlan->n_patterns) { struct nl80211_wowlan_pattern_support pat = { - .max_patterns = dev->wiphy.wowlan.n_patterns, - .min_pattern_len = dev->wiphy.wowlan.pattern_min_len, - .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, - .max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset, + .max_patterns = dev->wiphy.wowlan->n_patterns, + .min_pattern_len = dev->wiphy.wowlan->pattern_min_len, + .max_pattern_len = dev->wiphy.wowlan->pattern_max_len, + .max_pkt_offset = dev->wiphy.wowlan->max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, @@ -7580,8 +7580,7 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) void *hdr; u32 size = NLMSG_DEFAULT_SIZE; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && - !rdev->wiphy.wowlan.tcp) + if (!rdev->wiphy.wowlan) return -EOPNOTSUPP; if (rdev->wiphy.wowlan_config && rdev->wiphy.wowlan_config->tcp) { @@ -7654,7 +7653,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, u32 data_size, wake_size, tokens_size = 0, wake_mask_size; int err, port; - if (!rdev->wiphy.wowlan.tcp) + if (!rdev->wiphy.wowlan->tcp) return -EINVAL; err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, @@ -7674,16 +7673,16 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, return -EINVAL; data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); - if (data_size > rdev->wiphy.wowlan.tcp->data_payload_max) + if (data_size > rdev->wiphy.wowlan->tcp->data_payload_max) return -EINVAL; if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > - rdev->wiphy.wowlan.tcp->data_interval_max || + rdev->wiphy.wowlan->tcp->data_interval_max || nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0) return -EINVAL; wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); - if (wake_size > rdev->wiphy.wowlan.tcp->wake_payload_max) + if (wake_size > rdev->wiphy.wowlan->tcp->wake_payload_max) return -EINVAL; wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); @@ -7698,13 +7697,13 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, if (!tok->len || tokens_size % tok->len) return -EINVAL; - if (!rdev->wiphy.wowlan.tcp->tok) + if (!rdev->wiphy.wowlan->tcp->tok) return -EINVAL; - if (tok->len > rdev->wiphy.wowlan.tcp->tok->max_len) + if (tok->len > rdev->wiphy.wowlan->tcp->tok->max_len) return -EINVAL; - if (tok->len < rdev->wiphy.wowlan.tcp->tok->min_len) + if (tok->len < rdev->wiphy.wowlan->tcp->tok->min_len) return -EINVAL; - if (tokens_size > rdev->wiphy.wowlan.tcp->tok->bufsize) + if (tokens_size > rdev->wiphy.wowlan->tcp->tok->bufsize) return -EINVAL; if (tok->offset + tok->len > data_size) return -EINVAL; @@ -7712,7 +7711,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); - if (!rdev->wiphy.wowlan.tcp->seq) + if (!rdev->wiphy.wowlan->tcp->seq) return -EINVAL; if (seq->len == 0 || seq->len > 4) return -EINVAL; @@ -7793,12 +7792,11 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) struct nlattr *tb[NUM_NL80211_WOWLAN_TRIG]; struct cfg80211_wowlan new_triggers = {}; struct cfg80211_wowlan *ntrig; - struct wiphy_wowlan_support *wowlan = &rdev->wiphy.wowlan; + const struct wiphy_wowlan_support *wowlan = rdev->wiphy.wowlan; int err, i; bool prev_enabled = rdev->wiphy.wowlan_config; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && - !rdev->wiphy.wowlan.tcp) + if (!wowlan) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { -- cgit From d6d23de2786edca61fb9813ff7cdc7d2543d08a7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 4 Jun 2013 12:15:42 +0200 Subject: mac80211: add a tx control flag to indicate PS-Poll/uAPSD response Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a04c5671d7fd..b4297982d34a 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1132,6 +1132,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, * ends the poll/service period. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_PS_RESPONSE | IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; @@ -1269,7 +1270,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, * STA may still remain is PS mode after this frame * exchange. */ - info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_PS_RESPONSE; /* * Use MoreData flag to indicate whether there are -- cgit From 9c90a9f64c21b0a3983655c9c08cf98489057a43 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Jun 2013 12:46:03 +0200 Subject: nl80211: remove bogus genlmsg_end() error checking genlmsg_end() can't return an error since it returns the skb length so remove checks treating the return value as an error code. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 40 +++++++--------------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7ee9af3283a8..ce949e38178c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9848,7 +9848,6 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err; u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); if (!nlportid) @@ -9869,12 +9868,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr)) goto nla_put_failure; - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return true; - } - + genlmsg_end(msg, hdr); genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return true; @@ -10317,10 +10311,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, if (nl80211_send_chandef(msg, chandef)) goto nla_put_failure; - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10386,7 +10377,6 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err; trace_cfg80211_probe_status(dev, addr, cookie, acked); @@ -10408,11 +10398,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, (acked && nla_put_flag(msg, NL80211_ATTR_ACK))) goto nla_put_failure; - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10478,7 +10464,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err, size = 200; + int size = 200; trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup); @@ -10564,9 +10550,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, nla_nest_end(msg, reasons); } - err = genlmsg_end(msg, hdr); - if (err < 0) - goto free_msg; + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10586,7 +10570,6 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err; trace_cfg80211_tdls_oper_request(wdev->wiphy, dev, peer, oper, reason_code); @@ -10609,11 +10592,7 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason_code))) goto nla_put_failure; - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10671,7 +10650,6 @@ void cfg80211_ft_event(struct net_device *netdev, struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; - int err; trace_cfg80211_ft_event(wiphy, netdev, ft_event); @@ -10697,11 +10675,7 @@ void cfg80211_ft_event(struct net_device *netdev, nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, ft_event->ric_ies); - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); -- cgit From ff40b425f04144771920b79672d6691910c7def7 Mon Sep 17 00:00:00 2001 From: Pontus Fuchs Date: Tue, 4 Jun 2013 12:44:52 +0200 Subject: mac80211: set IEEE80211_TX_CTL_REQ_TX_STATUS on nullframes The connection monitor needs to know the tx status of nullframes to work properly. Signed-off-by: Pontus Fuchs Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f44f4caa69ee..9950e13f641b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -880,6 +880,10 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_INTFL_OFFCHAN_TX_OK; + + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE; -- cgit From 6ff57cf88807dd81300b5b9c623dc5eb6422b9f6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 May 2013 00:55:00 +0200 Subject: cfg80211/mac80211: clean up cfg80211 SME APIs Do some cleanups in the cfg80211 SME APIs, which are only used by mac80211. Most of these functions get a frame passed, and there isn't really any reason to export multiple functions as cfg80211 can check the frame type instead, do that. Additionally, the API functions have confusing names like cfg80211_send_...() which was meant to indicate that it sends an event to userspace, but gets a bit confusing when there's both TX and RX and they're not all clearly labeled. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 44 ++++++++++++++------------ net/mac80211/rx.c | 26 ++++++--------- net/wireless/mlme.c | 86 +++++++++++++++++++++++++++++++------------------- net/wireless/nl80211.c | 30 ++++++++---------- net/wireless/trace.h | 46 ++++++++++++++++++++------- 5 files changed, 134 insertions(+), 98 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9950e13f641b..df8170a80a56 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2155,7 +2155,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); sdata_unlock(sdata); } @@ -2302,7 +2303,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); ieee80211_destroy_auth_data(sdata, false); - cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; } @@ -2337,7 +2338,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, * Report auth frame to user space for processing since another * round of Authentication frames is still needed. */ - cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; } @@ -2354,7 +2355,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&sdata->local->sta_mtx); - cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; out_err: mutex_unlock(&sdata->local->sta_mtx); @@ -2387,7 +2388,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, len); + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); } @@ -2413,7 +2414,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, len); + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); } static void ieee80211_get_rates(struct ieee80211_supported_band *sband, @@ -2711,7 +2712,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); cfg80211_put_bss(sdata->local->hw.wiphy, bss); - cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); + cfg80211_assoc_timeout(sdata->dev, mgmt->bssid); return; } sdata_info(sdata, "associated\n"); @@ -2724,7 +2725,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_destroy_assoc_data(sdata, true); } - cfg80211_send_rx_assoc(sdata->dev, bss, (u8 *)mgmt, len); + cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -3117,8 +3118,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, true, deauth_buf); - cfg80211_send_deauth(sdata->dev, deauth_buf, - sizeof(deauth_buf)); + cfg80211_tx_mlme_mgmt(sdata->dev, deauth_buf, + sizeof(deauth_buf)); return; } @@ -3236,7 +3237,8 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, tx, frame_buf); - cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); } static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) @@ -3427,7 +3429,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_auth_data(sdata, false); - cfg80211_send_auth_timeout(sdata->dev, bssid); + cfg80211_auth_timeout(sdata->dev, bssid); } } else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started) run_again(sdata, ifmgd->auth_data->timeout); @@ -3443,7 +3445,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_assoc_data(sdata, false); - cfg80211_send_assoc_timeout(sdata->dev, bssid); + cfg80211_assoc_timeout(sdata->dev, bssid); } } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) run_again(sdata, ifmgd->assoc_data->timeout); @@ -3992,8 +3994,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + sizeof(frame_buf)); } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -4055,8 +4057,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + sizeof(frame_buf)); } if (ifmgd->auth_data && !ifmgd->auth_data->done) { @@ -4309,8 +4311,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, out: if (report_frame) - cfg80211_send_deauth(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } @@ -4340,8 +4342,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, req->reason_code, !req->local_state_change, frame_buf); - cfg80211_send_disassoc(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index bdd7b4a719e9..23dbcfc69b3b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1747,27 +1747,21 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_has_protected(fc) && ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && rx->key)) { - if (ieee80211_is_deauth(fc)) - cfg80211_send_unprot_deauth(rx->sdata->dev, - rx->skb->data, - rx->skb->len); - else if (ieee80211_is_disassoc(fc)) - cfg80211_send_unprot_disassoc(rx->sdata->dev, - rx->skb->data, - rx->skb->len); + if (ieee80211_is_deauth(fc) || + ieee80211_is_disassoc(fc)) + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; } /* BIP does not use Protected field, so need to check MMIE */ if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && ieee80211_get_mmie_keyidx(rx->skb) < 0)) { - if (ieee80211_is_deauth(fc)) - cfg80211_send_unprot_deauth(rx->sdata->dev, - rx->skb->data, - rx->skb->len); - else if (ieee80211_is_disassoc(fc)) - cfg80211_send_unprot_disassoc(rx->sdata->dev, - rx->skb->data, - rx->skb->len); + if (ieee80211_is_deauth(fc) || + ieee80211_is_disassoc(fc)) + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; } /* diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 7bde5d9c0003..4b9c2be0d56d 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -18,20 +18,7 @@ #include "rdev-ops.h" -void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_send_rx_auth(dev); - - nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); - cfg80211_sme_rx_auth(dev, buf, len); -} -EXPORT_SYMBOL(cfg80211_send_rx_auth); - -void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, +void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, const u8 *buf, size_t len) { u16 status_code; @@ -84,10 +71,10 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, status_code, status_code == WLAN_STATUS_SUCCESS, bss); } -EXPORT_SYMBOL(cfg80211_send_rx_assoc); +EXPORT_SYMBOL(cfg80211_rx_assoc_resp); -void cfg80211_send_deauth(struct net_device *dev, - const u8 *buf, size_t len) +static void cfg80211_process_deauth(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -96,9 +83,6 @@ void cfg80211_send_deauth(struct net_device *dev, const u8 *bssid = mgmt->bssid; bool was_current = false; - trace_cfg80211_send_deauth(dev); - ASSERT_WDEV_LOCK(wdev); - if (wdev->current_bss && ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { cfg80211_unhold_bss(wdev->current_bss); @@ -123,10 +107,9 @@ void cfg80211_send_deauth(struct net_device *dev, false, NULL); } } -EXPORT_SYMBOL(cfg80211_send_deauth); -void cfg80211_send_disassoc(struct net_device *dev, - const u8 *buf, size_t len) +static void cfg80211_process_disassoc(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -136,9 +119,6 @@ void cfg80211_send_disassoc(struct net_device *dev, u16 reason_code; bool from_ap; - trace_cfg80211_send_disassoc(dev); - ASSERT_WDEV_LOCK(wdev); - nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); if (wdev->sme_state != CFG80211_SME_CONNECTED) @@ -153,15 +133,38 @@ void cfg80211_send_disassoc(struct net_device *dev, } else WARN_ON(1); - reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); } -EXPORT_SYMBOL(cfg80211_send_disassoc); -void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) +void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct ieee80211_mgmt *mgmt = (void *)buf; + + ASSERT_WDEV_LOCK(wdev); + + trace_cfg80211_rx_mlme_mgmt(dev, buf, len); + + if (WARN_ON(len < 2)) + return; + + if (ieee80211_is_auth(mgmt->frame_control)) { + nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); + cfg80211_sme_rx_auth(dev, buf, len); + } else if (ieee80211_is_deauth(mgmt->frame_control)) { + cfg80211_process_deauth(dev, buf, len); + } else if (ieee80211_is_disassoc(mgmt->frame_control)) { + cfg80211_process_disassoc(dev, buf, len); + } +} +EXPORT_SYMBOL(cfg80211_rx_mlme_mgmt); + +void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -175,9 +178,9 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); } -EXPORT_SYMBOL(cfg80211_send_auth_timeout); +EXPORT_SYMBOL(cfg80211_auth_timeout); -void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) +void cfg80211_assoc_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -191,7 +194,26 @@ void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); } -EXPORT_SYMBOL(cfg80211_send_assoc_timeout); +EXPORT_SYMBOL(cfg80211_assoc_timeout); + +void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct ieee80211_mgmt *mgmt = (void *)buf; + + ASSERT_WDEV_LOCK(wdev); + + trace_cfg80211_tx_mlme_mgmt(dev, buf, len); + + if (WARN_ON(len < 2)) + return; + + if (ieee80211_is_deauth(mgmt->frame_control)) + cfg80211_process_deauth(dev, buf, len); + else + cfg80211_process_disassoc(dev, buf, len); +} +EXPORT_SYMBOL(cfg80211_tx_mlme_mgmt); void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, enum nl80211_key_type key_type, int key_id, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ce949e38178c..444f5effb77f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9313,31 +9313,27 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, NL80211_CMD_DISASSOCIATE, gfp); } -void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, - size_t len) +void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, + size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + const struct ieee80211_mgmt *mgmt = (void *)buf; + u32 cmd; - trace_cfg80211_send_unprot_deauth(dev); - nl80211_send_mlme_event(rdev, dev, buf, len, - NL80211_CMD_UNPROT_DEAUTHENTICATE, GFP_ATOMIC); -} -EXPORT_SYMBOL(cfg80211_send_unprot_deauth); + if (WARN_ON(len < 2)) + return; -void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, - size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + if (ieee80211_is_deauth(mgmt->frame_control)) + cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE; + else + cmd = NL80211_CMD_UNPROT_DISASSOCIATE; - trace_cfg80211_send_unprot_disassoc(dev); - nl80211_send_mlme_event(rdev, dev, buf, len, - NL80211_CMD_UNPROT_DISASSOCIATE, GFP_ATOMIC); + trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len); + nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC); } -EXPORT_SYMBOL(cfg80211_send_unprot_disassoc); +EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt); static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, int cmd, diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 23fafeae8a10..e1534baf2ebb 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1911,24 +1911,46 @@ TRACE_EVENT(cfg80211_send_rx_assoc, NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG) ); -DEFINE_EVENT(netdev_evt_only, cfg80211_send_deauth, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +DECLARE_EVENT_CLASS(netdev_frame_event, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len), + TP_STRUCT__entry( + NETDEV_ENTRY + __dynamic_array(u8, frame, len) + ), + TP_fast_assign( + NETDEV_ASSIGN; + memcpy(__get_dynamic_array(frame), buf, len); + ), + TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x", + NETDEV_PR_ARG, + le16_to_cpup((__le16 *)__get_dynamic_array(frame))) ); -DEFINE_EVENT(netdev_evt_only, cfg80211_send_disassoc, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +DEFINE_EVENT(netdev_frame_event, cfg80211_rx_unprot_mlme_mgmt, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len) ); -DEFINE_EVENT(netdev_evt_only, cfg80211_send_unprot_deauth, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +DEFINE_EVENT(netdev_frame_event, cfg80211_rx_mlme_mgmt, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len) ); -DEFINE_EVENT(netdev_evt_only, cfg80211_send_unprot_disassoc, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +TRACE_EVENT(cfg80211_tx_mlme_mgmt, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len), + TP_STRUCT__entry( + NETDEV_ENTRY + __dynamic_array(u8, frame, len) + ), + TP_fast_assign( + NETDEV_ASSIGN; + memcpy(__get_dynamic_array(frame), buf, len); + ), + TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x", + NETDEV_PR_ARG, + le16_to_cpup((__le16 *)__get_dynamic_array(frame))) ); DECLARE_EVENT_CLASS(netdev_mac_evt, -- cgit From ceca7b7121795ef81bd598a240d53a925662d0c1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 May 2013 00:55:45 +0200 Subject: cfg80211: separate internal SME implementation The current internal SME implementation in cfg80211 is very mixed up with the MLME handling, which has been causing issues for a long time. There are three things that the implementation has to provide: * a basic SME implementation for nl80211's connect() call (for drivers implementing auth/assoc, which is really just mac80211) and wireless extensions * MLME events for the userspace SME * SME events (connected, disconnected etc.) for all different SME implementation possibilities (driver, cfg80211 and userspace) To achieve these goals it isn't necessary to track the software SME's connection status outside of it's state (which is the part that caused many issues.) Instead, track it only in the SME data (wdev->conn) and in the general case only track whether the wdev is connected or not (via wdev->current_bss.) Also separate the internal implementation to not have callbacks from the SME events, but rather call it from the API functions that the driver (or rather mac80211) calls. This separates the code better. Signed-off-by: Johannes Berg --- net/wireless/core.c | 1 - net/wireless/core.h | 30 +-- net/wireless/ibss.c | 6 - net/wireless/mlme.c | 191 ++++++----------- net/wireless/nl80211.c | 5 +- net/wireless/sme.c | 542 +++++++++++++++++++++++------------------------- net/wireless/wext-sme.c | 8 +- 7 files changed, 335 insertions(+), 448 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index f553b9484c1e..221e76b53a97 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -821,7 +821,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, pr_err("failed to add phy80211 symlink to netdev!\n"); } wdev->netdev = dev; - wdev->sme_state = CFG80211_SME_IDLE; #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; diff --git a/net/wireless/core.h b/net/wireless/core.h index a65eaf8a84c1..a6b45bf00f33 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -308,11 +308,6 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, bool local_state_change); void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev); -void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - u16 status, bool wextev, - struct cfg80211_bss *bss); int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, u16 frame_type, const u8 *match_data, int match_len); @@ -328,12 +323,19 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, const struct ieee80211_vht_cap *vht_capa_mask); -/* SME */ +/* SME events */ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, struct cfg80211_cached_keys *connkeys, const u8 *prev_bssid); +void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, bool wextev, + struct cfg80211_bss *bss); +void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, + size_t ie_len, u16 reason, bool from_ap); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); @@ -344,21 +346,21 @@ void __cfg80211_roamed(struct wireless_dev *wdev, int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +/* SME implementation */ void cfg80211_conn_work(struct work_struct *work); -void cfg80211_sme_failed_assoc(struct wireless_dev *wdev); -bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev); +void cfg80211_sme_scan_done(struct net_device *dev); +bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status); +void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len); +void cfg80211_sme_disassoc(struct wireless_dev *wdev); +void cfg80211_sme_deauth(struct wireless_dev *wdev); +void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); +void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); -void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, - size_t ie_len, u16 reason, bool from_ap); -void cfg80211_sme_scan_done(struct net_device *dev); -void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len); -void cfg80211_sme_disassoc(struct net_device *dev, - struct cfg80211_internal_bss *bss); void __cfg80211_scan_done(struct work_struct *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak); void __cfg80211_sched_scan_results(struct work_struct *wk); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 5449c5a6de84..39bff7d36768 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -43,7 +43,6 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - wdev->sme_state = CFG80211_SME_CONNECTED; cfg80211_upload_connect_keys(wdev); nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, @@ -64,8 +63,6 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) trace_cfg80211_ibss_joined(dev, bssid); - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING); - ev = kzalloc(sizeof(*ev), gfp); if (!ev) return; @@ -120,7 +117,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.chandef = params->chandef; #endif - wdev->sme_state = CFG80211_SME_CONNECTING; err = cfg80211_can_use_chan(rdev, wdev, params->chandef.chan, params->channel_fixed @@ -134,7 +130,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, err = rdev_join_ibss(rdev, dev, params); if (err) { wdev->connect_keys = NULL; - wdev->sme_state = CFG80211_SME_IDLE; return err; } @@ -186,7 +181,6 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) } wdev->current_bss = NULL; - wdev->sme_state = CFG80211_SME_IDLE; wdev->ssid_len = 0; #ifdef CONFIG_CFG80211_WEXT if (!nowext) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 4b9c2be0d56d..a61a44bc6cf0 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -21,129 +21,85 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, const u8 *buf, size_t len) { - u16 status_code; struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u8 *ie = mgmt->u.assoc_resp.variable; int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); + u16 status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); trace_cfg80211_send_rx_assoc(dev, bss); - status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); - /* * This is a bit of a hack, we don't notify userspace of * a (re-)association reply if we tried to send a reassoc * and got a reject -- we only try again with an assoc * frame instead of reassoc. */ - if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && - cfg80211_sme_failed_reassoc(wdev)) { + if (cfg80211_sme_rx_assoc_resp(wdev, status_code)) { cfg80211_put_bss(wiphy, bss); return; } nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL); - - if (status_code != WLAN_STATUS_SUCCESS && wdev->conn) { - cfg80211_sme_failed_assoc(wdev); - /* - * do not call connect_result() now because the - * sme will schedule work that does it later. - */ - cfg80211_put_bss(wiphy, bss); - return; - } - - if (!wdev->conn && wdev->sme_state == CFG80211_SME_IDLE) { - /* - * This is for the userspace SME, the CONNECTING - * state will be changed to CONNECTED by - * __cfg80211_connect_result() below. - */ - wdev->sme_state = CFG80211_SME_CONNECTING; - } - - /* this consumes the bss reference */ + /* update current_bss etc., consumes the bss reference */ __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, status_code, status_code == WLAN_STATUS_SUCCESS, bss); } EXPORT_SYMBOL(cfg80211_rx_assoc_resp); -static void cfg80211_process_deauth(struct net_device *dev, +static void cfg80211_process_auth(struct wireless_dev *wdev, + const u8 *buf, size_t len) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + nl80211_send_rx_auth(rdev, wdev->netdev, buf, len, GFP_KERNEL); + cfg80211_sme_rx_auth(wdev, buf, len); +} + +static void cfg80211_process_deauth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; - bool was_current = false; - - if (wdev->current_bss && - ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - was_current = true; - } + u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); + bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr); - nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); + nl80211_send_deauth(rdev, wdev->netdev, buf, len, GFP_KERNEL); - if (wdev->sme_state == CFG80211_SME_CONNECTED && was_current) { - u16 reason_code; - bool from_ap; - - reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); + if (!wdev->current_bss || + !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) + return; - from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); - __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); - } else if (wdev->sme_state == CFG80211_SME_CONNECTING) { - __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); - } + __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap); + cfg80211_sme_deauth(wdev); } -static void cfg80211_process_disassoc(struct net_device *dev, +static void cfg80211_process_disassoc(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; - u16 reason_code; - bool from_ap; + u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); + bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr); - nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); + nl80211_send_disassoc(rdev, wdev->netdev, buf, len, GFP_KERNEL); - if (wdev->sme_state != CFG80211_SME_CONNECTED) + if (WARN_ON(!wdev->current_bss || + !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) return; - if (wdev->current_bss && - ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { - cfg80211_sme_disassoc(dev, wdev->current_bss); - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - } else - WARN_ON(1); - - reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - - from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); - __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); + __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap); + cfg80211_sme_disassoc(wdev); } void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (void *)buf; ASSERT_WDEV_LOCK(wdev); @@ -153,14 +109,12 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) if (WARN_ON(len < 2)) return; - if (ieee80211_is_auth(mgmt->frame_control)) { - nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); - cfg80211_sme_rx_auth(dev, buf, len); - } else if (ieee80211_is_deauth(mgmt->frame_control)) { - cfg80211_process_deauth(dev, buf, len); - } else if (ieee80211_is_disassoc(mgmt->frame_control)) { - cfg80211_process_disassoc(dev, buf, len); - } + if (ieee80211_is_auth(mgmt->frame_control)) + cfg80211_process_auth(wdev, buf, len); + else if (ieee80211_is_deauth(mgmt->frame_control)) + cfg80211_process_deauth(wdev, buf, len); + else if (ieee80211_is_disassoc(mgmt->frame_control)) + cfg80211_process_disassoc(wdev, buf, len); } EXPORT_SYMBOL(cfg80211_rx_mlme_mgmt); @@ -173,10 +127,7 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr) trace_cfg80211_send_auth_timeout(dev, addr); nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); - if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); + cfg80211_sme_auth_timeout(wdev); } EXPORT_SYMBOL(cfg80211_auth_timeout); @@ -189,10 +140,7 @@ void cfg80211_assoc_timeout(struct net_device *dev, const u8 *addr) trace_cfg80211_send_assoc_timeout(dev, addr); nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); - if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); + cfg80211_sme_assoc_timeout(wdev); } EXPORT_SYMBOL(cfg80211_assoc_timeout); @@ -209,9 +157,9 @@ void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) return; if (ieee80211_is_deauth(mgmt->frame_control)) - cfg80211_process_deauth(dev, buf, len); + cfg80211_process_deauth(wdev, buf, len); else - cfg80211_process_disassoc(dev, buf, len); + cfg80211_process_disassoc(wdev, buf, len); } EXPORT_SYMBOL(cfg80211_tx_mlme_mgmt); @@ -336,21 +284,12 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - bool was_connected = false; ASSERT_WDEV_LOCK(wdev); - if (wdev->current_bss && req->prev_bssid && - ether_addr_equal(wdev->current_bss->pub.bssid, req->prev_bssid)) { - /* - * Trying to reassociate: Allow this to proceed and let the old - * association to be dropped when the new one is completed. - */ - if (wdev->sme_state == CFG80211_SME_CONNECTED) { - was_connected = true; - wdev->sme_state = CFG80211_SME_CONNECTING; - } - } else if (wdev->current_bss) + if (wdev->current_bss && + (!req->prev_bssid || !ether_addr_equal(wdev->current_bss->pub.bssid, + req->prev_bssid))) return -EALREADY; cfg80211_oper_and_ht_capa(&req->ht_capa_mask, @@ -360,11 +299,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - if (!req->bss) { - if (was_connected) - wdev->sme_state = CFG80211_SME_CONNECTED; + if (!req->bss) return -ENOENT; - } err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED); if (err) @@ -373,11 +309,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, err = rdev_assoc(rdev, dev, req); out: - if (err) { - if (was_connected) - wdev->sme_state = CFG80211_SME_CONNECTED; + if (err) cfg80211_put_bss(&rdev->wiphy, req->bss); - } return err; } @@ -398,8 +331,9 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (local_state_change && (!wdev->current_bss || - !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + if (local_state_change && + (!wdev->current_bss || + !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) return 0; return rdev_deauth(rdev, dev, &req); @@ -417,13 +351,11 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, .ie = ie, .ie_len = ie_len, }; + int err; ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state != CFG80211_SME_CONNECTED) - return -ENOTCONN; - - if (WARN(!wdev->current_bss, "sme_state=%d\n", wdev->sme_state)) + if (!wdev->current_bss) return -ENOTCONN; if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) @@ -431,7 +363,13 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, else return -ENOTCONN; - return rdev_disassoc(rdev, dev, &req); + err = rdev_disassoc(rdev, dev, &req); + if (err) + return err; + + /* driver should have reported the disassoc */ + WARN_ON(wdev->current_bss); + return 0; } void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, @@ -439,10 +377,6 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, { struct wireless_dev *wdev = dev->ieee80211_ptr; u8 bssid[ETH_ALEN]; - struct cfg80211_deauth_request req = { - .reason_code = WLAN_REASON_DEAUTH_LEAVING, - .bssid = bssid, - }; ASSERT_WDEV_LOCK(wdev); @@ -453,13 +387,8 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, return; memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); - rdev_deauth(rdev, dev, &req); - - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&rdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - } + cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); } struct cfg80211_mgmt_registration { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 444f5effb77f..88e820b73674 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -800,12 +800,9 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: - if (!wdev->current_bss) - return -ENOLINK; - break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (wdev->sme_state != CFG80211_SME_CONNECTED) + if (!wdev->current_bss) return -ENOLINK; break; default: diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 81be95f3be74..ae7e2cbf45cb 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1,5 +1,7 @@ /* - * SME code for cfg80211's connect emulation. + * SME code for cfg80211 + * both driver SME event handling and the SME implementation + * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg * Copyright (C) 2009 Intel Corporation. All rights reserved. @@ -18,18 +20,24 @@ #include "reg.h" #include "rdev-ops.h" +/* + * Software SME in cfg80211, using auth/assoc/deauth calls to the + * driver. This is is for implementing nl80211's connect/disconnect + * and wireless extensions (if configured.) + */ + struct cfg80211_conn { struct cfg80211_connect_params params; /* these are sub-states of the _CONNECTING sme_state */ enum { - CFG80211_CONN_IDLE, CFG80211_CONN_SCANNING, CFG80211_CONN_SCAN_AGAIN, CFG80211_CONN_AUTHENTICATE_NEXT, CFG80211_CONN_AUTHENTICATING, CFG80211_CONN_ASSOCIATE_NEXT, CFG80211_CONN_ASSOCIATING, - CFG80211_CONN_DEAUTH_ASSOC_FAIL, + CFG80211_CONN_DEAUTH, + CFG80211_CONN_CONNECTED, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 *ie; @@ -37,39 +45,16 @@ struct cfg80211_conn { bool auto_auth, prev_bssid_valid; }; -static bool cfg80211_is_all_idle(void) +static void cfg80211_sme_free(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev; - struct wireless_dev *wdev; - bool is_all_idle = true; - - /* - * All devices must be idle as otherwise if you are actively - * scanning some new beacon hints could be learned and would - * count as new regulatory hints. - */ - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - list_for_each_entry(wdev, &rdev->wdev_list, list) { - wdev_lock(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) - is_all_idle = false; - wdev_unlock(wdev); - } - } - - return is_all_idle; -} + if (!wdev->conn) + return; -static void disconnect_work(struct work_struct *work) -{ - rtnl_lock(); - if (cfg80211_is_all_idle()) - regulatory_hint_disconnect(); - rtnl_unlock(); + kfree(wdev->conn->ie); + kfree(wdev->conn); + wdev->conn = NULL; } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); - static int cfg80211_conn_scan(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -164,6 +149,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) params = &wdev->conn->params; switch (wdev->conn->state) { + case CFG80211_CONN_SCANNING: + /* didn't find it during scan ... */ + return -ENOENT; case CFG80211_CONN_SCAN_AGAIN: return cfg80211_conn_scan(wdev); case CFG80211_CONN_AUTHENTICATE_NEXT: @@ -200,12 +188,11 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) WLAN_REASON_DEAUTH_LEAVING, false); return err; - case CFG80211_CONN_DEAUTH_ASSOC_FAIL: + case CFG80211_CONN_DEAUTH: cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); - /* return an error so that we call __cfg80211_connect_result() */ - return -EINVAL; + return 0; default: return 0; } @@ -229,7 +216,8 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); continue; } - if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) { + if (!wdev->conn || + wdev->conn->state == CFG80211_CONN_CONNECTED) { wdev_unlock(wdev); continue; } @@ -237,12 +225,14 @@ void cfg80211_conn_work(struct work_struct *work) memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN); bssid = bssid_buf; } - if (cfg80211_conn_do_work(wdev)) + if (cfg80211_conn_do_work(wdev)) { __cfg80211_connect_result( wdev->netdev, bssid, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); + cfg80211_sme_free(wdev); + } wdev_unlock(wdev); } @@ -286,9 +276,6 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state != CFG80211_SME_CONNECTING) - return; - if (!wdev->conn) return; @@ -297,20 +284,10 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) return; bss = cfg80211_get_conn_bss(wdev); - if (bss) { + if (bss) cfg80211_put_bss(&rdev->wiphy, bss); - } else { - /* not found */ - if (wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) - schedule_work(&rdev->conn_work); - else - __cfg80211_connect_result( - wdev->netdev, - wdev->conn->params.bssid, - NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); - } + else + schedule_work(&rdev->conn_work); } void cfg80211_sme_scan_done(struct net_device *dev) @@ -322,10 +299,8 @@ void cfg80211_sme_scan_done(struct net_device *dev) wdev_unlock(wdev); } -void cfg80211_sme_rx_auth(struct net_device *dev, - const u8 *buf, size_t len) +void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; @@ -333,11 +308,7 @@ void cfg80211_sme_rx_auth(struct net_device *dev, ASSERT_WDEV_LOCK(wdev); - /* should only RX auth frames when connecting */ - if (wdev->sme_state != CFG80211_SME_CONNECTING) - return; - - if (WARN_ON(!wdev->conn)) + if (!wdev->conn || wdev->conn->state == CFG80211_CONN_CONNECTED) return; if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG && @@ -366,46 +337,226 @@ void cfg80211_sme_rx_auth(struct net_device *dev, wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; schedule_work(&rdev->conn_work); } else if (status_code != WLAN_STATUS_SUCCESS) { - __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, + __cfg80211_connect_result(wdev->netdev, mgmt->bssid, + NULL, 0, NULL, 0, status_code, false, NULL); - } else if (wdev->sme_state == CFG80211_SME_CONNECTING && - wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { + } else if (wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; schedule_work(&rdev->conn_work); } } -bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev) +bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status) { - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - if (WARN_ON(!wdev->conn)) + if (!wdev->conn) return false; - if (!wdev->conn->prev_bssid_valid) + if (status == WLAN_STATUS_SUCCESS) { + wdev->conn->state = CFG80211_CONN_CONNECTED; return false; + } - /* - * Some stupid APs don't accept reassoc, so we - * need to fall back to trying regular assoc. - */ - wdev->conn->prev_bssid_valid = false; - wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; + if (wdev->conn->prev_bssid_valid) { + /* + * Some stupid APs don't accept reassoc, so we + * need to fall back to trying regular assoc; + * return true so no event is sent to userspace. + */ + wdev->conn->prev_bssid_valid = false; + wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; + schedule_work(&rdev->conn_work); + return true; + } + + wdev->conn->state = CFG80211_CONN_DEAUTH; schedule_work(&rdev->conn_work); + return false; +} - return true; +void cfg80211_sme_deauth(struct wireless_dev *wdev) +{ + cfg80211_sme_free(wdev); } -void cfg80211_sme_failed_assoc(struct wireless_dev *wdev) +void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) { - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + cfg80211_sme_free(wdev); +} - wdev->conn->state = CFG80211_CONN_DEAUTH_ASSOC_FAIL; +void cfg80211_sme_disassoc(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_DEAUTH; schedule_work(&rdev->conn_work); } +void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) +{ + cfg80211_sme_disassoc(wdev); +} + +static int cfg80211_sme_connect(struct wireless_dev *wdev, + struct cfg80211_connect_params *connect, + const u8 *prev_bssid) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_bss *bss; + int err; + + if (!rdev->ops->auth || !rdev->ops->assoc) + return -EOPNOTSUPP; + + if (wdev->current_bss) + return -EALREADY; + + if (WARN_ON(wdev->conn)) + return -EINPROGRESS; + + wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); + if (!wdev->conn) + return -ENOMEM; + + /* + * Copy all parameters, and treat explicitly IEs, BSSID, SSID. + */ + memcpy(&wdev->conn->params, connect, sizeof(*connect)); + if (connect->bssid) { + wdev->conn->params.bssid = wdev->conn->bssid; + memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN); + } + + if (connect->ie) { + wdev->conn->ie = kmemdup(connect->ie, connect->ie_len, + GFP_KERNEL); + wdev->conn->params.ie = wdev->conn->ie; + if (!wdev->conn->ie) { + kfree(wdev->conn); + wdev->conn = NULL; + return -ENOMEM; + } + } + + if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { + wdev->conn->auto_auth = true; + /* start with open system ... should mostly work */ + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_OPEN_SYSTEM; + } else { + wdev->conn->auto_auth = false; + } + + wdev->conn->params.ssid = wdev->ssid; + wdev->conn->params.ssid_len = connect->ssid_len; + + /* see if we have the bss already */ + bss = cfg80211_get_conn_bss(wdev); + + if (prev_bssid) { + memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN); + wdev->conn->prev_bssid_valid = true; + } + + /* we're good if we have a matching bss struct */ + if (bss) { + wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; + err = cfg80211_conn_do_work(wdev); + cfg80211_put_bss(wdev->wiphy, bss); + } else { + /* otherwise we'll need to scan for the AP first */ + err = cfg80211_conn_scan(wdev); + + /* + * If we can't scan right now, then we need to scan again + * after the current scan finished, since the parameters + * changed (unless we find a good AP anyway). + */ + if (err == -EBUSY) { + err = 0; + wdev->conn->state = CFG80211_CONN_SCAN_AGAIN; + } + } + + if (err) + cfg80211_sme_free(wdev); + + return err; +} + +static int cfg80211_sme_disconnect(struct wireless_dev *wdev, u16 reason) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int err; + + if (!wdev->conn) + return 0; + + if (!rdev->ops->deauth) + return -EOPNOTSUPP; + + if (wdev->conn->state == CFG80211_CONN_SCANNING || + wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) { + err = 0; + goto out; + } + + /* wdev->conn->params.bssid must be set if > SCANNING */ + err = cfg80211_mlme_deauth(rdev, wdev->netdev, + wdev->conn->params.bssid, + NULL, 0, reason, false); + out: + cfg80211_sme_free(wdev); + return err; +} + +/* + * code shared for in-device and software SME + */ + +static bool cfg80211_is_all_idle(void) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + bool is_all_idle = true; + + /* + * All devices must be idle as otherwise if you are actively + * scanning some new beacon hints could be learned and would + * count as new regulatory hints. + */ + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { + wdev_lock(wdev); + if (wdev->conn || wdev->current_bss) + is_all_idle = false; + wdev_unlock(wdev); + } + } + + return is_all_idle; +} + +static void disconnect_work(struct work_struct *work) +{ + rtnl_lock(); + if (cfg80211_is_all_idle()) + regulatory_hint_disconnect(); + rtnl_unlock(); +} + +static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); + + +/* + * API calls for drivers implementing connect/disconnect and + * SME event handling + */ + void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, @@ -424,9 +575,6 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; - if (wdev->sme_state != CFG80211_SME_CONNECTING) - return; - nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, @@ -463,15 +611,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->current_bss = NULL; } - if (wdev->conn) - wdev->conn->state = CFG80211_CONN_IDLE; - if (status != WLAN_STATUS_SUCCESS) { - wdev->sme_state = CFG80211_SME_IDLE; - if (wdev->conn) - kfree(wdev->conn->ie); - kfree(wdev->conn); - wdev->conn = NULL; kfree(wdev->connect_keys); wdev->connect_keys = NULL; wdev->ssid_len = 0; @@ -480,21 +620,16 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, } if (!bss) - bss = cfg80211_get_bss(wdev->wiphy, - wdev->conn ? wdev->conn->params.channel : - NULL, - bssid, + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - if (WARN_ON(!bss)) return; cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - wdev->sme_state = CFG80211_SME_CONNECTED; cfg80211_upload_connect_keys(wdev); rcu_read_lock(); @@ -530,8 +665,6 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, struct cfg80211_event *ev; unsigned long flags; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING); - ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); if (!ev) return; @@ -572,13 +705,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) goto out; - if (wdev->sme_state != CFG80211_SME_CONNECTED) - goto out; - - /* internal error -- how did we get to CONNECTED w/o BSS? */ - if (WARN_ON(!wdev->current_bss)) { + if (WARN_ON(!wdev->current_bss)) goto out; - } cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); @@ -628,8 +756,6 @@ void cfg80211_roamed(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); - bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); @@ -651,8 +777,6 @@ void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_event *ev; unsigned long flags; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); - if (WARN_ON(!bss)) return; @@ -694,25 +818,14 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; - if (wdev->sme_state != CFG80211_SME_CONNECTED) - return; - if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } wdev->current_bss = NULL; - wdev->sme_state = CFG80211_SME_IDLE; wdev->ssid_len = 0; - if (wdev->conn) { - kfree(wdev->conn->ie); - wdev->conn->ie = NULL; - kfree(wdev->conn); - wdev->conn = NULL; - } - nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); /* @@ -741,8 +854,6 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, struct cfg80211_event *ev; unsigned long flags; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); - ev = kzalloc(sizeof(*ev) + ie_len, gfp); if (!ev) return; @@ -760,6 +871,9 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, } EXPORT_SYMBOL(cfg80211_disconnected); +/* + * API calls for nl80211/wext compatibility code + */ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, @@ -767,14 +881,10 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, const u8 *prev_bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_bss *bss = NULL; int err; ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) - return -EALREADY; - if (WARN_ON(wdev->connect_keys)) { kfree(wdev->connect_keys); wdev->connect_keys = NULL; @@ -810,105 +920,22 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, } } - if (!rdev->ops->connect) { - if (!rdev->ops->auth || !rdev->ops->assoc) - return -EOPNOTSUPP; - - if (WARN_ON(wdev->conn)) - return -EINPROGRESS; - - wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); - if (!wdev->conn) - return -ENOMEM; - - /* - * Copy all parameters, and treat explicitly IEs, BSSID, SSID. - */ - memcpy(&wdev->conn->params, connect, sizeof(*connect)); - if (connect->bssid) { - wdev->conn->params.bssid = wdev->conn->bssid; - memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN); - } + wdev->connect_keys = connkeys; + memcpy(wdev->ssid, connect->ssid, connect->ssid_len); + wdev->ssid_len = connect->ssid_len; - if (connect->ie) { - wdev->conn->ie = kmemdup(connect->ie, connect->ie_len, - GFP_KERNEL); - wdev->conn->params.ie = wdev->conn->ie; - if (!wdev->conn->ie) { - kfree(wdev->conn); - wdev->conn = NULL; - return -ENOMEM; - } - } - - if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { - wdev->conn->auto_auth = true; - /* start with open system ... should mostly work */ - wdev->conn->params.auth_type = - NL80211_AUTHTYPE_OPEN_SYSTEM; - } else { - wdev->conn->auto_auth = false; - } - - memcpy(wdev->ssid, connect->ssid, connect->ssid_len); - wdev->ssid_len = connect->ssid_len; - wdev->conn->params.ssid = wdev->ssid; - wdev->conn->params.ssid_len = connect->ssid_len; - - /* see if we have the bss already */ - bss = cfg80211_get_conn_bss(wdev); - - wdev->sme_state = CFG80211_SME_CONNECTING; - wdev->connect_keys = connkeys; - - if (prev_bssid) { - memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN); - wdev->conn->prev_bssid_valid = true; - } - - /* we're good if we have a matching bss struct */ - if (bss) { - wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; - err = cfg80211_conn_do_work(wdev); - cfg80211_put_bss(wdev->wiphy, bss); - } else { - /* otherwise we'll need to scan for the AP first */ - err = cfg80211_conn_scan(wdev); - /* - * If we can't scan right now, then we need to scan again - * after the current scan finished, since the parameters - * changed (unless we find a good AP anyway). - */ - if (err == -EBUSY) { - err = 0; - wdev->conn->state = CFG80211_CONN_SCAN_AGAIN; - } - } - if (err) { - kfree(wdev->conn->ie); - kfree(wdev->conn); - wdev->conn = NULL; - wdev->sme_state = CFG80211_SME_IDLE; - wdev->connect_keys = NULL; - wdev->ssid_len = 0; - } - - return err; - } else { - wdev->sme_state = CFG80211_SME_CONNECTING; - wdev->connect_keys = connkeys; + if (!rdev->ops->connect) + err = cfg80211_sme_connect(wdev, connect, prev_bssid); + else err = rdev_connect(rdev, dev, connect); - if (err) { - wdev->connect_keys = NULL; - wdev->sme_state = CFG80211_SME_IDLE; - return err; - } - memcpy(wdev->ssid, connect->ssid, connect->ssid_len); - wdev->ssid_len = connect->ssid_len; - - return 0; + if (err) { + wdev->connect_keys = NULL; + wdev->ssid_len = 0; + return err; } + + return 0; } int cfg80211_disconnect(struct cfg80211_registered_device *rdev, @@ -919,78 +946,17 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state == CFG80211_SME_IDLE) - return -EINVAL; - kfree(wdev->connect_keys); wdev->connect_keys = NULL; - if (!rdev->ops->disconnect) { - if (!rdev->ops->deauth) - return -EOPNOTSUPP; - - /* was it connected by userspace SME? */ - if (!wdev->conn) { - cfg80211_mlme_down(rdev, dev); - goto disconnect; - } - - if (wdev->sme_state == CFG80211_SME_CONNECTING && - (wdev->conn->state == CFG80211_CONN_SCANNING || - wdev->conn->state == CFG80211_CONN_SCAN_AGAIN)) { - wdev->sme_state = CFG80211_SME_IDLE; - kfree(wdev->conn->ie); - kfree(wdev->conn); - wdev->conn = NULL; - wdev->ssid_len = 0; - return 0; - } - - /* wdev->conn->params.bssid must be set if > SCANNING */ - err = cfg80211_mlme_deauth(rdev, dev, - wdev->conn->params.bssid, - NULL, 0, reason, false); - if (err) - return err; + if (wdev->conn) { + err = cfg80211_sme_disconnect(wdev, reason); + } else if (!rdev->ops->disconnect) { + cfg80211_mlme_down(rdev, dev); + err = 0; } else { err = rdev_disconnect(rdev, dev, reason); - if (err) - return err; } - disconnect: - if (wdev->sme_state == CFG80211_SME_CONNECTED) - __cfg80211_disconnected(dev, NULL, 0, 0, false); - else if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, NULL, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - wextev, NULL); - - return 0; -} - -void cfg80211_sme_disassoc(struct net_device *dev, - struct cfg80211_internal_bss *bss) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - u8 bssid[ETH_ALEN]; - - ASSERT_WDEV_LOCK(wdev); - - if (!wdev->conn) - return; - - if (wdev->conn->state == CFG80211_CONN_IDLE) - return; - - /* - * Ok, so the association was made by this SME -- we don't - * want it any more so deauthenticate too. - */ - - memcpy(bssid, bss->pub.bssid, ETH_ALEN); - - cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); + return err; } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index a53f8404f451..14c9a2583ba0 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -89,7 +89,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, wdev_lock(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { bool event = true; if (wdev->wext.connect.channel == chan) { @@ -188,7 +188,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = 0; - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { bool event = true; if (wdev->wext.connect.ssid && len && @@ -277,7 +277,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, wdev_lock(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { err = 0; /* both automatic */ if (!bssid && !wdev->wext.connect.bssid) @@ -364,7 +364,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev, wdev->wext.ie = ie; wdev->wext.ie_len = ie_len; - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, false); if (err) -- cgit From 3430140ad9da9ec1caaf800af6b0378351919f9c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Jun 2013 14:35:07 +0200 Subject: regulatory: use proper enum return value get_reg_request_treatment() returns 0 in one case but is defined to return an enum, use the proper value REG_REQ_OK. Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index e1d6749234c6..5a24c986f34b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1345,7 +1345,7 @@ get_reg_request_treatment(struct wiphy *wiphy, return REG_REQ_OK; return REG_REQ_ALREADY_SET; } - return 0; + return REG_REQ_OK; case NL80211_REGDOM_SET_BY_DRIVER: if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { if (regdom_changes(pending_request->alpha2)) -- cgit From 17ef66afc0bdbbdc5c526db5e24bdd2dc3df1205 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 31 May 2013 15:05:48 +0000 Subject: net: ipv6: Unify {raw,udp}6_sock_seq_show. udp6_sock_seq_show and raw6_sock_seq_show are identical, except the UDP version displays ports and the raw version displays the protocol. Refactor most of the code in these two functions into a new common ip6_dgram_sock_seq_show function, in preparation for using it to display ICMPv6 sockets as well. Also reduce the indentation in parts of include/net/transp_v6.h to improve readability. Compiles and displays reasonable results with CONFIG_IPV6={n,m,y} Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 27 +++++++++++++++++++++++++++ net/ipv6/raw.c | 45 ++++++++------------------------------------- net/ipv6/udp.c | 49 +++++++++---------------------------------------- 3 files changed, 44 insertions(+), 77 deletions(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 4b56cbbc7890..197e6f4a2b74 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -879,3 +879,30 @@ exit_f: return err; } EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); + +void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, + __u16 srcp, __u16 destp, int bucket) +{ + struct ipv6_pinfo *np = inet6_sk(sp); + const struct in6_addr *dest, *src; + + dest = &np->daddr; + src = &np->rcv_saddr; + seq_printf(seq, + "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", + bucket, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp, + sp->sk_state, + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, + sock_i_ino(sp), + atomic_read(&sp->sk_refcnt), sp, + atomic_read(&sp->sk_drops)); +} diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4f8886aa8429..c45f7a5c36e9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1227,45 +1227,16 @@ struct proto rawv6_prot = { }; #ifdef CONFIG_PROC_FS -static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) -{ - struct ipv6_pinfo *np = inet6_sk(sp); - const struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = 0; - srcp = inet_sk(sp)->inet_num; - seq_printf(seq, - "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", - i, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), - 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); -} - static int raw6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + struct sock *sp = v; + __u16 srcp = inet_sk(sp)->inet_num; + ip6_dgram_sock_seq_show(seq, v, srcp, 0, + raw_seq_private(seq)->bucket); + } return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 42923b14dfa6..b5808539cd5c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1359,48 +1359,17 @@ static const struct inet6_protocol udpv6_protocol = { /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS - -static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) -{ - struct inet_sock *inet = inet_sk(sp); - struct ipv6_pinfo *np = inet6_sk(sp); - const struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = ntohs(inet->inet_dport); - srcp = ntohs(inet->inet_sport); - seq_printf(seq, - "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", - bucket, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), - 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops)); -} - int udp6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct udp_iter_state *)seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } return 0; } -- cgit From 8cc785f6f429c2a3fb81745dc142cbd72a462c4a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 31 May 2013 15:05:49 +0000 Subject: net: ipv4: make the ping /proc code AF-independent Introduce a ping_seq_afinfo structure (similar to its UDP equivalent) and use it to make some of the ping /proc functions address-family independent. Rename the remaining ping /proc functions from ping_* to ping_v4_*. Compiles and displays reasonable results with CONFIG_IPV6={n,m,y} Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ping.c | 73 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 71f6ad02fa67..8c2da9b8cd86 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1001,7 +1001,8 @@ static struct sock *ping_get_first(struct seq_file *seq, int start) continue; sk_nulls_for_each(sk, node, hslot) { - if (net_eq(sock_net(sk), net)) + if (net_eq(sock_net(sk), net) && + sk->sk_family == state->family) goto found; } } @@ -1034,16 +1035,23 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) return pos ? NULL : sk; } -static void *ping_seq_start(struct seq_file *seq, loff_t *pos) +static void *ping_seq_start(struct seq_file *seq, loff_t *pos, + sa_family_t family) { struct ping_iter_state *state = seq->private; state->bucket = 0; + state->family = family; read_lock_bh(&ping_table.lock); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } +static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) +{ + return ping_seq_start(seq, pos, AF_INET); +} + static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; @@ -1062,7 +1070,7 @@ static void ping_seq_stop(struct seq_file *seq, void *v) read_unlock_bh(&ping_table.lock); } -static void ping_format_sock(struct sock *sp, struct seq_file *f, +static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket, int *len) { struct inet_sock *inet = inet_sk(sp); @@ -1083,7 +1091,7 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f, atomic_read(&sp->sk_drops), len); } -static int ping_seq_show(struct seq_file *seq, void *v) +static int ping_v4_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", @@ -1094,22 +1102,23 @@ static int ping_seq_show(struct seq_file *seq, void *v) struct ping_iter_state *state = seq->private; int len; - ping_format_sock(v, seq, state->bucket, &len); + ping_v4_format_sock(v, seq, state->bucket, &len); seq_printf(seq, "%*s\n", 127 - len, ""); } return 0; } -static const struct seq_operations ping_seq_ops = { - .show = ping_seq_show, - .start = ping_seq_start, +static const struct seq_operations ping_v4_seq_ops = { + .show = ping_v4_seq_show, + .start = ping_v4_seq_start, .next = ping_seq_next, .stop = ping_seq_stop, }; static int ping_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &ping_seq_ops, + struct ping_seq_afinfo *afinfo = PDE_DATA(inode); + return seq_open_net(inode, file, &afinfo->seq_ops, sizeof(struct ping_iter_state)); } @@ -1120,46 +1129,58 @@ static const struct file_operations ping_seq_fops = { .release = seq_release_net, }; -static int ping_proc_register(struct net *net) +static struct ping_seq_afinfo ping_v4_seq_afinfo = { + .name = "icmp", + .family = AF_INET, + .seq_fops = &ping_seq_fops, + .seq_ops = { + .start = ping_v4_seq_start, + .show = ping_v4_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, + }, +}; + +static int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) { struct proc_dir_entry *p; - int rc = 0; - - p = proc_create("icmp", S_IRUGO, net->proc_net, &ping_seq_fops); + p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, + afinfo->seq_fops, afinfo); if (!p) - rc = -ENOMEM; - return rc; + return -ENOMEM; + return 0; } -static void ping_proc_unregister(struct net *net) +static void ping_proc_unregister(struct net *net, + struct ping_seq_afinfo *afinfo) { - remove_proc_entry("icmp", net->proc_net); + remove_proc_entry(afinfo->name, net->proc_net); } -static int __net_init ping_proc_init_net(struct net *net) +static int __net_init ping_v4_proc_init_net(struct net *net) { - return ping_proc_register(net); + return ping_proc_register(net, &ping_v4_seq_afinfo); } -static void __net_exit ping_proc_exit_net(struct net *net) +static void __net_exit ping_v4_proc_exit_net(struct net *net) { - ping_proc_unregister(net); + ping_proc_unregister(net, &ping_v4_seq_afinfo); } -static struct pernet_operations ping_net_ops = { - .init = ping_proc_init_net, - .exit = ping_proc_exit_net, +static struct pernet_operations ping_v4_net_ops = { + .init = ping_v4_proc_init_net, + .exit = ping_v4_proc_exit_net, }; int __init ping_proc_init(void) { - return register_pernet_subsys(&ping_net_ops); + return register_pernet_subsys(&ping_v4_net_ops); } void ping_proc_exit(void) { - unregister_pernet_subsys(&ping_net_ops); + unregister_pernet_subsys(&ping_v4_net_ops); } #endif -- cgit From d862e546142328d18377a4704be97f2ae301847a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 31 May 2013 15:05:50 +0000 Subject: net: ipv6: Implement /proc/net/icmp6. The format is based on /proc/net/icmp and /proc/net/{udp,raw}6. Compiles and displays reasonable results with CONFIG_IPV6={n,m,y} Couldn't figure out how to test without CONFIG_PROC_FS enabled. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ping.c | 21 +++++++----- net/ipv6/ping.c | 102 +++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 91 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8c2da9b8cd86..3552a45a6f86 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1035,8 +1035,7 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) return pos ? NULL : sk; } -static void *ping_seq_start(struct seq_file *seq, loff_t *pos, - sa_family_t family) +void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) { struct ping_iter_state *state = seq->private; state->bucket = 0; @@ -1046,13 +1045,14 @@ static void *ping_seq_start(struct seq_file *seq, loff_t *pos, return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } +EXPORT_SYMBOL_GPL(ping_seq_start); static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) { return ping_seq_start(seq, pos, AF_INET); } -static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) +void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; @@ -1064,11 +1064,13 @@ static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } +EXPORT_SYMBOL_GPL(ping_seq_next); -static void ping_seq_stop(struct seq_file *seq, void *v) +void ping_seq_stop(struct seq_file *seq, void *v) { read_unlock_bh(&ping_table.lock); } +EXPORT_SYMBOL_GPL(ping_seq_stop); static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket, int *len) @@ -1122,12 +1124,13 @@ static int ping_seq_open(struct inode *inode, struct file *file) sizeof(struct ping_iter_state)); } -static const struct file_operations ping_seq_fops = { +const struct file_operations ping_seq_fops = { .open = ping_seq_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_net, }; +EXPORT_SYMBOL_GPL(ping_seq_fops); static struct ping_seq_afinfo ping_v4_seq_afinfo = { .name = "icmp", @@ -1141,7 +1144,7 @@ static struct ping_seq_afinfo ping_v4_seq_afinfo = { }, }; -static int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) +int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) { struct proc_dir_entry *p; p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, @@ -1150,13 +1153,13 @@ static int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) return -ENOMEM; return 0; } +EXPORT_SYMBOL_GPL(ping_proc_register); -static void ping_proc_unregister(struct net *net, - struct ping_seq_afinfo *afinfo) +void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo) { remove_proc_entry(afinfo->name, net->proc_net); } - +EXPORT_SYMBOL_GPL(ping_proc_unregister); static int __net_init ping_v4_proc_init_net(struct net *net) { diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index a6462d657c15..62ac5f2e0aaf 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -78,29 +78,6 @@ int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int __init pingv6_init(void) -{ - pingv6_ops.ipv6_recv_error = ipv6_recv_error; - pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; - pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; - pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; - pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; - return inet6_register_protosw(&pingv6_protosw); -} - -/* This never gets called because it's not possible to unload the ipv6 module, - * but just in case. - */ -void pingv6_exit(void) -{ - pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; - pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; - pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; - pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; - pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; - inet6_unregister_protosw(&pingv6_protosw); -} - int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -214,3 +191,82 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return err; } + +#ifdef CONFIG_PROC_FS +static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) +{ + return ping_seq_start(seq, pos, AF_INET6); +} + +int ping_v6_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct ping_iter_state *) seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } + return 0; +} + +static struct ping_seq_afinfo ping_v6_seq_afinfo = { + .name = "icmp6", + .family = AF_INET6, + .seq_fops = &ping_seq_fops, + .seq_ops = { + .start = ping_v6_seq_start, + .show = ping_v6_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, + }, +}; + +static int __net_init ping_v6_proc_init_net(struct net *net) +{ + return ping_proc_register(net, &ping_v6_seq_afinfo); +} + +static void __net_init ping_v6_proc_exit_net(struct net *net) +{ + return ping_proc_unregister(net, &ping_v6_seq_afinfo); +} + +static struct pernet_operations ping_v6_net_ops = { + .init = ping_v6_proc_init_net, + .exit = ping_v6_proc_exit_net, +}; +#endif + +int __init pingv6_init(void) +{ +#ifdef CONFIG_PROC_FS + int ret = register_pernet_subsys(&ping_v6_net_ops); + if (ret) + return ret; +#endif + pingv6_ops.ipv6_recv_error = ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; + return inet6_register_protosw(&pingv6_protosw); +} + +/* This never gets called because it's not possible to unload the ipv6 module, + * but just in case. + */ +void pingv6_exit(void) +{ + pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; +#ifdef CONFIG_PROC_FS + unregister_pernet_subsys(&ping_v6_net_ops); +#endif + inet6_unregister_protosw(&pingv6_protosw); +} -- cgit From 256c90dedf538c59c70e65ba1a1340ce793c5b37 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Jun 2013 19:21:08 +0200 Subject: cfg80211: fix potential deadlock regression My big locking cleanups caused a problem by registering the rfkill instance with the RTNL held, while the callback also acquires the RTNL. This potentially causes a deadlock since the two locks used (rfkill mutex and RTNL) can be acquired in two different orders. Fix this by (un)registering rfkill without holding the RTNL. This needs to be done after the device struct is registered, but that can also be done w/o holding the RTNL. Signed-off-by: Johannes Berg --- net/wireless/core.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 221e76b53a97..99d86ddb6331 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -555,14 +555,18 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - rtnl_lock(); res = device_add(&rdev->wiphy.dev); + if (res) + return res; + + res = rfkill_register(rdev->rfkill); if (res) { - rtnl_unlock(); + device_del(&rdev->wiphy.dev); return res; } + rtnl_lock(); /* set up regulatory info */ wiphy_regulatory_register(wiphy); @@ -589,17 +593,6 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_debugfs_rdev_add(rdev); - res = rfkill_register(rdev->rfkill); - if (res) { - device_del(&rdev->wiphy.dev); - - debugfs_remove_recursive(rdev->wiphy.debugfsdir); - list_del_rcu(&rdev->list); - wiphy_regulatory_deregister(wiphy); - rtnl_unlock(); - return res; - } - rdev->wiphy.registered = true; rtnl_unlock(); return 0; @@ -636,11 +629,11 @@ void wiphy_unregister(struct wiphy *wiphy) rtnl_unlock(); __count == 0; })); + rfkill_unregister(rdev->rfkill); + rtnl_lock(); rdev->wiphy.registered = false; - rfkill_unregister(rdev->rfkill); - BUG_ON(!list_empty(&rdev->wdev_list)); /* -- cgit From 9b881963c1c81f965f89a3e89b1aa5557f67ee30 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Jun 2013 22:23:36 +0200 Subject: cfg80211: make wiphy index start at 0 again The change to use atomic_inc_return() for assigning the wiphy index made the first wiphy index 1 instead of 0. This is fine, but we all habitually type "phy0" when we're testing, so make it go back to 0 instead of 1 by subtracting 1 from the index. Signed-off-by: Johannes Berg --- net/wireless/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 99d86ddb6331..4224e7554a76 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -301,6 +301,9 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) return NULL; } + /* atomic_inc_return makes it start at 1, make it start at 0 */ + rdev->wiphy_idx--; + /* give it a proper name */ dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); -- cgit From ebd4687af732a903f6822bb129d2f3a7d830e798 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sat, 1 Jun 2013 16:23:15 +0000 Subject: xfrm: simplify the exit path of xfrm_output_one() Clean up unnecessary assignment and jump. While there, fix up the label name. Signed-off-by: Jean Sacren Signed-off-by: David S. Miller --- net/xfrm/xfrm_output.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 0cf003dfa8fc..eb4a84288648 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -89,7 +89,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) err = x->type->output(x, skb); if (err == -EINPROGRESS) - goto out_exit; + goto out; resume: if (err) { @@ -107,15 +107,14 @@ resume: x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); - err = 0; + return 0; -out_exit: - return err; error: spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); - goto out_exit; +out: + return err; } int xfrm_output_resume(struct sk_buff *skb, int err) -- cgit From 4960c2c6fa252d2e90796074427db3f2297b523b Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sat, 1 Jun 2013 16:23:17 +0000 Subject: Kconfig: remove dangling references to the deleted file Commit 202dc3fc599c1dded235d3b448d9ca924252e354 (Documentation: remove obsolete networking/multicast.txt file) deleted the obsolete file. After the file has been removed, clean up a couple of places where references to the deleted file were made so that users wouldn't be confused when they consult the Help menu. Signed-off-by: Jean Sacren Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8603ca827104..37cf1a6ea3ad 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -9,10 +9,7 @@ config IP_MULTICAST intend to participate in the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. More information about the MBONE is on the WWW at - . Information about the multicast - capabilities of the various network cards is contained in - . For most people, it's - safe to say N. + . For most people, it's safe to say N. config IP_ADVANCED_ROUTER bool "IP: advanced router" @@ -223,10 +220,8 @@ config IP_MROUTE packets that have several destination addresses. It is needed on the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. In order to do that, you would most - likely run the program mrouted. Information about the multicast - capabilities of the various network cards is contained in - . If you haven't heard - about it, you don't need it. + likely run the program mrouted. If you haven't heard about it, you + don't need it. config IP_MROUTE_MULTIPLE_TABLES bool "IP: multicast policy routing" -- cgit From 430f03cde2fb9596d8b562824471e298a8080df9 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 2 Jun 2013 20:43:55 +0000 Subject: net: mark netdev_create_hash __net_init netdev_create_hash() is only called from netdev_init() which is marked __net_init. Signed-off-by: Baruch Siach Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d4d874a25e45..9c18557f93c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6088,7 +6088,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all, } EXPORT_SYMBOL(netdev_increment_features); -static struct hlist_head *netdev_create_hash(void) +static struct hlist_head * __net_init netdev_create_hash(void) { int i; struct hlist_head *hash; -- cgit From c26d6b46da3ee86fa8a864347331e5513ca84c2b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Jun 2013 22:43:52 +0000 Subject: ping: always initialize ->sin6_scope_id and ->sin6_flowinfo If we don't need scope id, we should initialize it to zero. Same for ->sin6_flowinfo. Cc: Lorenzo Colitti Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ping.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3552a45a6f86..1f1b2dd90277 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -892,12 +892,12 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sin6->sin6_port = 0; sin6->sin6_addr = ip6->saddr; + sin6->sin6_flowinfo = 0; if (np->sndflow) sin6->sin6_flowinfo = ip6_flowinfo(ip6); - if (__ipv6_addr_needs_scope_id( - ipv6_addr_type(&sin6->sin6_addr))) - sin6->sin6_scope_id = IP6CB(skb)->iif; + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); if (inet6_sk(sk)->rxopt.all) pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); -- cgit From 600fed5e97afca10356952e334f362e82fc71466 Mon Sep 17 00:00:00 2001 From: Yan Burman Date: Mon, 3 Jun 2013 02:03:34 +0000 Subject: net/ethtool: Fix comment regarding location of dev_ethtool() call Signed-off-by: Yan Burman Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4e6f63ade741..cd23d314d68a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1414,7 +1414,7 @@ static int ethtool_get_module_eeprom(struct net_device *dev, modinfo.eeprom_len); } -/* The main entry point in this file. Called from net/core/dev.c */ +/* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) { -- cgit From 525cebedb32a87fa48584bc44e14170beb2c10d1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 3 Jun 2013 11:49:23 +0000 Subject: pktgen: Fix position of ip and udp header skb_set_network_header() expects an offset based on the data pointer whereas skb_tail_offset() also includes the headroom. This resulted in the ip header being written in a wrong location. Use return values of skb_put() directly and rely on skb->len to set mac, network, and transport header. Cc: Simon Horman Cc: Daniel Borkmann Assisted-by: Daniel Borkmann Signed-off-by: Thomas Graf Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/pktgen.c | 39 ++++++++++++--------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d2ede89662be..303412d8332b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2642,7 +2642,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ u16 queue_map; - unsigned long tail_offset; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2709,20 +2708,15 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IP); } - tail_offset = skb_tail_offset(skb); - if (tail_offset > 0xffff) { - kfree_skb(skb); - return NULL; - } - skb_set_network_header(skb, tail_offset); - skb->transport_header = skb->network_header + sizeof(struct iphdr); - skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, skb->len); + iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); + + skb_set_transport_header(skb, skb->len); + udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; - iph = ip_hdr(skb); - udph = udp_hdr(skb); - memcpy(eth, pkt_dev->hh, 12); *(__be16 *) & eth[12] = protocol; @@ -2752,8 +2746,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->check = 0; iph->check = ip_fast_csum((void *)iph, iph->ihl); skb->protocol = protocol; - skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; pktgen_finalize_skb(pkt_dev, skb, datalen); @@ -2781,7 +2773,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ u16 queue_map; - unsigned long tail_offset; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2829,18 +2820,14 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IPV6); } - tail_offset = skb_tail_offset(skb); - if (tail_offset > 0xffff) { - kfree_skb(skb); - return NULL; - } - skb_set_network_header(skb, tail_offset); - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, skb->len); + iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + + skb_set_transport_header(skb, skb->len); + udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; - iph = ipv6_hdr(skb); - udph = udp_hdr(skb); memcpy(eth, pkt_dev->hh, 12); *(__be16 *) ð[12] = protocol; @@ -2875,8 +2862,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, iph->daddr = pkt_dev->cur_in6_daddr; iph->saddr = pkt_dev->cur_in6_saddr; - skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->pkt_overhead); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; -- cgit From 00f97da17a0c8d656d0c9a60b1d7f38735f69817 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 3 Jun 2013 16:31:36 +0000 Subject: netpoll: fix position of network header Similar to the problem in pktgen, netpoll uses skb_tail_offset() too, as the code is copied from pktgen. Also use return values of skb_put() directly, this will simiplify the code. Reported-by: Thomas Graf Cc: Thomas Graf Cc: Daniel Borkmann Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 688517c7ff17..03c8ec3edc72 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -676,8 +676,6 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo spin_lock_irqsave(&npinfo->rx_lock, flags); list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - unsigned long tail_offset; - if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) continue; @@ -691,30 +689,20 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo send_skb->dev = skb->dev; skb_reset_network_header(send_skb); - skb_put(send_skb, sizeof(struct ipv6hdr)); - hdr = ipv6_hdr(send_skb); - + hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr)); *(__be32*)hdr = htonl(0x60000000); - hdr->payload_len = htons(size); hdr->nexthdr = IPPROTO_ICMPV6; hdr->hop_limit = 255; hdr->saddr = *saddr; hdr->daddr = *daddr; - tail_offset = skb_tail_offset(skb); - if (tail_offset > 0xffff) { - kfree_skb(send_skb); - continue; - } - skb_set_network_header(send_skb, tail_offset); - skb_put(send_skb, size); - - icmp6h = (struct icmp6hdr *)skb_transport_header(skb); + icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr)); icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; icmp6h->icmp6_router = 0; icmp6h->icmp6_solicited = 1; - target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); + + target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr)); *target = msg->target; icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, IPPROTO_ICMPV6, -- cgit From 482a9c74fa17c5d584995c19e1a36eaf710d1193 Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Mon, 3 Jun 2013 17:29:33 +0300 Subject: mac80211: fix powersave bug and clean up ieee80211_rx_bss_info ieee80211_rx_bss_info() deals with dtim_period setting and PS update when associated. Move all these to another locations cleaning this function. Also, the current implementation is buggy because when it calls ieee80211_recalc_ps() bss_conf->dtim_period is notset properly yet and thus nothing will happen. Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index df8170a80a56..aa5cd2e138be 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2737,24 +2737,9 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, int freq; struct ieee80211_bss *bss; struct ieee80211_channel *channel; - bool need_ps = false; sdata_assert_lock(sdata); - if ((sdata->u.mgd.associated && - ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) || - (sdata->u.mgd.assoc_data && - ether_addr_equal(mgmt->bssid, - sdata->u.mgd.assoc_data->bss->bssid))) { - /* not previously set so we may need to recalc */ - need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period; - - if (elems->tim && !elems->parse_error) { - const struct ieee80211_tim_ie *tim_ie = elems->tim; - sdata->u.mgd.dtim_period = tim_ie->dtim_period; - } - } - if (elems->ds_params) freq = ieee80211_channel_to_frequency(elems->ds_params[0], rx_status->band); @@ -2775,12 +2760,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) return; - if (need_ps) { - mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, -1); - mutex_unlock(&local->iflist_mtx); - } - ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems, true); @@ -2894,6 +2873,10 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, len - baselen, false, &elems); ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); + if (elems.tim && !elems.parse_error) { + const struct ieee80211_tim_ie *tim_ie = elems.tim; + ifmgd->dtim_period = tim_ie->dtim_period; + } ifmgd->assoc_data->have_beacon = true; ifmgd->assoc_data->need_beacon = false; if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { @@ -3096,6 +3079,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } changed |= BSS_CHANGED_DTIM_PERIOD; + + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_ps(local, -1); + mutex_unlock(&local->iflist_mtx); + ieee80211_recalc_ps_vif(sdata); } -- cgit From 989c6505cdda587f87573bb6828f23964dd3d19b Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Thu, 16 May 2013 17:34:17 +0300 Subject: mac80211: Use suitable semantics for beacon availability indication Currently beacon availability upon association is marked by have_beacon flag of assoc_data structure that becomes unavailable when association completes. However beacon availability indication is required also after association to inform a driver. Currently dtim_period parameter is used for this purpose. Move have_beacon flag to another structure, persistant throughout a interface's life cycle. Use suitable sematics for beacon availability indication. Signed-off-by: Alexander Bondar [fix another instance of BSS_CHANGED_DTIM_PERIOD in docs] Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/mlme.c | 20 +++++++++++--------- net/mac80211/util.c | 5 +++-- 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9eed6f1d1614..7a6f1a0207ec 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -366,7 +366,7 @@ struct ieee80211_mgd_assoc_data { u8 ssid_len; u8 supp_rates_len; bool wmm, uapsd; - bool have_beacon, need_beacon; + bool need_beacon; bool synced; bool timeout_started; @@ -404,6 +404,7 @@ struct ieee80211_if_managed { bool powersave; /* powersave requested for this iface */ bool broken_ap; /* AP is broken -- turn off powersave */ + bool have_beacon; u8 dtim_period; enum ieee80211_smps_mode req_smps, /* requested smps mode */ driver_smps_mode; /* smps mode request */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index aa5cd2e138be..ad9bb9e10cbb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1360,7 +1360,7 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) IEEE80211_STA_CONNECTION_POLL)) return false; - if (!sdata->vif.bss_conf.dtim_period) + if (!mgd->have_beacon) return false; rcu_read_lock(); @@ -1771,7 +1771,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); - if (sdata->u.mgd.assoc_data->have_beacon) { + if (sdata->u.mgd.have_beacon) { /* * If the AP is buggy we may get here with no DTIM period * known, so assume it's 1 which is the only safe assumption @@ -1779,7 +1779,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, * probably just won't work at all. */ bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1; - bss_info_changed |= BSS_CHANGED_DTIM_PERIOD; + bss_info_changed |= BSS_CHANGED_BEACON_INFO; } else { bss_conf->dtim_period = 0; } @@ -1903,6 +1903,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.chswitch_timer); sdata->vif.bss_conf.dtim_period = 0; + ifmgd->have_beacon = false; ifmgd->flags = 0; ieee80211_vif_release_channel(sdata); @@ -2877,7 +2878,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, const struct ieee80211_tim_ie *tim_ie = elems.tim; ifmgd->dtim_period = tim_ie->dtim_period; } - ifmgd->assoc_data->have_beacon = true; + ifmgd->have_beacon = true; ifmgd->assoc_data->need_beacon = false; if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { sdata->vif.bss_conf.sync_tsf = @@ -3059,7 +3060,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, * If we haven't had a beacon before, tell the driver about the * DTIM period (and beacon timing if desired) now. */ - if (!bss_conf->dtim_period) { + if (!ifmgd->have_beacon) { /* a few bogus AP send dtim_period = 0 or no TIM IE */ if (elems.tim) bss_conf->dtim_period = elems.tim->dtim_period ?: 1; @@ -3078,7 +3079,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.sync_dtim_count = 0; } - changed |= BSS_CHANGED_DTIM_PERIOD; + changed |= BSS_CHANGED_BEACON_INFO; + ifmgd->have_beacon = true; mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local, -1); @@ -3424,8 +3426,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started && time_after(jiffies, ifmgd->assoc_data->timeout)) { - if ((ifmgd->assoc_data->need_beacon && - !ifmgd->assoc_data->have_beacon) || + if ((ifmgd->assoc_data->need_beacon && !ifmgd->have_beacon) || ieee80211_do_assoc(sdata)) { u8 bssid[ETH_ALEN]; @@ -4193,6 +4194,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_data = assoc_data; ifmgd->dtim_period = 0; + ifmgd->have_beacon = false; err = ieee80211_prep_connection(sdata, req->bss, true); if (err) @@ -4224,7 +4226,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->dtim_period = tim->dtim_period; dtim_count = tim->dtim_count; } - assoc_data->have_beacon = true; + ifmgd->have_beacon = true; assoc_data->timeout = jiffies; assoc_data->timeout_started = true; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 89a83770d152..5a6c1351d1d3 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1584,8 +1584,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_ARP_FILTER | BSS_CHANGED_PS; - if (sdata->u.mgd.dtim_period) - changed |= BSS_CHANGED_DTIM_PERIOD; + /* Re-send beacon info report to the driver */ + if (sdata->u.mgd.have_beacon) + changed |= BSS_CHANGED_BEACON_INFO; sdata_lock(sdata); ieee80211_bss_info_change_notify(sdata, changed); -- cgit From 780b40df12cf0161d8ccc5381940e04584793933 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Jun 2013 09:32:50 +0200 Subject: wireless: fix kernel-doc Some kernel-doc fixes for forgotten fields and renamed things. Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 41c28b977f7c..bd12fc54266c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -297,6 +297,9 @@ struct sta_ampdu_mlme { * @rcu_head: RCU head used for freeing this station struct * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, * taken from HT/VHT capabilities or VHT operating mode notification + * @chains: chains ever used for RX from this station + * @chain_signal_last: last signal (per chain) + * @chain_signal_avg: signal average (per chain) */ struct sta_info { /* General information, mostly static */ -- cgit From 9cefbbc9c8f9abe0bc514dcfca46e8051ee84050 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 4 Jun 2013 22:22:15 +0000 Subject: netfilter: nfnetlink_queue: cleanup copy_range usage For every packet queued, we check if configured copy_range is 0, and treat that as 'copy entire packet'. We can move this check to the queue configuration, and can set copy_range appropriately. Also, convert repetitive '0xffff - NLA_HDRLEN' to a macro. [ queue initialization still used 0xffff, although its harmless since the initial setting is overwritten on queue config ] Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index cff4449f01d2..3c4218141d70 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -41,6 +41,14 @@ #define NFQNL_QMAX_DEFAULT 1024 +/* We're using struct nlattr which has 16bit nla_len. Note that nla_len + * includes the header length. Thus, the maximum packet length that we + * support is 65531 bytes. We send truncated packets if the specified length + * is larger than that. Userspace can check for presence of NFQA_CAP_LEN + * attribute to detect truncation. + */ +#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN) + struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; @@ -122,7 +130,7 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xffff; + inst->copy_range = NFQNL_MAX_COPY_RANGE; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); @@ -333,10 +341,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, return NULL; data_len = ACCESS_ONCE(queue->copy_range); - if (data_len == 0 || data_len > entskb->len) + if (data_len > entskb->len) data_len = entskb->len; - if (!entskb->head_frag || skb_headlen(entskb) < L1_CACHE_BYTES || skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) @@ -727,13 +734,8 @@ nfqnl_set_mode(struct nfqnl_instance *queue, case NFQNL_COPY_PACKET: queue->copy_mode = mode; - /* We're using struct nlattr which has 16bit nla_len. Note that - * nla_len includes the header length. Thus, the maximum packet - * length that we support is 65531 bytes. We send truncated - * packets if the specified length is larger than that. - */ - if (range > 0xffff - NLA_HDRLEN) - queue->copy_range = 0xffff - NLA_HDRLEN; + if (range == 0 || range > NFQNL_MAX_COPY_RANGE) + queue->copy_range = NFQNL_MAX_COPY_RANGE; else queue->copy_range = range; break; -- cgit From 7f87712c0152511a1842698ad8dca425fee2dc4f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 4 Jun 2013 22:22:16 +0000 Subject: netfilter: nfnetlink_queue: only add CAP_LEN attr when needed CAP_LEN contains the size of the network packet we're queueing to userspace, i.e. normally it is the same as the NFQA_PAYLOAD attribute len. Include it only in the unlikely case when NFQA_PAYLOAD is truncated due to copy_range limitations. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 3c4218141d70..eb2cde836b9a 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -472,7 +472,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; - if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) + if (cap_len > data_len && + nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) goto nla_put_failure; if (nfqnl_put_packet_info(skb, entskb)) -- cgit From 4c4d41f200db375b2d2cc6d0a1de0606c8266398 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Thu, 6 Jun 2013 10:15:54 +0800 Subject: xfrm: add LINUX_MIB_XFRMACQUIREERROR statistic counter When host ping its peer, ICMP echo request packet triggers IPsec policy, then host negotiates SA secret with its peer. After IKE installed SA for OUT direction, but before SA for IN direction installed, host get ICMP echo reply from its peer. At the time being, the SA state for IN direction could be XFRM_STATE_ACQ, then the received packet will be dropped after adding LINUX_MIB_XFRMINSTATEINVALID statistic. Adding a LINUX_MIB_XFRMACQUIREERROR statistic counter for such scenario when SA in larval state is much clearer for user than LINUX_MIB_XFRMINSTATEINVALID which indicates the SA is totally bad. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 5 +++++ net/xfrm/xfrm_proc.c | 1 + 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ab2bb42fe094..88843996f935 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -163,6 +163,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; spin_lock(&x->lock); + if (unlikely(x->km.state == XFRM_STATE_ACQ)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); + goto drop_unlock; + } + if (unlikely(x->km.state != XFRM_STATE_VALID)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); goto drop_unlock; diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index c721b0d9ab8b..80cd1e55b834 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -44,6 +44,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), + SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), SNMP_MIB_SENTINEL }; -- cgit From 5ee98591577aa63dbb9e78a0d142abc86b9063d0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Jun 2013 05:11:45 +0000 Subject: net: minor: tcp: use tcp_skb_mss helper in tcp_tso_segment We have the minimal inline helper tcp_skb_mss to access skb_shinfo(skb)->gso_size, so also use it here to get mss. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b5d4ad988053..6a1cf95abc98 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2905,7 +2905,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, oldlen = (u16)~skb->len; __skb_pull(skb, thlen); - mss = skb_shinfo(skb)->gso_size; + mss = tcp_skb_mss(skb); if (unlikely(skb->len <= mss)) goto out; @@ -3071,7 +3071,7 @@ found: flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); - mss = skb_shinfo(p)->gso_size; + mss = tcp_skb_mss(p); flush |= (len - 1) >= mss; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); -- cgit From 28850dc7c71da9d0c0e39246e9ff6913f41f8d0a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Jun 2013 05:11:46 +0000 Subject: net: tcp: move GRO/GSO functions to tcp_offload Would be good to make things explicit and move those functions to a new file called tcp_offload.c, thus make this similar to tcpv6_offload.c. While moving all related functions into tcp_offload.c, we can also make some of them static, since they are only used there. Also, add an explicit registration function. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/Makefile | 2 +- net/ipv4/af_inet.c | 13 +- net/ipv4/tcp.c | 241 ----------------------------------- net/ipv4/tcp_ipv4.c | 66 +--------- net/ipv4/tcp_offload.c | 332 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 336 insertions(+), 318 deletions(-) create mode 100644 net/ipv4/tcp_offload.c (limited to 'net') diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 089cb9f36387..4d3e138c564f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - datagram.o raw.o udp.o udplite.o \ + tcp_offload.o datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9c090c7daea1..7b514290efc6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1559,15 +1559,6 @@ static const struct net_protocol tcp_protocol = { .netns_ok = 1, }; -static const struct net_offload tcp_offload = { - .callbacks = { - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, - }, -}; - static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, @@ -1683,8 +1674,8 @@ static int __init ipv4_offload_init(void) */ if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); - if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) - pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__); + if (tcpv4_offload_init() < 0) + pr_crit("%s: Cannot add TCP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); return 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6a1cf95abc98..bc4246940f6c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2877,247 +2877,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct tcphdr *th; - unsigned int thlen; - unsigned int seq; - __be32 delta; - unsigned int oldlen; - unsigned int mss; - struct sk_buff *gso_skb = skb; - __sum16 newcheck; - bool ooo_okay, copy_destructor; - - if (!pskb_may_pull(skb, sizeof(*th))) - goto out; - - th = tcp_hdr(skb); - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - goto out; - - if (!pskb_may_pull(skb, thlen)) - goto out; - - oldlen = (u16)~skb->len; - __skb_pull(skb, thlen); - - mss = tcp_skb_mss(skb); - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_MPLS | - SKB_GSO_UDP_TUNNEL | - 0) || - !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - copy_destructor = gso_skb->destructor == tcp_wfree; - ooo_okay = gso_skb->ooo_okay; - /* All segments but the first should have ooo_okay cleared */ - skb->ooo_okay = 0; - - segs = skb_segment(skb, features); - if (IS_ERR(segs)) - goto out; - - /* Only first segment might have ooo_okay set */ - segs->ooo_okay = ooo_okay; - - delta = htonl(oldlen + (thlen + mss)); - - skb = segs; - th = tcp_hdr(skb); - seq = ntohl(th->seq); - - newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); - - do { - th->fin = th->psh = 0; - th->check = newcheck; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = - csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); - - seq += mss; - if (copy_destructor) { - skb->destructor = gso_skb->destructor; - skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. - */ - skb->truesize = mss; - gso_skb->truesize -= mss; - } - skb = skb->next; - th = tcp_hdr(skb); - - th->seq = htonl(seq); - th->cwr = 0; - } while (skb->next); - - /* Following permits TCP Small Queues to work well with GSO : - * The callback to TCP stack will be called at the time last frag - * is freed at TX completion, and not right now when gso_skb - * is freed by GSO engine - */ - if (copy_destructor) { - swap(gso_skb->sk, skb->sk); - swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); - } - - delta = htonl(oldlen + (skb_tail_pointer(skb) - - skb_transport_header(skb)) + - skb->data_len); - th->check = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); - if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); - -out: - return segs; -} -EXPORT_SYMBOL(tcp_tso_segment); - -struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - struct sk_buff **pp = NULL; - struct sk_buff *p; - struct tcphdr *th; - struct tcphdr *th2; - unsigned int len; - unsigned int thlen; - __be32 flags; - unsigned int mss = 1; - unsigned int hlen; - unsigned int off; - int flush = 1; - int i; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*th); - th = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } - - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - goto out; - - hlen = off + thlen; - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } - - skb_gro_pull(skb, thlen); - - len = skb_gro_len(skb); - flags = tcp_flag_word(th); - - for (; (p = *head); head = &p->next) { - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - th2 = tcp_hdr(p); - - if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } - - goto found; - } - - goto out_check_final; - -found: - flush = NAPI_GRO_CB(p)->flush; - flush |= (__force int)(flags & TCP_FLAG_CWR); - flush |= (__force int)((flags ^ tcp_flag_word(th2)) & - ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); - flush |= (__force int)(th->ack_seq ^ th2->ack_seq); - for (i = sizeof(*th); i < thlen; i += 4) - flush |= *(u32 *)((u8 *)th + i) ^ - *(u32 *)((u8 *)th2 + i); - - mss = tcp_skb_mss(p); - - flush |= (len - 1) >= mss; - flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); - - if (flush || skb_gro_receive(head, skb)) { - mss = 1; - goto out_check_final; - } - - p = *head; - th2 = tcp_hdr(p); - tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); - -out_check_final: - flush = len < mss; - flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | - TCP_FLAG_RST | TCP_FLAG_SYN | - TCP_FLAG_FIN)); - - if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) - pp = head; - -out: - NAPI_GRO_CB(skb)->flush |= flush; - - return pp; -} -EXPORT_SYMBOL(tcp_gro_receive); - -int tcp_gro_complete(struct sk_buff *skb) -{ - struct tcphdr *th = tcp_hdr(skb); - - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - - skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - - if (th->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - - return 0; -} -EXPORT_SYMBOL(tcp_gro_complete); - #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; static DEFINE_MUTEX(tcp_md5sig_mutex); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d20ede0c9593..289039b4d8de 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -545,8 +545,7 @@ out: sock_put(sk); } -static void __tcp_v4_send_check(struct sk_buff *skb, - __be32 saddr, __be32 daddr) +void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct tcphdr *th = tcp_hdr(skb); @@ -571,23 +570,6 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); -int tcp_v4_gso_send_check(struct sk_buff *skb) -{ - const struct iphdr *iph; - struct tcphdr *th; - - if (!pskb_may_pull(skb, sizeof(*th))) - return -EINVAL; - - iph = ip_hdr(skb); - th = tcp_hdr(skb); - - th->check = 0; - skb->ip_summed = CHECKSUM_PARTIAL; - __tcp_v4_send_check(skb, iph->saddr, iph->daddr); - return 0; -} - /* * This routine will send an RST to the other tcp. * @@ -2795,52 +2777,6 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - const struct iphdr *iph = skb_gro_network_header(skb); - __wsum wsum; - __sum16 sum; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } -flush: - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - - return tcp_gro_receive(head, skb); -} - -int tcp4_gro_complete(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); - - th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), - iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - - return tcp_gro_complete(skb); -} - struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c new file mode 100644 index 000000000000..3a7525e6c086 --- /dev/null +++ b/net/ipv4/tcp_offload.c @@ -0,0 +1,332 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TCPv4 GSO/GRO support + */ + +#include +#include +#include + +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct tcphdr *th; + unsigned int thlen; + unsigned int seq; + __be32 delta; + unsigned int oldlen; + unsigned int mss; + struct sk_buff *gso_skb = skb; + __sum16 newcheck; + bool ooo_okay, copy_destructor; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = tcp_hdr(skb); + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + + oldlen = (u16)~skb->len; + __skb_pull(skb, thlen); + + mss = tcp_skb_mss(skb); + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + SKB_GSO_GRE | + SKB_GSO_MPLS | + SKB_GSO_UDP_TUNNEL | + 0) || + !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + copy_destructor = gso_skb->destructor == tcp_wfree; + ooo_okay = gso_skb->ooo_okay; + /* All segments but the first should have ooo_okay cleared */ + skb->ooo_okay = 0; + + segs = skb_segment(skb, features); + if (IS_ERR(segs)) + goto out; + + /* Only first segment might have ooo_okay set */ + segs->ooo_okay = ooo_okay; + + delta = htonl(oldlen + (thlen + mss)); + + skb = segs; + th = tcp_hdr(skb); + seq = ntohl(th->seq); + + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + + do { + th->fin = th->psh = 0; + th->check = newcheck; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = + csum_fold(csum_partial(skb_transport_header(skb), + thlen, skb->csum)); + + seq += mss; + if (copy_destructor) { + skb->destructor = gso_skb->destructor; + skb->sk = gso_skb->sk; + /* {tcp|sock}_wfree() use exact truesize accounting : + * sum(skb->truesize) MUST be exactly be gso_skb->truesize + * So we account mss bytes of 'true size' for each segment. + * The last segment will contain the remaining. + */ + skb->truesize = mss; + gso_skb->truesize -= mss; + } + skb = skb->next; + th = tcp_hdr(skb); + + th->seq = htonl(seq); + th->cwr = 0; + } while (skb->next); + + /* Following permits TCP Small Queues to work well with GSO : + * The callback to TCP stack will be called at the time last frag + * is freed at TX completion, and not right now when gso_skb + * is freed by GSO engine + */ + if (copy_destructor) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); + swap(gso_skb->truesize, skb->truesize); + } + + delta = htonl(oldlen + (skb_tail_pointer(skb) - + skb_transport_header(skb)) + + skb->data_len); + th->check = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = csum_fold(csum_partial(skb_transport_header(skb), + thlen, skb->csum)); +out: + return segs; +} +EXPORT_SYMBOL(tcp_tso_segment); + +struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct tcphdr *th; + struct tcphdr *th2; + unsigned int len; + unsigned int thlen; + __be32 flags; + unsigned int mss = 1; + unsigned int hlen; + unsigned int off; + int flush = 1; + int i; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*th); + th = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } + + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + hlen = off + thlen; + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } + + skb_gro_pull(skb, thlen); + + len = skb_gro_len(skb); + flags = tcp_flag_word(th); + + for (; (p = *head); head = &p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + + if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + goto found; + } + + goto out_check_final; + +found: + flush = NAPI_GRO_CB(p)->flush; + flush |= (__force int)(flags & TCP_FLAG_CWR); + flush |= (__force int)((flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + flush |= (__force int)(th->ack_seq ^ th2->ack_seq); + for (i = sizeof(*th); i < thlen; i += 4) + flush |= *(u32 *)((u8 *)th + i) ^ + *(u32 *)((u8 *)th2 + i); + + mss = tcp_skb_mss(p); + + flush |= (len - 1) >= mss; + flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); + + if (flush || skb_gro_receive(head, skb)) { + mss = 1; + goto out_check_final; + } + + p = *head; + th2 = tcp_hdr(p); + tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); + +out_check_final: + flush = len < mss; + flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | + TCP_FLAG_RST | TCP_FLAG_SYN | + TCP_FLAG_FIN)); + + if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) + pp = head; + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} +EXPORT_SYMBOL(tcp_gro_receive); + +int tcp_gro_complete(struct sk_buff *skb) +{ + struct tcphdr *th = tcp_hdr(skb); + + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + if (th->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + + return 0; +} +EXPORT_SYMBOL(tcp_gro_complete); + +static int tcp_v4_gso_send_check(struct sk_buff *skb) +{ + const struct iphdr *iph; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + iph = ip_hdr(skb); + th = tcp_hdr(skb); + + th->check = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v4_send_check(skb, iph->saddr, iph->daddr); + return 0; +} + +static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + + case CHECKSUM_NONE: + wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), IPPROTO_TCP, 0); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + return tcp_gro_receive(head, skb); +} + +static int tcp4_gro_complete(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), + iph->saddr, iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + return tcp_gro_complete(skb); +} + +static const struct net_offload tcpv4_offload = { + .callbacks = { + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + }, +}; + +int __init tcpv4_offload_init(void) +{ + return inet_add_offload(&tcpv4_offload, IPPROTO_TCP); +} -- cgit From c05cdb1b864f548c0c3d8ae3b51264e6739a69b1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 3 Jun 2013 09:46:28 +0000 Subject: netlink: allow large data transfers from user-space I can hit ENOBUFS in the sendmsg() path with a large batch that is composed of many netlink messages. Here that limit is 8 MBytes of skbuff data area as kmalloc does not manage to get more than that. While discussing atomic rule-set for nftables with Patrick McHardy, we decided to put all rule-set updates that need to be applied atomically in one single batch to simplify the existing approach. However, as explained above, the existing netlink code limits us to a maximum of ~20000 rules that fit in one single batch without hitting ENOBUFS. iptables does not have such limitation as it is using vmalloc. This patch adds netlink_alloc_large_skb() which is only used in the netlink_sendmsg() path. It uses alloc_skb if the memory requested is <= one memory page, that should be the common case for most subsystems, else vmalloc for higher memory allocations. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d0b3dd60d386..68c167374394 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -750,6 +750,10 @@ static void netlink_skb_destructor(struct sk_buff *skb) skb->head = NULL; } #endif + if (is_vmalloc_addr(skb->head)) { + vfree(skb->head); + skb->head = NULL; + } if (skb->sk != NULL) sock_rfree(skb); } @@ -1420,6 +1424,35 @@ struct sock *netlink_getsockbyfilp(struct file *filp) return sock; } +static struct sk_buff *netlink_alloc_large_skb(unsigned int size) +{ + struct sk_buff *skb; + void *data; + + if (size <= NLMSG_GOODSIZE) + return alloc_skb(size, GFP_KERNEL); + + skb = alloc_skb_head(GFP_KERNEL); + if (skb == NULL) + return NULL; + + data = vmalloc(size); + if (data == NULL) + goto err; + + skb->head = data; + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; + skb->len = 0; + skb->destructor = netlink_skb_destructor; + + return skb; +err: + kfree_skb(skb); + return NULL; +} + /* * Attach a skb to a netlink socket. * The caller must hold a reference to the destination socket. On error, the @@ -1510,7 +1543,7 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) return skb; delta = skb->end - skb->tail; - if (delta * 2 < skb->truesize) + if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; if (skb_shared(skb)) { @@ -2096,7 +2129,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = alloc_skb(len, GFP_KERNEL); + skb = netlink_alloc_large_skb(len); if (skb == NULL) goto out; -- cgit From af12fa6e46aa651e7b86a4c4117b562518fef184 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:39:41 +0300 Subject: net: add napi_id and hash Adds a napi_id and a hashing mechanism to lookup a napi by id. This will be used by subsequent patches to implement low latency Ethernet device polling. Based on a code sample by Eric Dumazet. Signed-off-by: Eliezer Tamir Signed-off-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/dev.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 9c18557f93c6..fa007dba6beb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -166,6 +167,12 @@ static struct list_head offload_base __read_mostly; DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +/* protects napi_hash addition/deletion and napi_gen_id */ +static DEFINE_SPINLOCK(napi_hash_lock); + +static unsigned int napi_gen_id; +static DEFINE_HASHTABLE(napi_hash, 8); + seqcount_t devnet_rename_seq; static inline void dev_base_seq_inc(struct net *net) @@ -4136,6 +4143,58 @@ void napi_complete(struct napi_struct *n) } EXPORT_SYMBOL(napi_complete); +/* must be called under rcu_read_lock(), as we dont take a reference */ +struct napi_struct *napi_by_id(unsigned int napi_id) +{ + unsigned int hash = napi_id % HASH_SIZE(napi_hash); + struct napi_struct *napi; + + hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node) + if (napi->napi_id == napi_id) + return napi; + + return NULL; +} +EXPORT_SYMBOL_GPL(napi_by_id); + +void napi_hash_add(struct napi_struct *napi) +{ + if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { + + spin_lock(&napi_hash_lock); + + /* 0 is not a valid id, we also skip an id that is taken + * we expect both events to be extremely rare + */ + napi->napi_id = 0; + while (!napi->napi_id) { + napi->napi_id = ++napi_gen_id; + if (napi_by_id(napi->napi_id)) + napi->napi_id = 0; + } + + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + + spin_unlock(&napi_hash_lock); + } +} +EXPORT_SYMBOL_GPL(napi_hash_add); + +/* Warning : caller is responsible to make sure rcu grace period + * is respected before freeing memory containing @napi + */ +void napi_hash_del(struct napi_struct *napi) +{ + spin_lock(&napi_hash_lock); + + if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) + hlist_del_rcu(&napi->napi_hash_node); + + spin_unlock(&napi_hash_lock); +} +EXPORT_SYMBOL_GPL(napi_hash_del); + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { -- cgit From 060212928670593fb89243640bf05cf89560b023 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:39:50 +0300 Subject: net: add low latency socket poll Adds an ndo_ll_poll method and the code that supports it. This method can be used by low latency applications to busy-poll Ethernet device queues directly from the socket code. sysctl_net_ll_poll controls how many microseconds to poll. Default is zero (disabled). Individual protocol support will be added by subsequent patches. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Signed-off-by: Eliezer Tamir Acked-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/Kconfig | 12 ++++++++++++ net/core/skbuff.c | 4 ++++ net/core/sock.c | 6 ++++++ net/core/sysctl_net_core.c | 10 ++++++++++ net/ipv4/proc.c | 1 + net/socket.c | 6 ++++++ 6 files changed, 39 insertions(+) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 523e43e6da1b..d6a9ce6e1800 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -243,6 +243,18 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis +config NET_LL_RX_POLL + bool "Low Latency Receive Poll" + depends on X86_TSC + default n + ---help--- + Support Low Latency Receive Queue Poll. + (For network card drivers which support this option.) + When waiting for data in read or poll call directly into the the device driver + to flush packets which may be pending on the device queues into the stack. + + If unsure, say N. + config BQL boolean depends on SYSFS diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 73f57a0e1523..4a4181e16c1a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -733,6 +733,10 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->vlan_tci = old->vlan_tci; skb_copy_secmark(new, old); + +#ifdef CONFIG_NET_LL_RX_POLL + new->napi_id = old->napi_id; +#endif } /* diff --git a/net/core/sock.c b/net/core/sock.c index 88868a9d21da..788c0da5eed1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,6 +139,8 @@ #include #endif +#include + static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); @@ -2284,6 +2286,10 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); +#ifdef CONFIG_NET_LL_RX_POLL + sk->sk_napi_id = 0; +#endif + /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 741db5fc7806..4b48f39582b0 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -19,6 +19,7 @@ #include #include #include +#include static int one = 1; @@ -284,6 +285,15 @@ static struct ctl_table net_core_table[] = { .proc_handler = flow_limit_table_len_sysctl }, #endif /* CONFIG_NET_FLOW_LIMIT */ +#ifdef CONFIG_NET_LL_RX_POLL + { + .procname = "low_latency_poll", + .data = &sysctl_net_ll_poll, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax + }, +#endif #endif /* CONFIG_NET */ { .procname = "netdev_budget", diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 2a5bf86d2415..6577a1149a47 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -273,6 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), + SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS), SNMP_MIB_SENTINEL }; diff --git a/net/socket.c b/net/socket.c index 3ebdcb805c51..21fd29f63ed2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,6 +104,12 @@ #include #include #include +#include + +#ifdef CONFIG_NET_LL_RX_POLL +unsigned long sysctl_net_ll_poll __read_mostly; +EXPORT_SYMBOL_GPL(sysctl_net_ll_poll); +#endif static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, -- cgit From a5b50476f77a8fcc8055c955720d05a7c2d9c532 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:40:00 +0300 Subject: udp: add low latency socket poll support Add upport for busy-polling on UDP sockets. In __udp[46]_lib_rcv add a call to sk_mark_ll() to copy the napi_id from the skb into the sk. This is done at the earliest possible moment, right after we identify which socket this skb is for. In __skb_recv_datagram When there is no data and the user tries to read we busy poll. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Signed-off-by: Eliezer Tamir Acked-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/datagram.c | 4 ++++ net/ipv4/udp.c | 6 +++++- net/ipv6/udp.c | 6 +++++- 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index b71423db7785..9cbaba98ce4c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -56,6 +56,7 @@ #include #include #include +#include /* * Is a socket 'connection oriented' ? @@ -207,6 +208,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, } spin_unlock_irqrestore(&queue->lock, cpu_flags); + if (sk_valid_ll(sk) && sk_poll_ll(sk, flags & MSG_DONTWAIT)) + continue; + /* User doesn't want to wait */ error = -EAGAIN; if (!timeo) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c7338ec79cc0..2955b25aee6d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -109,6 +109,7 @@ #include #include #include +#include #include "udp_impl.h" struct udp_table udp_table __read_mostly; @@ -1709,7 +1710,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { - int ret = udp_queue_rcv_skb(sk, skb); + int ret; + + sk_mark_ll(sk, skb); + ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b5808539cd5c..f77e34c5a0e2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -841,7 +842,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { - int ret = udpv6_queue_rcv_skb(sk, skb); + int ret; + + sk_mark_ll(sk, skb); + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but -- cgit From d30e383bb856f614ddb5bbbb5a7d3f86240e41ec Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:40:10 +0300 Subject: tcp: add low latency socket poll support. Adds low latency socket poll support for TCP. In tcp_v[46]_rcv() add a call to sk_mark_ll() to copy the napi_id from the skb to the sk. In tcp_recvmsg(), when there is no data in the socket we busy-poll. This is a good example of how to add busy-poll support to more protocols. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Signed-off-by: Eliezer Tamir Acked-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++++ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/tcp_ipv6.c | 2 ++ 3 files changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bc4246940f6c..46ed9afd1f5e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,6 +279,7 @@ #include #include +#include int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; @@ -1553,6 +1554,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sk_buff *skb; u32 urg_hole = 0; + if (sk_valid_ll(sk) && skb_queue_empty(&sk->sk_receive_queue) + && (sk->sk_state == TCP_ESTABLISHED)) + sk_poll_ll(sk, nonblock); + lock_sock(sk); err = -ENOTCONN; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 289039b4d8de..1063bb83e342 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include @@ -1993,6 +1994,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_mark_ll(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a17ed9eaf39..5cffa5c3e6b8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -63,6 +63,7 @@ #include #include #include +#include #include @@ -1498,6 +1499,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_mark_ll(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); -- cgit From 30f3a40f9a2a2869a560a9cb9ef488d10c803e14 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 5 Jun 2013 20:14:10 +0800 Subject: net: remove last caller of skb_tail_offset() and itself Similar to the following commits: commit 00f97da17a0c8d656d0c9 (netpoll: fix position of network header) commit 525cebedb32a87fa48584 (pktgen: Fix position of ip and udp header) using skb_tail_offset() seems not correct since the offset is based on head pointer. With the last caller removed, skb_tail_offset() can be killed finally. Cc: Thomas Graf Cc: Daniel Borkmann Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index df97f0ac1a1c..132a09664704 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -945,7 +945,6 @@ static int ipmr_cache_report(struct mr_table *mrt, struct igmpmsg *msg; struct sock *mroute_sk; int ret; - unsigned long tail_offset; #ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) @@ -981,12 +980,7 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Copy the IP header */ - tail_offset = skb_tail_offset(skb); - if (tail_offset > 0xffff) { - kfree_skb(skb); - return -EINVAL; - } - skb_set_network_header(skb, tail_offset); + skb_set_network_header(skb, skb->len); skb_put(skb, ihl); skb_copy_to_linear_data(skb, pkt->data, ihl); ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ -- cgit From 9ba18891f75535eca3ef53138b48970eb60f5255 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 5 Jun 2013 10:08:00 -0400 Subject: bridge: Add flag to control mac learning. Allow user to control whether mac learning is enabled on the port. By default, mac learning is enabled. Disabling mac learning will cause new dynamic FDB entries to not be created for a particular port. Signed-off-by: Vlad Yasevich Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 2 +- net/bridge/br_input.c | 6 ++++-- net/bridge/br_netlink.c | 6 +++++- net/bridge/br_private.h | 1 + net/bridge/br_sysfs_if.c | 2 ++ 5 files changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4cdba60926ff..2c08911df578 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -221,7 +221,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = 0; + p->flags = BR_LEARNING; br_init_port(p); p->state = BR_STATE_DISABLED; br_stp_port_timer_init(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 828e2bcc1f52..7e993667d4bf 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -75,7 +75,8 @@ int br_handle_frame_finish(struct sk_buff *skb) /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; - br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); + if (p->flags & BR_LEARNING) + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) @@ -142,7 +143,8 @@ static int br_handle_local_finish(struct sk_buff *skb) u16 vid = 0; br_vlan_get_tag(skb, &vid); - br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); + if (p->flags & BR_LEARNING) + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); return 0; /* process further */ } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8e3abf564798..ce902bf8a618 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -30,6 +30,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + 0; } @@ -56,7 +57,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || - nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE))) + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING))) return -EMSGSIZE; return 0; @@ -281,6 +283,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -328,6 +331,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); + br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1b0ac95a5c37..04d7f43508f7 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -158,6 +158,7 @@ struct net_bridge_port #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 #define BR_ADMIN_COST 0x00000010 +#define BR_LEARNING 0x00000020 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index a1ef1b6e14dc..707f3628e9cd 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -158,6 +158,7 @@ static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); +BRPORT_ATTR_FLAG(learning, BR_LEARNING); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -195,6 +196,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_hairpin_mode, &brport_attr_bpdu_guard, &brport_attr_root_block, + &brport_attr_learning, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, -- cgit From 867a59436fc35593ae0e0efcd56cc6d2f8506586 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 5 Jun 2013 10:08:01 -0400 Subject: bridge: Add a flag to control unicast packet flood. Add a flag to control flood of unicast traffic. By default, flood is on and the bridge will flood unicast traffic if it doesn't know the destination. When the flag is turned off, unicast traffic without an FDB will not be forwarded to the specified port. Signed-off-by: Vlad Yasevich Reviewed-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/bridge/br_device.c | 8 ++++---- net/bridge/br_forward.c | 14 +++++++++----- net/bridge/br_if.c | 2 +- net/bridge/br_input.c | 9 ++++++--- net/bridge/br_netlink.c | 6 +++++- net/bridge/br_private.h | 6 ++++-- net/bridge/br_sysfs_if.c | 2 ++ 7 files changed, 31 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 75f3239130f8..2ef66781fedb 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -58,10 +58,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); if (is_broadcast_ether_addr(dest)) - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); else if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); goto out; } if (br_multicast_rcv(br, NULL, skb)) { @@ -73,11 +73,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) br_multicast_deliver(mdst, skb); else - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) br_deliver(dst->dst, skb); else - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, true); out: rcu_read_unlock(); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 092b20e4ee4c..4b81b1471789 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -174,7 +174,8 @@ out: static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb0, void (*__packet_hook)(const struct net_bridge_port *p, - struct sk_buff *skb)) + struct sk_buff *skb), + bool unicast) { struct net_bridge_port *p; struct net_bridge_port *prev; @@ -182,6 +183,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { + /* Do not flood unicast traffic to ports that turn it off */ + if (unicast && !(p->flags & BR_FLOOD)) + continue; prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) goto out; @@ -203,16 +207,16 @@ out: /* called with rcu_read_lock */ -void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast) { - br_flood(br, skb, NULL, __br_deliver); + br_flood(br, skb, NULL, __br_deliver, unicast); } /* called under bridge lock */ void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct sk_buff *skb2) + struct sk_buff *skb2, bool unicast) { - br_flood(br, skb, skb2, __br_forward); + br_flood(br, skb, skb2, __br_forward, unicast); } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 2c08911df578..5623be6b9ecd 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -221,7 +221,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = BR_LEARNING; + p->flags = BR_LEARNING | BR_FLOOD; br_init_port(p); p->state = BR_STATE_DISABLED; br_stp_port_timer_init(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7e993667d4bf..1b8b8b824cd7 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -65,6 +65,7 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; + bool unicast = true; u16 vid = 0; if (!p || p->state == BR_STATE_DISABLED) @@ -95,9 +96,10 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_broadcast_ether_addr(dest)) + if (is_broadcast_ether_addr(dest)) { skb2 = skb; - else if (is_multicast_ether_addr(dest)) { + unicast = false; + } else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb, vid); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || @@ -110,6 +112,7 @@ int br_handle_frame_finish(struct sk_buff *skb) } else skb2 = skb; + unicast = false; br->dev->stats.multicast++; } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) { @@ -123,7 +126,7 @@ int br_handle_frame_finish(struct sk_buff *skb) dst->used = jiffies; br_forward(dst->dst, skb, skb2); } else - br_flood_forward(br, skb, skb2); + br_flood_forward(br, skb, skb2, unicast); } if (skb2) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ce902bf8a618..1fc30abd3a52 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -31,6 +31,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + 0; } @@ -58,7 +59,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || - nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING))) + nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || + nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD))) return -EMSGSIZE; return 0; @@ -284,6 +286,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, + [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -332,6 +335,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); + br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 04d7f43508f7..3be89b3ce17b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -159,6 +159,7 @@ struct net_bridge_port #define BR_MULTICAST_FAST_LEAVE 0x00000008 #define BR_ADMIN_COST 0x00000010 #define BR_LEARNING 0x00000020 +#define BR_FLOOD 0x00000040 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; @@ -414,9 +415,10 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); extern int br_forward_finish(struct sk_buff *skb); -extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); +extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, + bool unicast); extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct sk_buff *skb2); + struct sk_buff *skb2, bool unicast); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 707f3628e9cd..2a2cdb756d51 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -159,6 +159,7 @@ BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); BRPORT_ATTR_FLAG(learning, BR_LEARNING); +BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -197,6 +198,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_bpdu_guard, &brport_attr_root_block, &brport_attr_learning, + &brport_attr_unicast_flood, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, -- cgit From da12c90e099789a63073fc82a19542ce54d4efb9 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 6 Jun 2013 14:49:11 +0800 Subject: netlink: Add compare function for netlink_table As we know, netlink sockets are private resource of net namespace, they can communicate with each other only when they in the same net namespace. this works well until we try to add namespace support for other subsystems which use netlink. Don't like ipv4 and route table.., it is not suited to make these subsytems belong to net namespace, Such as audit and crypto subsystems,they are more suitable to user namespace. So we must have the ability to make the netlink sockets in same user namespace can communicate with each other. This patch adds a new function pointer "compare" for netlink_table, we can decide if the netlink sockets can communicate with each other through this netlink_table self-defined compare function. The behavior isn't changed if we don't provide the compare function for netlink_table. Signed-off-by: Gao feng Acked-by: Serge E. Hallyn Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 33 +++++++++++++++++++++++++-------- net/netlink/af_netlink.h | 1 + 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 68c167374394..9b6b115e008f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -858,16 +858,23 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } +static bool netlink_compare(struct net *net, struct sock *sk) +{ + return net_eq(sock_net(sk), net); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { - struct nl_portid_hash *hash = &nl_table[protocol].hash; + struct netlink_table *table = &nl_table[protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; struct sock *sk; read_lock(&nl_table_lock); head = nl_portid_hashfn(hash, portid); sk_for_each(sk, head) { - if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { + if (table->compare(net, sk) && + (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; } @@ -980,7 +987,8 @@ netlink_update_listeners(struct sock *sk) static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { - struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct netlink_table *table = &nl_table[sk->sk_protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; @@ -990,7 +998,8 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) head = nl_portid_hashfn(hash, portid); len = 0; sk_for_each(osk, head) { - if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) + if (table->compare(net, osk) && + (nlk_sk(osk)->portid == portid)) break; len++; } @@ -1165,6 +1174,7 @@ static int netlink_release(struct socket *sock) kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; + nl_table[sk->sk_protocol].compare = NULL; nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } @@ -1187,7 +1197,8 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct netlink_table *table = &nl_table[sk->sk_protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; struct sock *osk; s32 portid = task_tgid_vnr(current); @@ -1199,7 +1210,7 @@ retry: netlink_table_grab(); head = nl_portid_hashfn(hash, portid); sk_for_each(osk, head) { - if (!net_eq(sock_net(osk), net)) + if (!table->compare(net, osk)) continue; if (nlk_sk(osk)->portid == portid) { /* Bind collision, search negative portid values. */ @@ -2315,9 +2326,12 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, rcu_assign_pointer(nl_table[unit].listeners, listeners); nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; + nl_table[unit].compare = netlink_compare; if (cfg) { nl_table[unit].bind = cfg->bind; nl_table[unit].flags = cfg->flags; + if (cfg->compare) + nl_table[unit].compare = cfg->compare; } nl_table[unit].registered = 1; } else { @@ -2740,6 +2754,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *s; struct nl_seq_iter *iter; + struct net *net; int i, j; ++*pos; @@ -2747,11 +2762,12 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return netlink_seq_socket_idx(seq, 0); + net = seq_file_net(seq); iter = seq->private; s = v; do { s = sk_next(s); - } while (s && sock_net(s) != seq_file_net(seq)); + } while (s && !nl_table[s->sk_protocol].compare(net, s)); if (s) return s; @@ -2763,7 +2779,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); - while (s && sock_net(s) != seq_file_net(seq)) + + while (s && !nl_table[s->sk_protocol].compare(net, s)) s = sk_next(s); if (s) { iter->link = i; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index ed8522265f4e..eaa88d187cdc 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -73,6 +73,7 @@ struct netlink_table { struct mutex *cb_mutex; struct module *module; void (*bind)(int group); + bool (*compare)(struct net *net, struct sock *sock); int registered; }; -- cgit From b41abb42bf62a85a32c41dab873220598a6ee266 Mon Sep 17 00:00:00 2001 From: "Peter Pan(潘卫平)" Date: Thu, 6 Jun 2013 21:27:21 +0800 Subject: net: pass correct parameter to skb_headers_offset_update() Since commit 1a37e412a022(net: Use 16bits for *_headers fields of struct skbuff), skb->*_header are relative to skb->head, so copy_skb_header() should not call skb_headers_offset_update() now, and we should pass correct parameter to skb_headers_offset_update() in pskb_expand_head() and skb_copy_expand(). Signed-off-by: Weiping Pan Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/core/skbuff.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4a4181e16c1a..edf37578e21e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -909,18 +909,8 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off) static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { -#ifndef NET_SKBUFF_DATA_USES_OFFSET - /* - * Shift between the two data areas in bytes - */ - unsigned long offset = new->data - old->data; -#endif - __copy_skb_header(new, old); -#ifndef NET_SKBUFF_DATA_USES_OFFSET - skb_headers_offset_update(new, offset); -#endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; @@ -1112,7 +1102,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->end = skb->head + size; #endif skb->tail += off; - skb_headers_offset_update(skb, off); + skb_headers_offset_update(skb, nhead); /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += nhead; @@ -1207,9 +1197,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, off = newheadroom - oldheadroom; if (n->ip_summed == CHECKSUM_PARTIAL) n->csum_start += off; -#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb_headers_offset_update(n, off); -#endif return n; } -- cgit From 45203a3b380cee28f570475c0d28c169f908c209 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2013 08:43:22 -0700 Subject: net_sched: add 64bit rate estimators struct gnet_stats_rate_est contains u32 fields, so the bytes per second field can wrap at 34360Mbit. Add a new gnet_stats_rate_est64 structure to get 64bit bps/pps fields, and switch the kernel to use this structure natively. This structure is dumped to user space as a new attribute : TCA_STATS_RATE_EST64 Old tc command will now display the capped bps (to 34360Mbit), instead of wrapped values, and updated tc command will display correct information. Old tc command output, after patch : eric:~# tc -s -d qd sh dev lo qdisc pfifo 8001: root refcnt 2 limit 1000p Sent 80868245400 bytes 1978837 pkt (dropped 0, overlimits 0 requeues 0) rate 34360Mbit 189696pps backlog 0b 0p requeues 0 This patch carefully reorganizes "struct Qdisc" layout to get optimal performance on SMP. Signed-off-by: Eric Dumazet Cc: Ben Hutchings Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 12 ++++++------ net/core/gen_stats.c | 22 +++++++++++++++++----- net/netfilter/xt_rateest.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- net/sched/sch_qfq.c | 2 +- 8 files changed, 29 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index d9d198aa9fed..6b5b6e7013ca 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -82,7 +82,7 @@ struct gen_estimator { struct list_head list; struct gnet_stats_basic_packed *bstats; - struct gnet_stats_rate_est *rate_est; + struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; int ewma_log; u64 last_bytes; @@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est) static struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est) + const struct gnet_stats_rate_est64 *rate_est) { struct rb_node *p = est_root.rb_node; @@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * */ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { @@ -258,7 +258,7 @@ EXPORT_SYMBOL(gen_new_estimator); * Note : Caller should respect an RCU grace period before freeing stats_lock */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est) + struct gnet_stats_rate_est64 *rate_est) { struct gen_estimator *e; @@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * Returns 0 on success or a negative error code. */ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { gen_kill_estimator(bstats, rate_est); @@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator); * Returns true if estimator is active, and false if not. */ bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est) + const struct gnet_stats_rate_est64 *rate_est) { bool res; diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index ddedf211e588..9d3d9e78397b 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -143,18 +143,30 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); int gnet_stats_copy_rate_est(struct gnet_dump *d, const struct gnet_stats_basic_packed *b, - struct gnet_stats_rate_est *r) + struct gnet_stats_rate_est64 *r) { + struct gnet_stats_rate_est est; + int res; + if (b && !gen_estimator_active(b, r)) return 0; + est.bps = min_t(u64, UINT_MAX, r->bps); + /* we have some time before reaching 2^32 packets per second */ + est.pps = r->pps; + if (d->compat_tc_stats) { - d->tc_stats.bps = r->bps; - d->tc_stats.pps = r->pps; + d->tc_stats.bps = est.bps; + d->tc_stats.pps = est.pps; } - if (d->tail) - return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r)); + if (d->tail) { + res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est)); + if (res < 0 || est.bps == r->bps) + return res; + /* emit 64bit stats only if needed */ + return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r)); + } return 0; } diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index ed0db15ab00e..7720b036d76a 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -18,7 +18,7 @@ static bool xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; - struct gnet_stats_rate_est *r; + struct gnet_stats_rate_est64 *r; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 1bc210ffcba2..71a568862557 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -130,7 +130,7 @@ struct cbq_class { psched_time_t penalized; struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct tc_cbq_xstats xstats; struct tcf_proto *filter_list; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 759b308d1a8d..8302717ea303 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -25,7 +25,7 @@ struct drr_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct list_head alist; struct Qdisc *qdisc; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 9facea03faeb..c4075610502c 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -114,7 +114,7 @@ struct hfsc_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; unsigned int level; /* class level in hierarchy */ struct tcf_proto *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index adaedd79389c..162fb800754c 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -78,7 +78,7 @@ struct htb_class { /* general class parameters */ struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct tc_htb_xstats xstats; /* our special stats */ int refcnt; /* usage count of this class */ diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d51852bba01c..7c195d972bf0 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -138,7 +138,7 @@ struct qfq_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct Qdisc *qdisc; struct list_head alist; /* Link for active-classes list. */ struct qfq_aggregate *agg; /* Parent aggregate. */ -- cgit From ecccd072b07e7fd09c54d0f86f9374e2645cde97 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 10 Jun 2013 13:17:21 -0700 Subject: mac80211: fix mesh deadlock The patch "cfg80211/mac80211: use cfg80211 wdev mutex in mac80211" introduced several deadlocks by converting the ifmsh->mtx to wdev->mtx. Solve these by: 1. drop the cancel_work_sync() in ieee80211_stop_mesh(). Instead make the mesh work conditional on whether the mesh is running or not. 2. lock the mesh work with sdata_lock() to protect beacon updates and prevent races with wdev->mesh_id_len or cfg80211. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 29 +++++++++++++++++------------ net/mac80211/mesh_plink.c | 7 +------ 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 73a597bad6e0..d5faf91632c1 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -579,9 +579,7 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); - sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); - sdata_unlock(sdata); mod_timer(&ifmsh->housekeeping_timer, round_jiffies(jiffies + @@ -788,12 +786,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.enable_beacon = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - sdata_lock(sdata); bcn = rcu_dereference_protected(ifmsh->beacon, lockdep_is_held(&sdata->wdev.mtx)); rcu_assign_pointer(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); - sdata_unlock(sdata); /* flush STAs and mpaths on this iface */ sta_info_flush(sdata); @@ -806,14 +802,6 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); del_timer_sync(&sdata->u.mesh.mesh_path_timer); - /* - * If the timer fired while we waited for it, it will have - * requeued the work. Now the work will be running again - * but will not rearm the timer again because it checks - * whether the interface is running, which, at this point, - * it no longer is. - */ - cancel_work_sync(&sdata->work); local->fif_other_bss--; atomic_dec(&local->iff_allmultis); @@ -954,6 +942,12 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u16 stype; + sdata_lock(sdata); + + /* mesh already went down */ + if (!sdata->wdev.mesh_id_len) + goto out; + rx_status = IEEE80211_SKB_RXCB(skb); mgmt = (struct ieee80211_mgmt *) skb->data; stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; @@ -971,12 +965,20 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; } +out: + sdata_unlock(sdata); } void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + sdata_lock(sdata); + + /* mesh already went down */ + if (!sdata->wdev.mesh_id_len) + goto out; + if (ifmsh->preq_queue_len && time_after(jiffies, ifmsh->last_preq + msecs_to_jiffies(ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval))) @@ -996,6 +998,9 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags)) mesh_sync_adjust_tbtt(sdata); + +out: + sdata_unlock(sdata); } void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 6c4da99bc4fb..09bebed99416 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -517,9 +517,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, ieee80211_mps_frame_release(sta, elems); out: rcu_read_unlock(); - sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); - sdata_unlock(sdata); } static void mesh_plink_timer(unsigned long data) @@ -1070,9 +1068,6 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); - if (changed) { - sdata_lock(sdata); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); - sdata_unlock(sdata); - } } -- cgit From 8e7c053853b7d299e8a2b8733659b0df8eee51f7 Mon Sep 17 00:00:00 2001 From: Colleen Twitty Date: Mon, 3 Jun 2013 09:53:39 -0700 Subject: {nl,cfg}80211: make peer link expiration time configurable If a STA has a peer that it hasn't seen any tx activity from for a certain length of time, the peer link is expired. This means the inactive STA is removed from the list of peers and that STA is not considered a peer again unless it re-peers. Previously, this inactivity time was always 30 minutes. Now, add it to the mesh configuration and allow it to be configured. Retain 30 minutes as a default value. Signed-off-by: Colleen Twitty Signed-off-by: Johannes Berg --- net/wireless/mesh.c | 2 ++ net/wireless/nl80211.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 5dfb289ab761..0daaf72e1b81 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -18,6 +18,7 @@ #define MESH_PATH_TO_ROOT_TIMEOUT 6000 #define MESH_ROOT_INTERVAL 5000 #define MESH_ROOT_CONFIRMATION_INTERVAL 2000 +#define MESH_DEFAULT_PLINK_TIMEOUT 1800 /* timeout in seconds */ /* * Minimum interval between two consecutive PREQs originated by the same @@ -75,6 +76,7 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, .power_mode = NL80211_MESH_POWER_ACTIVE, .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, + .plink_timeout = MESH_DEFAULT_PLINK_TIMEOUT, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 88e820b73674..8aa83c04d4eb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4575,7 +4575,9 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, cur_params.power_mode) || nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, - cur_params.dot11MeshAwakeWindowDuration)) + cur_params.dot11MeshAwakeWindowDuration) || + nla_put_u32(msg, NL80211_MESHCONF_PLINK_TIMEOUT, + cur_params.plink_timeout)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -4616,6 +4618,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, + [NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 }, }; static const struct nla_policy @@ -4753,6 +4756,9 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 0, 65535, mask, NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 1, 0xffffffff, + mask, NL80211_MESHCONF_PLINK_TIMEOUT, + nla_get_u32); if (mask_out) *mask_out = mask; -- cgit From 66de671374f003467b5ef7c65ecbe1930480c8c9 Mon Sep 17 00:00:00 2001 From: Colleen Twitty Date: Mon, 3 Jun 2013 09:53:40 -0700 Subject: mac80211: expire mesh peers based on mesh configuration The time it takes to see the peer link expire may differ by a minute since sta_expire() is run once a minute as a mesh housekeeping task. Signed-off-by: Colleen Twitty Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 ++ net/mac80211/mesh.c | 2 +- net/mac80211/mesh.h | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 30622101d3b5..344a57968079 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1871,6 +1871,8 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) conf->dot11MeshAwakeWindowDuration = nconf->dot11MeshAwakeWindowDuration; + if (_chg_mesh_attr(NL80211_MESHCONF_PLINK_TIMEOUT, mask)) + conf->plink_timeout = nconf->plink_timeout; ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d5faf91632c1..4ee527f78677 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -575,7 +575,7 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 changed; - ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); + ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ); mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 8b4d9a3e9eee..01a28bca6e9b 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -188,7 +188,6 @@ struct mesh_rmc { u32 idx_mask; }; -#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) #define MESH_PATH_EXPIRE (600 * HZ) -- cgit From ffb3cf3000aa12facdccbdfcb10bfebda7199209 Mon Sep 17 00:00:00 2001 From: Ashok Nagarajan Date: Mon, 3 Jun 2013 10:33:36 -0700 Subject: {nl,mac,cfg}80211: Allow user to configure basic rates for mesh Currently mesh uses mandatory rates as the default basic rates. Allow basic rates to be configured during mesh join. Basic rates are applied only if channel is also provided with mesh join command. Signed-off-by: Ashok Nagarajan [some whitespace fixes, refuse basic rates w/o channel] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 1 + net/mac80211/mesh.c | 4 ---- net/wireless/mesh.c | 10 ++++++++++ net/wireless/nl80211.c | 17 +++++++++++++++++ 4 files changed, 28 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 344a57968079..cd6f35f6e714 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1759,6 +1759,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, /* mcast rate setting in Mesh Node */ memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); + sdata->vif.bss_conf.basic_rates = setup->basic_rates; sdata->vif.bss_conf.beacon_int = setup->beacon_interval; sdata->vif.bss_conf.dtim_period = setup->dtim_period; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 4ee527f78677..6c33af482df4 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -738,9 +738,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_supported_band *sband = - sdata->local->hw.wiphy->bands[band]; local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ @@ -758,7 +755,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; sdata->vif.bss_conf.enable_beacon = true; - sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sband); changed |= ieee80211_mps_local_status_update(sdata); diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0daaf72e1b81..30c49202ee4d 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -162,6 +162,16 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, setup->chandef.center_freq1 = setup->chandef.chan->center_freq; } + /* + * check if basic rates are available otherwise use mandatory rates as + * basic rates + */ + if (!setup->basic_rates) { + struct ieee80211_supported_band *sband = + rdev->wiphy.bands[setup->chandef.chan->band]; + setup->basic_rates = ieee80211_mandatory_rates(sband); + } + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) return -EINVAL; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8aa83c04d4eb..687cb6497598 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7487,6 +7487,23 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) setup.chandef.chan = NULL; } + if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { + u8 *rates = nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + int n_rates = + nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + struct ieee80211_supported_band *sband; + + if (!setup.chandef.chan) + return -EINVAL; + + sband = rdev->wiphy.bands[setup.chandef.chan->band]; + + err = ieee80211_get_ratemask(sband, rates, n_rates, + &setup.basic_rates); + if (err) + return err; + } + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } -- cgit From 3d124ea27ae2fc895f81725f0b4c7f3d9c733df4 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 27 May 2013 18:24:02 +0300 Subject: cfg80211: fix VHT TDLS peer AID verification I (Johannes) accidentally applied the first version of the patch ("Allow TDLS peer AID to be configured for VHT"). Now apply just the changes between v1 and v2 to get the AID verification and prefer the new attribute over the old one. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 687cb6497598..7183410fcd41 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3972,10 +3972,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - if (info->attrs[NL80211_ATTR_STA_AID]) - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); - else + if (info->attrs[NL80211_ATTR_PEER_AID]) params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); + else + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; @@ -4027,7 +4027,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; /* TDLS peers cannot be added */ - if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) || + info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); @@ -4053,7 +4054,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) return -EINVAL; /* TDLS peers cannot be added */ - if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) || + info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; break; case NL80211_IFTYPE_STATION: -- cgit From f7aeb6fb1a3d6b09623b169518314bc7869fffec Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 11 Jun 2013 14:20:00 +0200 Subject: mac80211: make mgmt_tx accept a NULL channel cfg80211 passes a NULL channel to mgmt_tx if the frame has to be sent on the one currently in use by the device. Make the implementation of mgmt_tx correctly handle this case. Fail if offchan is required. Signed-off-by: Antonio Quartulli [fix RCU locking] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index cd6f35f6e714..64cf294c2b96 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2841,6 +2841,12 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, return -EOPNOTSUPP; } + /* configurations requiring offchan cannot work if no channel has been + * specified + */ + if (need_offchan && !chan) + return -EINVAL; + mutex_lock(&local->mtx); /* Check if the operating channel is the requested channel */ @@ -2850,10 +2856,15 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (chanctx_conf) - need_offchan = chan != chanctx_conf->def.chan; - else + if (chanctx_conf) { + need_offchan = chan && (chan != chanctx_conf->def.chan); + } else if (!chan) { + ret = -EINVAL; + rcu_read_unlock(); + goto out_unlock; + } else { need_offchan = true; + } rcu_read_unlock(); } -- cgit From ea141b75ae29636b5c9e9d2e2e77b3dd1ab4c934 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 11 Jun 2013 14:20:03 +0200 Subject: nl80211: allow sending CMD_FRAME without specifying any frequency Users may want to send a frame on the current channel without specifying it. This is particularly useful for the correct implementation of the IBSS/RSN support in wpa_supplicant which requires to receive and send AUTH frames. Make mgmt_tx pass a NULL channel to the driver if none has been specified by the user. Signed-off-by: Antonio Quartulli Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7183410fcd41..398ce2c59686 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7147,6 +7147,9 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; switch (wdev->iftype) { + case NL80211_IFTYPE_P2P_DEVICE: + if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) + return -EINVAL; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_CLIENT: @@ -7154,7 +7157,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_P2P_DEVICE: break; default: return -EOPNOTSUPP; @@ -7182,9 +7184,18 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); - err = nl80211_parse_chandef(rdev, info, &chandef); - if (err) - return err; + /* get the channel if any has been specified, otherwise pass NULL to + * the driver. The latter will use the current one + */ + chandef.chan = NULL; + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + err = nl80211_parse_chandef(rdev, info, &chandef); + if (err) + return err; + } + + if (!chandef.chan && offchan) + return -EINVAL; if (!dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); -- cgit From 940d0ac9dbe3fb9d4806e96f006286c2e476deed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 Jun 2013 16:51:03 +0200 Subject: cfg80211: fix rtnl leak in wiphy dump error cases In two wiphy dump error cases, most often when the dump allocation must be increased, the RTNL is leaked. This quickly results in a complete system lockup. Release the RTNL correctly. Reported-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 398ce2c59686..e4028197b75d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1541,8 +1541,10 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); netdev = dev_get_by_index(sock_net(skb->sk), ifidx); - if (!netdev) + if (!netdev) { + rtnl_unlock(); return -ENODEV; + } if (netdev->ieee80211_ptr) { dev = wiphy_to_dev( netdev->ieee80211_ptr->wiphy); @@ -1586,6 +1588,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) !skb->len && cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; + rtnl_unlock(); return 1; } idx--; -- cgit From 130d3d68b52097c7ae081109f700b02776adcb9c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2013 13:56:19 -0700 Subject: net_sched: psched_ratecfg_precompute() improvements Before allowing 64bits bytes rates, refactor psched_ratecfg_precompute() to get better comments and increased accuracy. rate_bps field is renamed to rate_bytes_ps, as we only have to worry about bytes per second. Signed-off-by: Eric Dumazet Cc: Ben Greear Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 20224086cc28..4626cef4b76e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -901,37 +901,33 @@ void dev_shutdown(struct net_device *dev) void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf) { - u64 factor; - u64 mult; - int shift; - memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; - r->rate_bps = (u64)conf->rate << 3; + r->rate_bytes_ps = conf->rate; r->mult = 1; /* - * Calibrate mult, shift so that token counting is accurate - * for smallest packet size (64 bytes). Token (time in ns) is - * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will - * work as long as the smallest packet transfer time can be - * accurately represented in nanosec. + * The deal here is to replace a divide by a reciprocal one + * in fast path (a reciprocal divide is a multiply and a shift) + * + * Normal formula would be : + * time_in_ns = (NSEC_PER_SEC * len) / rate_bps + * + * We compute mult/shift to use instead : + * time_in_ns = (len * mult) >> shift; + * + * We try to get the highest possible mult value for accuracy, + * but have to make sure no overflows will ever happen. */ - if (r->rate_bps > 0) { - /* - * Higher shift gives better accuracy. Find the largest - * shift such that mult fits in 32 bits. - */ - for (shift = 0; shift < 16; shift++) { - r->shift = shift; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - mult = div64_u64(factor, r->rate_bps); - if (mult > UINT_MAX) + if (r->rate_bytes_ps > 0) { + u64 factor = NSEC_PER_SEC; + + for (;;) { + r->mult = div64_u64(factor, r->rate_bytes_ps); + if (r->mult & (1U << 31) || factor & (1ULL << 63)) break; + factor <<= 1; + r->shift++; } - - r->shift = shift - 1; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - r->mult = div64_u64(factor, r->rate_bps); } } EXPORT_SYMBOL(psched_ratecfg_precompute); -- cgit From 64153ce0a7b61b2a5cacb01805cbf670142339e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2013 14:53:16 -0700 Subject: net_sched: htb: do not setup default rate estimators With a thousand htb classes, est_timer() spends ~5 million cpu cycles and throws out cpu cache, because each htb class has a default rate estimator (est 4sec 16sec). Most users do not use default rate estimators, so switch htb to not setup ones. Add a module parameter (htb_rate_est) so that users relying on this default rate estimator can revert the behavior. echo 1 >/sys/module/sch_htb/parameters/htb_rate_est Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 162fb800754c..1a3655a606c1 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -65,6 +65,10 @@ static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis f module_param (htb_hysteresis, int, 0640); MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate"); +static int htb_rate_est = 0; /* htb classes have a default rate estimator */ +module_param(htb_rate_est, int, 0640); +MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes"); + /* used internaly to keep status of single class */ enum htb_cmode { HTB_CANT_SEND, /* class can't send and can't borrow */ @@ -1366,12 +1370,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; - err = gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE] ? : &est.nla); - if (err) { - kfree(cl); - goto failure; + if (htb_rate_est || tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE] ? : &est.nla); + if (err) { + kfree(cl); + goto failure; + } } cl->refcnt = 1; -- cgit From e9897071350bd9d94a56b5b6f79c85b1a98fc7e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Jun 2013 08:48:57 -0700 Subject: igmp: hash a hash table to speedup ip_check_mc_rcu() After IP route cache removal, multicast applications using a lot of multicast addresses hit a O(N) behavior in ip_check_mc_rcu() Add a per in_device hash table to get faster lookup. This hash table is created only if the number of items in mc_list is above 4. Reported-by: Shawn Bohrer Signed-off-by: Eric Dumazet Tested-by: Shawn Bohrer Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 1 + net/ipv4/igmp.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 71 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b047e2d8a614..3469506c106d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -215,6 +215,7 @@ void in_dev_finish_destroy(struct in_device *idev) WARN_ON(idev->ifa_list); WARN_ON(idev->mc_list); + kfree(rcu_dereference_protected(idev->mc_hash, 1)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); #endif diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 450f625361e4..f72011df9c59 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1217,6 +1217,57 @@ static void igmp_group_added(struct ip_mc_list *im) * Multicast list managers */ +static u32 ip_mc_hash(const struct ip_mc_list *im) +{ + return hash_32((u32)im->multiaddr, MC_HASH_SZ_LOG); +} + +static void ip_mc_hash_add(struct in_device *in_dev, + struct ip_mc_list *im) +{ + struct ip_mc_list __rcu **mc_hash; + u32 hash; + + mc_hash = rtnl_dereference(in_dev->mc_hash); + if (mc_hash) { + hash = ip_mc_hash(im); + im->next_hash = rtnl_dereference(mc_hash[hash]); + rcu_assign_pointer(mc_hash[hash], im); + return; + } + + /* do not use a hash table for small number of items */ + if (in_dev->mc_count < 4) + return; + + mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG, + GFP_KERNEL); + if (!mc_hash) + return; + + for_each_pmc_rtnl(in_dev, im) { + hash = ip_mc_hash(im); + im->next_hash = rtnl_dereference(mc_hash[hash]); + RCU_INIT_POINTER(mc_hash[hash], im); + } + + rcu_assign_pointer(in_dev->mc_hash, mc_hash); +} + +static void ip_mc_hash_remove(struct in_device *in_dev, + struct ip_mc_list *im) +{ + struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash); + struct ip_mc_list *aux; + + if (!mc_hash) + return; + mc_hash += ip_mc_hash(im); + while ((aux = rtnl_dereference(*mc_hash)) != im) + mc_hash = &aux->next_hash; + *mc_hash = im->next_hash; +} + /* * A socket has joined a multicast group on device dev. @@ -1258,6 +1309,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) in_dev->mc_count++; rcu_assign_pointer(in_dev->mc_list, im); + ip_mc_hash_add(in_dev, im); + #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1314,6 +1367,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ip = &i->next_rcu) { if (i->multiaddr == addr) { if (--i->users == 0) { + ip_mc_hash_remove(in_dev, i); *ip = i->next_rcu; in_dev->mc_count--; igmp_group_dropped(i); @@ -2321,12 +2375,25 @@ void ip_mc_drop_socket(struct sock *sk) int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; + struct ip_mc_list __rcu **mc_hash; struct ip_sf_list *psf; int rv = 0; - for_each_pmc_rcu(in_dev, im) { - if (im->multiaddr == mc_addr) - break; + mc_hash = rcu_dereference(in_dev->mc_hash); + if (mc_hash) { + u32 hash = hash_32((u32)mc_addr, MC_HASH_SZ_LOG); + + for (im = rcu_dereference(mc_hash[hash]); + im != NULL; + im = rcu_dereference(im->next_hash)) { + if (im->multiaddr == mc_addr) + break; + } + } else { + for_each_pmc_rcu(in_dev, im) { + if (im->multiaddr == mc_addr) + break; + } } if (im && proto == IPPROTO_IGMP) { rv = 1; -- cgit From 946d3bd7231be3b6202759ea0bea59989ae28c4a Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Fri, 7 Jun 2013 12:34:43 -0500 Subject: igmp: remove unnecessary in_device member zeroing ip_mc_init_dev() is passed a freshly kzalloc'd in_device so it is unnecessary to explicitly zero out the members. Signed-off-by: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f72011df9c59..a09190ddffba 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1435,13 +1435,9 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - in_dev->mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST - in_dev->mr_gq_running = 0; setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, (unsigned long)in_dev); - in_dev->mr_ifc_count = 0; - in_dev->mc_count = 0; setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; -- cgit From da5bab079f9b7d90ba234965a14914ace55e45e9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 8 Jun 2013 12:56:03 +0200 Subject: net: udp4: move GSO functions to udp_offload Similarly to TCP offloading and UDPv6 offloading, move all related UDPv4 functions to udp_offload.c to make things more explicit. Also, by this, we can make those functions static. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/Makefile | 2 +- net/ipv4/af_inet.c | 9 +---- net/ipv4/udp.c | 75 +------------------------------------ net/ipv4/udp_offload.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 82 deletions(-) create mode 100644 net/ipv4/udp_offload.c (limited to 'net') diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4d3e138c564f..7fcf8101d85f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -9,7 +9,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ - arp.o icmp.o devinet.o af_inet.o igmp.o \ + udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7b514290efc6..5598b06d62db 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1566,13 +1566,6 @@ static const struct net_protocol udp_protocol = { .netns_ok = 1, }; -static const struct net_offload udp_offload = { - .callbacks = { - .gso_send_check = udp4_ufo_send_check, - .gso_segment = udp4_ufo_fragment, - }, -}; - static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = icmp_err, @@ -1672,7 +1665,7 @@ static int __init ipv4_offload_init(void) /* * Add offloads */ - if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) + if (udpv4_offload_init() < 0) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); if (tcpv4_offload_init() < 0) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2955b25aee6d..f65bc32c0266 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2290,29 +2290,8 @@ void __init udp_init(void) sysctl_udp_wmem_min = SK_MEM_QUANTUM; } -int udp4_ufo_send_check(struct sk_buff *skb) -{ - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - return -EINVAL; - - if (likely(!skb->encapsulation)) { - const struct iphdr *iph; - struct udphdr *uh; - - iph = ip_hdr(skb); - uh = udp_hdr(skb); - - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - } - return 0; -} - -static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); int mac_len = skb->mac_len; @@ -2371,53 +2350,3 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, out: return segs; } - -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE | SKB_GSO_MPLS) || - !(type & (SKB_GSO_UDP)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Fragment the skb. IP headers of the fragments are updated in - * inet_gso_segment() - */ - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) - segs = skb_udp_tunnel_segment(skb, features); - else { - int offset; - __wsum csum; - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - segs = skb_segment(skb, features); - } -out: - return segs; -} diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c new file mode 100644 index 000000000000..f35eccaa855e --- /dev/null +++ b/net/ipv4/udp_offload.c @@ -0,0 +1,100 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * UDPv4 GSO support + */ + +#include +#include +#include + +static int udp4_ufo_send_check(struct sk_buff *skb) +{ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + return -EINVAL; + + if (likely(!skb->encapsulation)) { + const struct iphdr *iph; + struct udphdr *uh; + + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + } + + return 0; +} + +static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_GRE | SKB_GSO_MPLS) || + !(type & (SKB_GSO_UDP)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + /* Fragment the skb. IP headers of the fragments are updated in + * inet_gso_segment() + */ + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + segs = skb_udp_tunnel_segment(skb, features); + else { + int offset; + __wsum csum; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + segs = skb_segment(skb, features); + } +out: + return segs; +} + +static const struct net_offload udpv4_offload = { + .callbacks = { + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, + }, +}; + +int __init udpv4_offload_init(void) +{ + return inet_add_offload(&udpv4_offload, IPPROTO_UDP); +} -- cgit From 7a6e288d2745611bef5b614acf19644283765732 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 8 Jun 2013 14:18:16 +0200 Subject: pktgen: ipv6: numa: consolidate skb allocation to pktgen_alloc_skb We currently allow for numa-node aware skb allocation only within the fill_packet_ipv4() path, but not in fill_packet_ipv6(). Consolidate that code to a common allocation helper to enable numa-node aware skb allocation for ipv6, and use it in both paths. This also makes both functions a bit more readable. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/pktgen.c | 52 +++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 303412d8332b..9640972ec50e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2627,6 +2627,29 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, pgh->tv_usec = htonl(timestamp.tv_usec); } +static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, + struct pktgen_dev *pkt_dev, + unsigned int extralen) +{ + struct sk_buff *skb = NULL; + unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen + + pkt_dev->pkt_overhead; + + if (pkt_dev->flags & F_NODE) { + int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id(); + + skb = __alloc_skb(NET_SKB_PAD + size, GFP_NOWAIT, 0, node); + if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + } + } else { + skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); + } + + return skb; +} + static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2657,32 +2680,13 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, datalen = (odev->hard_header_len + 16) & ~0xf; - if (pkt_dev->flags & F_NODE) { - int node; - - if (pkt_dev->node >= 0) - node = pkt_dev->node; - else - node = numa_node_id(); - - skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64 - + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node); - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD); - skb->dev = odev; - } - } - else - skb = __netdev_alloc_skb(odev, - pkt_dev->cur_pkt_size + 64 - + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); - + skb = pktgen_alloc_skb(odev, pkt_dev, datalen); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } - prefetchw(skb->data); + prefetchw(skb->data); skb_reserve(skb, datalen); /* Reserve for ethernet and IP header */ @@ -2786,15 +2790,13 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - skb = __netdev_alloc_skb(odev, - pkt_dev->cur_pkt_size + 64 - + 16 + pkt_dev->pkt_overhead, GFP_NOWAIT); + skb = pktgen_alloc_skb(odev, pkt_dev, 16); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } - prefetchw(skb->data); + prefetchw(skb->data); skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ -- cgit From c70eba74532a9b54583689fead6e2e8f3a86e1c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jun 2013 14:11:16 -0700 Subject: igmp: fix new sparse errors Fix following sparse errors : net/ipv4/igmp.c:1222:25: warning: cast from restricted __be32 net/ipv4/igmp.c:1234:31: warning: incorrect type in assignment (different address spaces) net/ipv4/igmp.c:1234:31: expected struct ip_mc_list [noderef] *next_hash net/ipv4/igmp.c:1234:31: got struct ip_mc_list * net/ipv4/igmp.c:1250:31: warning: incorrect type in assignment (different address spaces) net/ipv4/igmp.c:1250:31: expected struct ip_mc_list [noderef] *next_hash net/ipv4/igmp.c:1250:31: got struct ip_mc_list * net/ipv4/igmp.c:2380:37: warning: cast from restricted __be32 These were added by commit e9897071350bd9 ("igmp: hash a hash table to speedup ip_check_mc_rcu()") Reported-by: kbuild test robot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a09190ddffba..cd71190d2962 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1219,7 +1219,7 @@ static void igmp_group_added(struct ip_mc_list *im) static u32 ip_mc_hash(const struct ip_mc_list *im) { - return hash_32((u32)im->multiaddr, MC_HASH_SZ_LOG); + return hash_32((__force u32)im->multiaddr, MC_HASH_SZ_LOG); } static void ip_mc_hash_add(struct in_device *in_dev, @@ -1231,7 +1231,7 @@ static void ip_mc_hash_add(struct in_device *in_dev, mc_hash = rtnl_dereference(in_dev->mc_hash); if (mc_hash) { hash = ip_mc_hash(im); - im->next_hash = rtnl_dereference(mc_hash[hash]); + im->next_hash = mc_hash[hash]; rcu_assign_pointer(mc_hash[hash], im); return; } @@ -1247,7 +1247,7 @@ static void ip_mc_hash_add(struct in_device *in_dev, for_each_pmc_rtnl(in_dev, im) { hash = ip_mc_hash(im); - im->next_hash = rtnl_dereference(mc_hash[hash]); + im->next_hash = mc_hash[hash]; RCU_INIT_POINTER(mc_hash[hash], im); } @@ -2377,7 +2377,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u mc_hash = rcu_dereference(in_dev->mc_hash); if (mc_hash) { - u32 hash = hash_32((u32)mc_addr, MC_HASH_SZ_LOG); + u32 hash = hash_32((__force u32)mc_addr, MC_HASH_SZ_LOG); for (im = rcu_dereference(mc_hash[hash]); im != NULL; -- cgit From 5b9b6263775d45ccc0c8b27344bfb1a97cf6f725 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jun 2013 14:23:15 -0700 Subject: gro: remove a sparse error Fix following sparse error : net/ipv4/af_inet.c:1410:59: warning: restricted __be16 degrades to integer added in commit db8caf3dbc77599 ("gro: should aggregate frames without DF") Reported-by: kbuild test robot From: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5598b06d62db..b4d0be2b7ce9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1407,7 +1407,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | - ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | + (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; -- cgit From 7c0cadc69ca2ac8893aa162ee80d92a805840909 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jun 2013 14:31:39 -0700 Subject: udp: fix two sparse errors commit ba418fa357a7b3c ("soreuseport: UDP/IPv4 implementation") added following sparse errors : net/ipv4/udp.c:433:60: warning: cast from restricted __be16 net/ipv4/udp.c:433:60: warning: incorrect type in argument 1 (different base types) net/ipv4/udp.c:433:60: expected unsigned short [unsigned] [usertype] val net/ipv4/udp.c:433:60: got restricted __be16 [usertype] sport net/ipv4/udp.c:433:60: warning: cast from restricted __be16 net/ipv4/udp.c:433:60: warning: cast from restricted __be16 net/ipv4/udp.c:514:60: warning: cast from restricted __be16 net/ipv4/udp.c:514:60: warning: incorrect type in argument 1 (different base types) net/ipv4/udp.c:514:60: expected unsigned short [unsigned] [usertype] val net/ipv4/udp.c:514:60: got restricted __be16 [usertype] sport net/ipv4/udp.c:514:60: warning: cast from restricted __be16 net/ipv4/udp.c:514:60: warning: cast from restricted __be16 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f65bc32c0266..959502afd8d9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -430,7 +430,7 @@ begin: reuseport = sk->sk_reuseport; if (reuseport) { hash = inet_ehashfn(net, daddr, hnum, - saddr, htons(sport)); + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { @@ -511,7 +511,7 @@ begin: reuseport = sk->sk_reuseport; if (reuseport) { hash = inet_ehashfn(net, daddr, hnum, - saddr, htons(sport)); + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { -- cgit From 661eb3811df568161399af0048f1ecb4ac073687 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2013 22:47:56 +0200 Subject: mac80211: fix TX aggregation TID struct leak Ben reports that kmemleak is saying TX aggregation TID structs are leaked. Given his workload, I suspect that they're leaked because stations are destroyed before their aggregation sessions get a chance to start. Fix this by simply freeing structs that are not used yet. Reported-by: Ben Greear Tested-by: Ben Greear Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index b4297982d34a..aaf68d297221 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -149,6 +149,7 @@ static void cleanup_single_sta(struct sta_info *sta) * directly by station destruction. */ for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + kfree(sta->ampdu_mlme.tid_start_tx[i]); tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); if (!tid_tx) continue; -- cgit From ca15febfe98f7c681ac345fc1d2ee1b8decaa493 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 13 Jun 2013 10:05:38 +0800 Subject: netlink: make compare exist all the time Commit da12c90e099789a63073fc82a19542ce54d4efb9 "netlink: Add compare function for netlink_table" only set compare at the time we create kernel netlink, and reset compare to NULL at the time we finially release netlink socket, but netlink_lookup wants the compare exist always. So we should set compare after we allocate nl_table, and never reset it. make comapre exist all the time. Reported-by: Fengguang Wu Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9b6b115e008f..8978755251f7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1174,7 +1174,6 @@ static int netlink_release(struct socket *sock) kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; - nl_table[sk->sk_protocol].compare = NULL; nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } @@ -2326,7 +2325,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, rcu_assign_pointer(nl_table[unit].listeners, listeners); nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; - nl_table[unit].compare = netlink_compare; if (cfg) { nl_table[unit].bind = cfg->bind; nl_table[unit].flags = cfg->flags; @@ -2973,6 +2971,8 @@ static int __init netlink_proto_init(void) hash->shift = 0; hash->mask = 0; hash->rehash_time = jiffies; + + nl_table[i].compare = netlink_compare; } netlink_add_usersock_entry(); -- cgit From 2c928e0e8dd6b3661870bfacb53d1c330a1a7411 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 13 Jun 2013 16:04:33 +0900 Subject: sctp: Correct byte order of access to skb->{network, transport}_header Corrects an byte order conflict introduced by 158874cac61245b84e939c92c53db7000122b7b0 ("sctp: Correct access to skb->{network, transport}_header"). The values in question are host byte order. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 8ee553b499ce..fffc7b62a9a8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -153,7 +153,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sctp_association *asoc; struct sctp_transport *transport; struct ipv6_pinfo *np; - __be16 saveip, savesctp; + __u16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); -- cgit From e562078a19226660299eeaf40a50752672214f11 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 12 Jun 2013 14:08:44 -0700 Subject: mac80211: Ensure tid_start_tx is protected by sta->lock All accesses of the tid_start_tx lock should be protected by sta->lock if there is any chance that another thread could still be accessing the sta object. Signed-off-by: Ben Greear Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 4 +++- net/mac80211/sta_info.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 75dff338f581..f83534f6a2ee 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -281,13 +281,14 @@ void ieee80211_ba_session_work(struct work_struct *work) sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_UNSPECIFIED, true); + spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; if (tid_tx) { /* * Assign it over to the normal tid_tx array * where it "goes live". */ - spin_lock_bh(&sta->lock); sta->ampdu_mlme.tid_start_tx[tid] = NULL; /* could there be a race? */ @@ -300,6 +301,7 @@ void ieee80211_ba_session_work(struct work_struct *work) ieee80211_tx_ba_session_handle_start(sta, tid); continue; } + spin_unlock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index bd12fc54266c..4208dbd5861f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -203,6 +203,7 @@ struct tid_ampdu_rx { * driver requested to close until the work for it runs * @mtx: mutex to protect all TX data (except non-NULL assignments * to tid_tx[idx], which are protected by the sta spinlock) + * tid_start_tx is also protected by sta->lock. */ struct sta_ampdu_mlme { struct mutex mtx; -- cgit From a06a2d378dbf099c874ad58de07a8e54ffdc94d3 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 12 Jun 2013 21:04:16 +0800 Subject: net: ping_check_bind_addr() etc. can be static net/ipv4/ping.c:286:5: sparse: symbol 'ping_check_bind_addr' was not declared. Should it be static? net/ipv4/ping.c:355:6: sparse: symbol 'ping_set_saddr' was not declared. Should it be static? net/ipv4/ping.c:370:6: sparse: symbol 'ping_clear_saddr' was not declared. Should it be static? net/ipv6/ping.c:60:5: sparse: symbol 'dummy_ipv6_recv_error' was not declared. Should it be static? net/ipv6/ping.c:64:5: sparse: symbol 'dummy_ip6_datagram_recv_ctl' was not declared. Should it be static? net/ipv6/ping.c:69:5: sparse: symbol 'dummy_icmpv6_err_convert' was not declared. Should it be static? net/ipv6/ping.c:73:6: sparse: symbol 'dummy_ipv6_icmp_error' was not declared. Should it be static? net/ipv6/ping.c:75:5: sparse: symbol 'dummy_ipv6_chk_addr' was not declared. Should it be static? net/ipv6/ping.c:201:5: sparse: symbol 'ping_v6_seq_show' was not declared. Should it be static? Signed-off-by: Fengguang Wu Signed-off-by: David S. Miller --- net/ipv4/ping.c | 8 ++++---- net/ipv6/ping.c | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 1f1b2dd90277..746427c9e719 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -283,8 +283,8 @@ void ping_close(struct sock *sk, long timeout) EXPORT_SYMBOL_GPL(ping_close); /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ -int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, - struct sockaddr *uaddr, int addr_len) { +static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, + struct sockaddr *uaddr, int addr_len) { struct net *net = sock_net(sk); if (sk->sk_family == AF_INET) { struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; @@ -352,7 +352,7 @@ int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, return 0; } -void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) +static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) { if (saddr->sa_family == AF_INET) { struct inet_sock *isk = inet_sk(sk); @@ -367,7 +367,7 @@ void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) } } -void ping_clear_saddr(struct sock *sk, int dif) +static void ping_clear_saddr(struct sock *sk, int dif) { sk->sk_bound_dev_if = dif; if (sk->sk_family == AF_INET) { diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index a43110385918..2b52046e1263 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -57,23 +57,23 @@ static struct inet_protosw pingv6_protosw = { /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) { return -EAFNOSUPPORT; } -int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, - struct sk_buff *skb) +static int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) { return -EAFNOSUPPORT; } -int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) +static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) { return -EAFNOSUPPORT; } -void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, - __be16 port, u32 info, u8 *payload) {} -int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, int strict) +static void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) {} +static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict) { return 0; } @@ -198,7 +198,7 @@ static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) return ping_seq_start(seq, pos, AF_INET6); } -int ping_v6_seq_show(struct seq_file *seq, void *v) +static int ping_v6_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); -- cgit From 194f4a6df2a92c3d0bc65a85facfbc2433b25d06 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Tue, 11 Jun 2013 23:09:29 +0200 Subject: net: make all team port device link events urgent Since team functionality relies heavily on userspace daemon, we need to deliver event to userspace via Netlink as quick as possible. So make all team port device link events urgent. Signed-off-by: Flavio Leitner Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/link_watch.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 8f82a5cc3851..9c3a839322ba 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -92,6 +92,9 @@ static bool linkwatch_urgent_event(struct net_device *dev) if (dev->ifindex != dev->iflink) return true; + if (dev->priv_flags & IFF_TEAM_PORT) + return true; + return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } -- cgit From fe2c6338fd2c6f383c4d4164262f35c8f3708e1f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 11 Jun 2013 23:04:25 -0700 Subject: net: Convert uses of typedef ctl_table to struct ctl_table Reduce the uses of this unnecessary typedef. Done via perl script: $ git grep --name-only -w ctl_table net | \ xargs perl -p -i -e '\ sub trim { my ($local) = @_; $local =~ s/(^\s+|\s+$)//g; return $local; } \ s/\b(? Signed-off-by: David S. Miller --- net/ax25/sysctl_net_ax25.c | 2 +- net/bridge/br_netfilter.c | 4 ++-- net/core/neighbour.c | 6 ++--- net/core/sysctl_net_core.c | 8 +++---- net/decnet/dn_dev.c | 6 ++--- net/decnet/sysctl_net_decnet.c | 6 ++--- net/ipv4/devinet.c | 6 ++--- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/route.c | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 31 +++++++++++++------------- net/ipv6/addrconf.c | 10 ++++----- net/ipv6/icmp.c | 2 +- net/ipv6/route.c | 4 ++-- net/ipv6/sysctl_net_ipv6.c | 4 ++-- net/irda/irsysctl.c | 6 ++--- net/netfilter/ipvs/ip_vs_ctl.c | 8 +++---- net/netfilter/ipvs/ip_vs_lblc.c | 2 +- net/netfilter/ipvs/ip_vs_lblcr.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 4 ++-- net/netfilter/nf_log.c | 2 +- net/netrom/sysctl_net_netrom.c | 2 +- net/phonet/sysctl.c | 4 ++-- net/rds/ib_sysctl.c | 2 +- net/rds/iw_sysctl.c | 2 +- net/rds/sysctl.c | 2 +- net/rose/sysctl_net_rose.c | 2 +- net/sctp/sysctl.c | 10 ++++----- net/sunrpc/sysctl.c | 10 ++++----- net/sunrpc/xprtrdma/svc_rdma.c | 8 +++---- net/sunrpc/xprtrdma/transport.c | 4 ++-- net/sunrpc/xprtsock.c | 4 ++-- net/unix/sysctl_net_unix.c | 2 +- 32 files changed, 86 insertions(+), 85 deletions(-) (limited to 'net') diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index d5744b752511..919a5ce47515 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -29,7 +29,7 @@ static int min_proto[1], max_proto[] = { AX25_PROTO_MAX }; static int min_ds_timeout[1], max_ds_timeout[] = {65535000}; #endif -static const ctl_table ax25_param_table[] = { +static const struct ctl_table ax25_param_table[] = { { .procname = "ip_default_mode", .maxlen = sizeof(int), diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1ed75bfd8d1d..f87736270eaa 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -992,7 +992,7 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { #ifdef CONFIG_SYSCTL static -int brnf_sysctl_call_tables(ctl_table * ctl, int write, +int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, void __user * buffer, size_t * lenp, loff_t * ppos) { int ret; @@ -1004,7 +1004,7 @@ int brnf_sysctl_call_tables(ctl_table * ctl, int write, return ret; } -static ctl_table brnf_table[] = { +static struct ctl_table brnf_table[] = { { .procname = "bridge-nf-call-arptables", .data = &brnf_call_arptables, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5c56b217b999..decaa4b9db2f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2765,11 +2765,11 @@ EXPORT_SYMBOL(neigh_app_ns); static int zero; static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); -static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_unres_qlen(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { int size, ret; - ctl_table tmp = *ctl; + struct ctl_table tmp = *ctl; tmp.extra1 = &zero; tmp.extra2 = &unres_qlen_max; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 4b48f39582b0..637a42e5d589 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -24,12 +24,12 @@ static int one = 1; #ifdef CONFIG_RPS -static int rps_sock_flow_sysctl(ctl_table *table, int write, +static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { unsigned int orig_size, size; int ret, i; - ctl_table tmp = { + struct ctl_table tmp = { .data = &size, .maxlen = sizeof(size), .mode = table->mode @@ -91,7 +91,7 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, #ifdef CONFIG_NET_FLOW_LIMIT static DEFINE_MUTEX(flow_limit_update_mutex); -static int flow_limit_cpu_sysctl(ctl_table *table, int write, +static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -156,7 +156,7 @@ done: return ret; } -static int flow_limit_table_len_sysctl(ctl_table *table, int write, +static int flow_limit_table_len_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 7d9197063ebb..dd0dfb25f4b1 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -158,11 +158,11 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU static int min_priority[1]; static int max_priority[] = { 127 }; /* From DECnet spec */ -static int dn_forwarding_proc(ctl_table *, int, +static int dn_forwarding_proc(struct ctl_table *, int, void __user *, size_t *, loff_t *); static struct dn_dev_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table dn_dev_vars[5]; + struct ctl_table dn_dev_vars[5]; } dn_dev_sysctl = { NULL, { @@ -242,7 +242,7 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) } } -static int dn_forwarding_proc(ctl_table *table, int write, +static int dn_forwarding_proc(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index a55eeccaa72f..5325b541c526 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str) return 0; } -static int dn_node_address_handler(ctl_table *table, int write, +static int dn_node_address_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -183,7 +183,7 @@ static int dn_node_address_handler(ctl_table *table, int write, return 0; } -static int dn_def_dev_handler(ctl_table *table, int write, +static int dn_def_dev_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -246,7 +246,7 @@ static int dn_def_dev_handler(ctl_table *table, int write, return 0; } -static ctl_table dn_table[] = { +static struct ctl_table dn_table[] = { { .procname = "node_address", .maxlen = 7, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 3469506c106d..8d48c392adcc 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1942,7 +1942,7 @@ static void inet_forward_change(struct net *net) } } -static int devinet_conf_proc(ctl_table *ctl, int write, +static int devinet_conf_proc(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -1985,7 +1985,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_sysctl_forward(ctl_table *ctl, int write, +static int devinet_sysctl_forward(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -2028,7 +2028,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, return ret; } -static int ipv4_doint_and_flush(ctl_table *ctl, int write, +static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 567d84168bd2..0a2e0e3e95ba 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -223,7 +223,7 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; -static ctl_table ip_ct_sysctl_table[] = { +static struct ctl_table ip_ct_sysctl_table[] = { { .procname = "ip_conntrack_max", .maxlen = sizeof(int), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 198ea596f2d9..f3fa42eac461 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2448,7 +2448,7 @@ static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; -static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, +static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -2463,7 +2463,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, return -EINVAL; } -static ctl_table ipv4_route_table[] = { +static struct ctl_table ipv4_route_table[] = { { .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index fa2f63fc453b..b2c123c44d69 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -49,13 +49,13 @@ static void set_local_port_range(int range[2]) } /* Validate changes from /proc interface. */ -static int ipv4_local_port_range(ctl_table *table, int write, +static int ipv4_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; int range[2]; - ctl_table tmp = { + struct ctl_table tmp = { .data = &range, .maxlen = sizeof(range), .mode = table->mode, @@ -100,7 +100,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig } /* Validate changes from /proc interface. */ -static int ipv4_ping_group_range(ctl_table *table, int write, +static int ipv4_ping_group_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -108,7 +108,7 @@ static int ipv4_ping_group_range(ctl_table *table, int write, int ret; gid_t urange[2]; kgid_t low, high; - ctl_table tmp = { + struct ctl_table tmp = { .data = &urange, .maxlen = sizeof(urange), .mode = table->mode, @@ -135,11 +135,11 @@ static int ipv4_ping_group_range(ctl_table *table, int write, return ret; } -static int proc_tcp_congestion_control(ctl_table *ctl, int write, +static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char val[TCP_CA_NAME_MAX]; - ctl_table tbl = { + struct ctl_table tbl = { .data = val, .maxlen = TCP_CA_NAME_MAX, }; @@ -153,12 +153,12 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, return ret; } -static int proc_tcp_available_congestion_control(ctl_table *ctl, +static int proc_tcp_available_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; + struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; int ret; tbl.data = kmalloc(tbl.maxlen, GFP_USER); @@ -170,12 +170,12 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl, return ret; } -static int proc_allowed_congestion_control(ctl_table *ctl, +static int proc_allowed_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; + struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; int ret; tbl.data = kmalloc(tbl.maxlen, GFP_USER); @@ -190,7 +190,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } -static int ipv4_tcp_mem(ctl_table *ctl, int write, +static int ipv4_tcp_mem(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -201,7 +201,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, struct mem_cgroup *memcg; #endif - ctl_table tmp = { + struct ctl_table tmp = { .data = &vec, .maxlen = sizeof(vec), .mode = ctl->mode, @@ -233,10 +233,11 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, return 0; } -static int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) { - ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; + struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; int ret; u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 21010fddb203..80449121afa2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4620,13 +4620,13 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) #ifdef CONFIG_SYSCTL static -int addrconf_sysctl_forward(ctl_table *ctl, int write, +int addrconf_sysctl_forward(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; - ctl_table lctl; + struct ctl_table lctl; int ret; /* @@ -4705,13 +4705,13 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) } static -int addrconf_sysctl_disable(ctl_table *ctl, int write, +int addrconf_sysctl_disable(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; - ctl_table lctl; + struct ctl_table lctl; int ret; /* @@ -4733,7 +4733,7 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write, static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table addrconf_vars[DEVCONF_MAX+1]; + struct ctl_table addrconf_vars[DEVCONF_MAX+1]; } addrconf_sysctl __read_mostly = { .sysctl_header = NULL, .addrconf_vars = { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4b4890bbe16d..7cfc8d284870 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -976,7 +976,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL -ctl_table ipv6_icmp_table_template[] = { +struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2b874185ebb2..7ca87b37c0ef 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2790,7 +2790,7 @@ static const struct file_operations rt6_stats_seq_fops = { #ifdef CONFIG_SYSCTL static -int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, +int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net; @@ -2805,7 +2805,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, return 0; } -ctl_table ipv6_route_table_template[] = { +struct ctl_table ipv6_route_table_template[] = { { .procname = "flush", .data = &init_net.ipv6.sysctl.flush_delay, diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index e85c48bd404f..107b2f1d90ae 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,7 +16,7 @@ #include #include -static ctl_table ipv6_table_template[] = { +static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", .data = &init_net.ipv6.sysctl.bindv6only, @@ -27,7 +27,7 @@ static ctl_table ipv6_table_template[] = { { } }; -static ctl_table ipv6_rotable[] = { +static struct ctl_table ipv6_rotable[] = { { .procname = "mld_max_msf", .data = &sysctl_mld_max_msf, diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index de73f6496db5..d6a59651767a 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -73,7 +73,7 @@ static int min_lap_keepalive_time = 100; /* 100us */ /* For other sysctl, I've no idea of the range. Maybe Dag could help * us on that - Jean II */ -static int do_devname(ctl_table *table, int write, +static int do_devname(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -90,7 +90,7 @@ static int do_devname(ctl_table *table, int write, } -static int do_discovery(ctl_table *table, int write, +static int do_discovery(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -111,7 +111,7 @@ static int do_discovery(ctl_table *table, int write, } /* One file */ -static ctl_table irda_table[] = { +static struct ctl_table irda_table[] = { { .procname = "discovery", .data = &sysctl_discovery, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index df05c1c276f0..edb88fbcb1bd 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1575,7 +1575,7 @@ static int zero; static int three = 3; static int -proc_do_defense_mode(ctl_table *table, int write, +proc_do_defense_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; @@ -1596,7 +1596,7 @@ proc_do_defense_mode(ctl_table *table, int write, } static int -proc_do_sync_threshold(ctl_table *table, int write, +proc_do_sync_threshold(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1616,7 +1616,7 @@ proc_do_sync_threshold(ctl_table *table, int write, } static int -proc_do_sync_mode(ctl_table *table, int write, +proc_do_sync_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1634,7 +1634,7 @@ proc_do_sync_mode(ctl_table *table, int write, } static int -proc_do_sync_ports(ctl_table *table, int write, +proc_do_sync_ports(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 5ea26bd87743..44595b8ae37f 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -118,7 +118,7 @@ struct ip_vs_lblc_table { * IPVS LBLC sysctl table */ #ifdef CONFIG_SYSCTL -static ctl_table vs_vars_table[] = { +static struct ctl_table vs_vars_table[] = { { .procname = "lblc_expiration", .data = NULL, diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 50123c2ab484..876937db0bf4 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -299,7 +299,7 @@ struct ip_vs_lblcr_table { * IPVS LBLCR sysctl table */ -static ctl_table vs_vars_table[] = { +static struct ctl_table vs_vars_table[] = { { .procname = "lblcr_expiration", .data = NULL, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index bd700b4013c1..f641751dba9d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -408,7 +408,7 @@ static int log_invalid_proto_max = 255; static struct ctl_table_header *nf_ct_netfilter_header; -static ctl_table nf_ct_sysctl_table[] = { +static struct ctl_table nf_ct_sysctl_table[] = { { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, @@ -458,7 +458,7 @@ static ctl_table nf_ct_sysctl_table[] = { #define NET_NF_CONNTRACK_MAX 2089 -static ctl_table nf_ct_netfilter_table[] = { +static struct ctl_table nf_ct_netfilter_table[] = { { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 4b60a87b7596..85296d4eac0e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -245,7 +245,7 @@ static const struct file_operations nflog_file_ops = { static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; -static int nf_log_proc_dostring(ctl_table *table, int write, +static int nf_log_proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 42f630b9a698..ba1c368b3f18 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -34,7 +34,7 @@ static int min_reset[] = {0}, max_reset[] = {1}; static struct ctl_table_header *nr_table_header; -static ctl_table nr_table[] = { +static struct ctl_table nr_table[] = { { .procname = "default_path_quality", .data = &sysctl_netrom_default_path_quality, diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index d6bbbbd0af18..c02a8c4bc11f 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -61,13 +61,13 @@ void phonet_get_local_port_range(int *min, int *max) } while (read_seqretry(&local_port_range_lock, seq)); } -static int proc_local_port_range(ctl_table *table, int write, +static int proc_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; int range[2] = {local_port_range[0], local_port_range[1]}; - ctl_table tmp = { + struct ctl_table tmp = { .data = &range, .maxlen = sizeof(range), .mode = table->mode, diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c index 7e643bafb4af..e4e41b3afce7 100644 --- a/net/rds/ib_sysctl.c +++ b/net/rds/ib_sysctl.c @@ -61,7 +61,7 @@ static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; */ unsigned int rds_ib_sysctl_flow_control = 0; -static ctl_table rds_ib_sysctl_table[] = { +static struct ctl_table rds_ib_sysctl_table[] = { { .procname = "max_send_wr", .data = &rds_ib_sysctl_max_send_wr, diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c index 5d5ebd576f3f..89c91515ed0c 100644 --- a/net/rds/iw_sysctl.c +++ b/net/rds/iw_sysctl.c @@ -55,7 +55,7 @@ static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL; unsigned int rds_iw_sysctl_flow_control = 1; -static ctl_table rds_iw_sysctl_table[] = { +static struct ctl_table rds_iw_sysctl_table[] = { { .procname = "max_send_wr", .data = &rds_iw_sysctl_max_send_wr, diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index 907214b4c4d0..b5cb2aa08f33 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -49,7 +49,7 @@ unsigned int rds_sysctl_max_unacked_bytes = (16 << 20); unsigned int rds_sysctl_ping_enable = 1; -static ctl_table rds_sysctl_rds_table[] = { +static struct ctl_table rds_sysctl_rds_table[] = { { .procname = "reconnect_min_delay_ms", .data = &rds_sysctl_reconnect_min_jiffies, diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index 94ca9c2ccd69..89a9278795a9 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -24,7 +24,7 @@ static int min_window[] = {1}, max_window[] = {7}; static struct ctl_table_header *rose_table_header; -static ctl_table rose_table[] = { +static struct ctl_table rose_table[] = { { .procname = "restart_request_timeout", .data = &sysctl_rose_restart_request_timeout, diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index bf3c6e8fc401..9a5c4c9eddaf 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -62,12 +62,12 @@ extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; -static int proc_sctp_do_hmac_alg(ctl_table *ctl, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static ctl_table sctp_table[] = { +static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", .data = &sysctl_sctp_mem, @@ -93,7 +93,7 @@ static ctl_table sctp_table[] = { { /* sentinel */ } }; -static ctl_table sctp_net_table[] = { +static struct ctl_table sctp_net_table[] = { { .procname = "rto_initial", .data = &init_net.sctp.rto_initial, @@ -300,14 +300,14 @@ static ctl_table sctp_net_table[] = { { /* sentinel */ } }; -static int proc_sctp_do_hmac_alg(ctl_table *ctl, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; char tmp[8]; - ctl_table tbl; + struct ctl_table tbl; int ret; int changed = 0; char *none = "none"; diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index af7d339add9d..c99c58e2ee66 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(nlm_debug); #ifdef RPC_DEBUG static struct ctl_table_header *sunrpc_table_header; -static ctl_table sunrpc_table[]; +static struct ctl_table sunrpc_table[]; void rpc_register_sysctl(void) @@ -58,7 +58,7 @@ rpc_unregister_sysctl(void) } } -static int proc_do_xprt(ctl_table *table, int write, +static int proc_do_xprt(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; @@ -73,7 +73,7 @@ static int proc_do_xprt(ctl_table *table, int write, } static int -proc_dodebug(ctl_table *table, int write, +proc_dodebug(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[20], c, *s; @@ -135,7 +135,7 @@ done: } -static ctl_table debug_table[] = { +static struct ctl_table debug_table[] = { { .procname = "rpc_debug", .data = &rpc_debug, @@ -173,7 +173,7 @@ static ctl_table debug_table[] = { { } }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 8343737e85f4..c1b6270262c2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -84,7 +84,7 @@ struct workqueue_struct *svc_rdma_wq; * resets the associated statistic to zero. Any read returns it's * current value. */ -static int read_reset_stat(ctl_table *table, int write, +static int read_reset_stat(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -119,7 +119,7 @@ static int read_reset_stat(ctl_table *table, int write, } static struct ctl_table_header *svcrdma_table_header; -static ctl_table svcrdma_parm_table[] = { +static struct ctl_table svcrdma_parm_table[] = { { .procname = "max_requests", .data = &svcrdma_max_requests, @@ -214,7 +214,7 @@ static ctl_table svcrdma_parm_table[] = { { }, }; -static ctl_table svcrdma_table[] = { +static struct ctl_table svcrdma_table[] = { { .procname = "svc_rdma", .mode = 0555, @@ -223,7 +223,7 @@ static ctl_table svcrdma_table[] = { { }, }; -static ctl_table svcrdma_root_table[] = { +static struct ctl_table svcrdma_root_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 794312f22b9b..285dc0884115 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -86,7 +86,7 @@ static unsigned int max_memreg = RPCRDMA_LAST - 1; static struct ctl_table_header *sunrpc_table_header; -static ctl_table xr_tunables_table[] = { +static struct ctl_table xr_tunables_table[] = { { .procname = "rdma_slot_table_entries", .data = &xprt_rdma_slot_table_entries, @@ -138,7 +138,7 @@ static ctl_table xr_tunables_table[] = { { }, }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ffd50348a509..412de7cfcc80 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -87,7 +87,7 @@ static struct ctl_table_header *sunrpc_table_header; * FIXME: changing the UDP slot table size should also resize the UDP * socket buffers for existing UDP transports */ -static ctl_table xs_tunables_table[] = { +static struct ctl_table xs_tunables_table[] = { { .procname = "udp_slot_table_entries", .data = &xprt_udp_slot_table_entries, @@ -143,7 +143,7 @@ static ctl_table xs_tunables_table[] = { { }, }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 8800604c93f4..b3d515021b74 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -15,7 +15,7 @@ #include -static ctl_table unix_table[] = { +static struct ctl_table unix_table[] = { { .procname = "max_dgram_qlen", .data = &init_net.unx.sysctl_max_dgram_qlen, -- cgit From 85f16525a2eb66e6092cbd8dcf42371df8334ed0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 11 Jun 2013 15:35:32 -0700 Subject: tcp: properly send new data in fast recovery in first RTT Linux sends new unset data during disorder and recovery state if all (suspected) lost packets have been retransmitted ( RFC5681, section 3.2 step 1 & 2, RFC3517 section 4, NexSeg() Rule 2). One requirement is to keep the receive window about twice the estimated sender's congestion window (tcp_rcv_space_adjust()), assuming the fast retransmits repair the losses in the next round trip. But currently it's not the case on the first round trip in either normal or Fast Open connection, beucase the initial receive window is identical to (expected) sender's initial congestion window. The fix is to double it. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 13 ++----------- net/ipv4/tcp_output.c | 33 ++++++++++++++++++--------------- 2 files changed, 20 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 907311c9a012..46271cdcf088 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -347,22 +347,13 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) } /* 3. Tuning rcvbuf, when connection enters established state. */ - static void tcp_fixup_rcvbuf(struct sock *sk) { u32 mss = tcp_sk(sk)->advmss; - u32 icwnd = TCP_DEFAULT_INIT_RCVWND; int rcvmem; - /* Limit to 10 segments if mss <= 1460, - * or 14600/mss segments, with a minimum of two segments. - */ - if (mss > 1460) - icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); - - rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER); - - rcvmem *= icwnd; + rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * + tcp_default_init_rwnd(mss); if (sk->sk_rcvbuf < rcvmem) sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ec335fabd5cc..3dd46eab3b05 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -181,6 +181,21 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } + +u32 tcp_default_init_rwnd(u32 mss) +{ + /* Initial receive window should be twice of TCP_INIT_CWND to + * enable proper sending of new unset data during fast recovery + * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a + * limit when mss is larger than 1460. + */ + u32 init_rwnd = TCP_INIT_CWND * 2; + + if (mss > 1460) + init_rwnd = max((1460 * init_rwnd) / mss, 2U); + return init_rwnd; +} + /* Determine a window scaling and initial window to offer. * Based on the assumption that the given amount of space * will be offered. Store the results in the tp structure. @@ -230,22 +245,10 @@ void tcp_select_initial_window(int __space, __u32 mss, } } - /* Set initial window to a value enough for senders starting with - * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place - * a limit on the initial window when mss is larger than 1460. - */ if (mss > (1 << *rcv_wscale)) { - int init_cwnd = TCP_DEFAULT_INIT_RCVWND; - if (mss > 1460) - init_cwnd = - max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); - /* when initializing use the value from init_rcv_wnd - * rather than the default from above - */ - if (init_rcv_wnd) - *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); - else - *rcv_wnd = min(*rcv_wnd, init_cwnd * mss); + if (!init_rcv_wnd) /* Use default unless specified otherwise */ + init_rcv_wnd = tcp_default_init_rwnd(mss); + *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); } /* Set the clamp no higher than max representable value */ -- cgit From 817cee767523769cbc5ac94e439cde0c21752cbc Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Sun, 19 May 2013 14:23:57 +0300 Subject: mac80211: track AP's beacon rate and give it to the driver Track the AP's beacon rate in the scan BSS data and in the interface configuration to let the drivers know which rate the AP is using. This information may be used by drivers, in our case to let the firmware optimise beacon RX. Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 8 +++++++- net/mac80211/scan.c | 9 +++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7a6f1a0207ec..a4dfb0be53d7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -94,6 +94,7 @@ struct ieee80211_bss { #define IEEE80211_MAX_SUPP_RATES 32 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; size_t supp_rates_len; + struct ieee80211_rate *beacon_rate; /* * During association, we save an ERP value from a probe response so diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ad9bb9e10cbb..87f2d4df31f8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1779,8 +1779,10 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, * probably just won't work at all. */ bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1; + bss_conf->beacon_rate = bss->beacon_rate; bss_info_changed |= BSS_CHANGED_BEACON_INFO; } else { + bss_conf->beacon_rate = NULL; bss_conf->dtim_period = 0; } @@ -1903,6 +1905,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.chswitch_timer); sdata->vif.bss_conf.dtim_period = 0; + sdata->vif.bss_conf.beacon_rate = NULL; + ifmgd->have_beacon = false; ifmgd->flags = 0; @@ -2754,8 +2758,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, channel); - if (bss) + if (bss) { ieee80211_rx_bss_put(local, bss); + sdata->vif.bss_conf.beacon_rate = bss->beacon_rate; + } if (!sdata->u.mgd.associated || !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 99b103921a4b..1b122a79b0d8 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -140,6 +140,15 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->valid_data |= IEEE80211_BSS_VALID_WMM; } + if (beacon) { + struct ieee80211_supported_band *sband = + local->hw.wiphy->bands[rx_status->band]; + if (!(rx_status->flag & RX_FLAG_HT) && + !(rx_status->flag & RX_FLAG_VHT)) + bss->beacon_rate = + &sband->bitrates[rx_status->rate_idx]; + } + return bss; } -- cgit From 38745c7414c0d9a0567c5b4a4e056c6b7f807179 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 10 Jun 2013 10:34:14 +0300 Subject: mac80211: Fix VHT bandwidth change event Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 171344d4eb7c..97c289414e32 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -396,7 +396,7 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, new_bw = ieee80211_sta_cur_vht_bw(sta); if (new_bw != sta->sta.bandwidth) { sta->sta.bandwidth = new_bw; - changed |= IEEE80211_RC_NSS_CHANGED; + changed |= IEEE80211_RC_BW_CHANGED; } change: -- cgit From 1095e69f47926db6f1350a9d6a38626521580e87 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Wed, 22 May 2013 11:36:17 +0200 Subject: NFC: NCI: Fix skb->dev usage skb->dev is used for carrying a net_device pointer and not an nci_dev pointer. Remove usage of skb-dev to carry nci_dev and replace it by parameter in nci_recv_frame(), nci_send_frame() and driver send() functions. NfcWilink driver is also updated to use those functions. Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/core.c | 17 ++++++----------- net/nfc/nci/data.c | 2 -- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 48ada0ec749e..8e0dbbeee9e3 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -797,12 +797,11 @@ EXPORT_SYMBOL(nci_unregister_device); /** * nci_recv_frame - receive frame from NCI drivers * + * @ndev: The nci device * @skb: The sk_buff to receive */ -int nci_recv_frame(struct sk_buff *skb) +int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) { - struct nci_dev *ndev = (struct nci_dev *) skb->dev; - pr_debug("len %d\n", skb->len); if (!ndev || (!test_bit(NCI_UP, &ndev->flags) && @@ -819,10 +818,8 @@ int nci_recv_frame(struct sk_buff *skb) } EXPORT_SYMBOL(nci_recv_frame); -static int nci_send_frame(struct sk_buff *skb) +static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) { - struct nci_dev *ndev = (struct nci_dev *) skb->dev; - pr_debug("len %d\n", skb->len); if (!ndev) { @@ -833,7 +830,7 @@ static int nci_send_frame(struct sk_buff *skb) /* Get rid of skb owner, prior to sending to the driver. */ skb_orphan(skb); - return ndev->ops->send(skb); + return ndev->ops->send(ndev, skb); } /* Send NCI command */ @@ -861,8 +858,6 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) if (plen) memcpy(skb_put(skb, plen), payload, plen); - skb->dev = (void *) ndev; - skb_queue_tail(&ndev->cmd_q, skb); queue_work(ndev->cmd_wq, &ndev->cmd_work); @@ -894,7 +889,7 @@ static void nci_tx_work(struct work_struct *work) nci_conn_id(skb->data), nci_plen(skb->data)); - nci_send_frame(skb); + nci_send_frame(ndev, skb); mod_timer(&ndev->data_timer, jiffies + msecs_to_jiffies(NCI_DATA_TIMEOUT)); @@ -963,7 +958,7 @@ static void nci_cmd_work(struct work_struct *work) nci_opcode_oid(nci_opcode(skb->data)), nci_plen(skb->data)); - nci_send_frame(skb); + nci_send_frame(ndev, skb); mod_timer(&ndev->cmd_timer, jiffies + msecs_to_jiffies(NCI_CMD_TIMEOUT)); diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 76c48c5324f8..2a9399dd6c68 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -80,8 +80,6 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, nci_mt_set((__u8 *)hdr, NCI_MT_DATA_PKT); nci_pbf_set((__u8 *)hdr, pbf); - - skb->dev = (void *) ndev; } static int nci_queue_tx_data_frags(struct nci_dev *ndev, -- cgit From 9674da8759df0d6c0d24e1ede6e2a1acdef91e3c Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Mon, 29 Apr 2013 17:13:27 +0200 Subject: NFC: Add firmware upload netlink command As several NFC chipsets can have their firmwares upgraded and reflashed, this patchset adds a new netlink command to trigger that the driver loads or flashes a new firmware. This will allows userspace triggered firmware upgrade through netlink. The firmware name or hint is passed as a parameter, and the driver will eventually fetch the firmware binary through the request_firmware API. The cmd can only be executed when the nfc dev is not in use. Actual firmware loading/flashing is an asynchronous operation. Result of the operation shall send a new event up to user space through the nfc dev multicast socket. During operation, the nfc dev is not openable and thus not usable. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 46 ++++++++++++++++++++++++++++++++++++++++ net/nfc/netlink.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 5 +++++ 3 files changed, 114 insertions(+) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 40d2527693da..eb3cecf1764e 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -44,6 +44,47 @@ DEFINE_MUTEX(nfc_devlist_mutex); /* NFC device ID bitmap */ static DEFINE_IDA(nfc_index_ida); +int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) +{ + int rc = 0; + + pr_debug("%s do firmware %s\n", dev_name(&dev->dev), firmware_name); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (dev->dev_up) { + rc = -EBUSY; + goto error; + } + + if (!dev->ops->fw_upload) { + rc = -EOPNOTSUPP; + goto error; + } + + dev->fw_upload_in_progress = true; + rc = dev->ops->fw_upload(dev, firmware_name); + if (rc) + dev->fw_upload_in_progress = false; + +error: + device_unlock(&dev->dev); + return rc; +} + +int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +{ + dev->fw_upload_in_progress = false; + + return nfc_genl_fw_upload_done(dev, firmware_name); +} +EXPORT_SYMBOL(nfc_fw_upload_done); + /** * nfc_dev_up - turn on the NFC device * @@ -69,6 +110,11 @@ int nfc_dev_up(struct nfc_dev *dev) goto error; } + if (dev->fw_upload_in_progress) { + rc = -EBUSY; + goto error; + } + if (dev->dev_up) { rc = -EALREADY; goto error; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f0c4d61f37c0..1deadad9a285 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -56,6 +56,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 }, [NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 }, [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, + [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, + .len = NFC_FIRMWARE_NAME_MAXSIZE }, }; static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { @@ -1025,6 +1027,62 @@ exit: return rc; } +static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx; + char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1]; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], + sizeof(firmware_name)); + + rc = nfc_fw_upload(dev, firmware_name); + + nfc_put_device(dev); + return rc; +} + +int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_CMD_FW_UPLOAD); + if (!hdr) + goto free_msg; + + if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) || + nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -1084,6 +1142,11 @@ static struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_llc_sdreq, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_FW_UPLOAD, + .doit = nfc_genl_fw_upload, + .policy = nfc_genl_policy, + }, }; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index afa1f84ba040..cf0c48165996 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -120,6 +120,11 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter) class_dev_iter_exit(iter); } +int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name); +int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); + +int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); + int nfc_dev_up(struct nfc_dev *dev); int nfc_dev_down(struct nfc_dev *dev); -- cgit From 9a695d23aab889273821c91b4132f1ed315b251b Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Mon, 29 Apr 2013 17:47:42 +0200 Subject: NFC: HCI: Implement fw_upload ops This is a simple forward to the HCI driver. When driver is done with the operation, it shall directly notify NFC Core by calling nfc_fw_upload_done(). Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 91020b210d87..b7e4dac5654e 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -779,6 +779,16 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) } } +static int hci_fw_upload(struct nfc_dev *nfc_dev, const char *firmware_name) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->fw_upload) + return hdev->ops->fw_upload(hdev, firmware_name); + + return -ENOTSUPP; +} + static struct nfc_ops hci_nfc_ops = { .dev_up = hci_dev_up, .dev_down = hci_dev_down, @@ -791,6 +801,7 @@ static struct nfc_ops hci_nfc_ops = { .im_transceive = hci_transceive, .tm_send = hci_tm_send, .check_presence = hci_check_presence, + .fw_upload = hci_fw_upload, }; struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, -- cgit From a395298c9c96748cbd6acee4cb9a5ba13fbb3ab8 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Sat, 25 May 2013 01:21:21 +0200 Subject: NFC: HCI: Follow a positive code path in the HCI ops implementations Exiting on the error case is more typical to the kernel coding style. Signed-off-by: Samuel Ortiz --- net/nfc/hci/core.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index b7e4dac5654e..d2ef1e2ee0c6 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -570,21 +570,21 @@ static int hci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->dep_link_up) - return hdev->ops->dep_link_up(hdev, target, comm_mode, - gb, gb_len); + if (!hdev->ops->dep_link_up) + return 0; - return 0; + return hdev->ops->dep_link_up(hdev, target, comm_mode, + gb, gb_len); } static int hci_dep_link_down(struct nfc_dev *nfc_dev) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->dep_link_down) - return hdev->ops->dep_link_down(hdev); + if (!hdev->ops->dep_link_down) + return 0; - return 0; + return hdev->ops->dep_link_down(hdev); } static int hci_activate_target(struct nfc_dev *nfc_dev, @@ -673,12 +673,12 @@ static int hci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->tm_send) - return hdev->ops->tm_send(hdev, skb); - - kfree_skb(skb); + if (!hdev->ops->tm_send) { + kfree_skb(skb); + return -ENOTSUPP; + } - return -ENOTSUPP; + return hdev->ops->tm_send(hdev, skb); } static int hci_check_presence(struct nfc_dev *nfc_dev, @@ -686,10 +686,10 @@ static int hci_check_presence(struct nfc_dev *nfc_dev, { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->check_presence) - return hdev->ops->check_presence(hdev, target); + if (!hdev->ops->check_presence) + return 0; - return 0; + return hdev->ops->check_presence(hdev, target); } static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err) @@ -783,10 +783,10 @@ static int hci_fw_upload(struct nfc_dev *nfc_dev, const char *firmware_name) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->fw_upload) - return hdev->ops->fw_upload(hdev, firmware_name); + if (!hdev->ops->fw_upload) + return -ENOTSUPP; - return -ENOTSUPP; + return hdev->ops->fw_upload(hdev, firmware_name); } static struct nfc_ops hci_nfc_ops = { -- cgit From 5f121b9a83b499a61ed44e5ba619c7de8f7271ad Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 13 Jun 2013 15:29:38 -0400 Subject: net-rps: fixes for rps flow limit Caught by sparse: - __rcu: missing annotation to sd->flow_limit - __user: direct access in cpumask_scnprintf Also - add endline character when printing bitmap if room in buffer - avoid bucket overflow by reducing FLOW_LIMIT_HISTORY The last item warrants some explanation. The hashtable buckets are subject to overflow if FLOW_LIMIT_HISTORY is larger than or equal to bucket size, since all packets may end up in a single bucket. The current (rather arbitrary) history value of 256 happens to match the buffer size (u8). As a result, with a single flow, the first 128 packets are accepted (correct), the second 128 packets dropped (correct) and then the history[] array has filled, so that each subsequent new packet causes an increment in the bucket for new_flow plus a decrement for old_flow: a steady state. This is fine if packets are dropped, as the steady state goes away as soon as a mix of traffic reappears. But, because the 256th packet overflowed the bucket to 0: no packets are dropped. Instead of explicitly adding an overflow check, this patch changes FLOW_LIMIT_HISTORY to never be able to overflow a single bucket. Reported-by: Fengguang Wu (first item) Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/sysctl_net_core.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 637a42e5d589..78c746e016ae 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -132,6 +132,8 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, write_unlock: mutex_unlock(&flow_limit_update_mutex); } else { + char kbuf[128]; + if (*ppos || !*lenp) { *lenp = 0; goto done; @@ -146,9 +148,20 @@ write_unlock: } rcu_read_unlock(); - len = cpumask_scnprintf(buffer, *lenp, mask); - *lenp = len + 1; - *ppos += len + 1; + len = min(sizeof(kbuf) - 1, *lenp); + len = cpumask_scnprintf(kbuf, len, mask); + if (!len) { + *lenp = 0; + goto done; + } + if (len < *lenp) + kbuf[len++] = '\n'; + if (copy_to_user(buffer, kbuf, len)) { + ret = -EFAULT; + goto done; + } + *lenp = len; + *ppos += len; } done: -- cgit From ca4ec90b31d1ecf01087c607933cf792057bc8bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Jun 2013 07:58:30 -0700 Subject: htb: reorder struct htb_class fields for performance htb_class structures are big, and source of false sharing on SMP. By carefully splitting them in two parts, we can improve performance. I got 9 % performance increase on a 24 threads machine, with 200 concurrent netperf in TCP_RR mode, using a HTB hierarchy of 4 classes. Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 62 ++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 1a3655a606c1..7954e73d118a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -76,23 +76,39 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; -/* interior & leaf nodes; props specific to leaves are marked L: */ +/* interior & leaf nodes; props specific to leaves are marked L: + * To reduce false sharing, place mostly read fields at beginning, + * and mostly written ones at the end. + */ struct htb_class { struct Qdisc_class_common common; - /* general class parameters */ - struct gnet_stats_basic_packed bstats; - struct gnet_stats_queue qstats; + struct psched_ratecfg rate; + struct psched_ratecfg ceil; + s64 buffer, cbuffer;/* token bucket depth/rate */ + s64 mbuffer; /* max wait time */ + int prio; /* these two are used only by leaves... */ + int quantum; /* but stored for parent-to-leaf return */ + + struct tcf_proto *filter_list; /* class attached filters */ + int filter_cnt; + int refcnt; /* usage count of this class */ + + int level; /* our level (see above) */ + unsigned int children; + struct htb_class *parent; /* parent class */ + struct gnet_stats_rate_est64 rate_est; - struct tc_htb_xstats xstats; /* our special stats */ - int refcnt; /* usage count of this class */ - /* topology */ - int level; /* our level (see above) */ - unsigned int children; - struct htb_class *parent; /* parent class */ + /* + * Written often fields + */ + struct gnet_stats_basic_packed bstats; + struct gnet_stats_queue qstats; + struct tc_htb_xstats xstats; /* our special stats */ - int prio; /* these two are used only by leaves... */ - int quantum; /* but stored for parent-to-leaf return */ + /* token bucket parameters */ + s64 tokens, ctokens;/* current number of tokens */ + s64 t_c; /* checkpoint time */ union { struct htb_class_leaf { @@ -111,24 +127,12 @@ struct htb_class { u32 last_ptr_id[TC_HTB_NUMPRIO]; } inner; } un; - struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ - struct rb_node pq_node; /* node for event queue */ - s64 pq_key; - - int prio_activity; /* for which prios are we active */ - enum htb_cmode cmode; /* current mode of the class */ + s64 pq_key; - /* class attached filters */ - struct tcf_proto *filter_list; - int filter_cnt; - - /* token bucket parameters */ - struct psched_ratecfg rate; - struct psched_ratecfg ceil; - s64 buffer, cbuffer; /* token bucket depth/rate */ - s64 mbuffer; /* max wait time */ - s64 tokens, ctokens; /* current number of tokens */ - s64 t_c; /* checkpoint time */ + int prio_activity; /* for which prios are we active */ + enum htb_cmode cmode; /* current mode of the class */ + struct rb_node pq_node; /* node for event queue */ + struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ }; struct htb_sched { -- cgit From 1d8faf48c74b8329a0322dc4b2a2030ae5003c86 Mon Sep 17 00:00:00 2001 From: Rony Efraim Date: Thu, 13 Jun 2013 13:19:10 +0300 Subject: net/core: Add VF link state control Add netlink directives and ndo entry to allow for controling VF link, which can be in one of three states: Auto - VF link state reflects the PF link state (default) Up - VF link state is up, traffic from VF to VF works even if the actual PF link is down Down - VF link state is down, no traffic from/to this VF, can be of use while configuring the VF Signed-off-by: Rony Efraim Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 49c14451d8ab..9007533867f0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -947,6 +947,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_vlan vf_vlan; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; + struct ifla_vf_link_state vf_linkstate; /* * Not all SR-IOV capable drivers support the @@ -956,18 +957,24 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, */ ivi.spoofchk = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); + /* The default value for VF link state is "auto" + * IFLA_VF_LINK_STATE_AUTO which equals zero + */ + ivi.linkstate = 0; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = - vf_spoofchk.vf = ivi.vf; + vf_spoofchk.vf = + vf_linkstate.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; vf_tx_rate.rate = ivi.tx_rate; vf_spoofchk.setting = ivi.spoofchk; + vf_linkstate.link_state = ivi.linkstate; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -978,7 +985,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), - &vf_spoofchk)) + &vf_spoofchk) || + nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), + &vf_linkstate)) goto nla_put_failure; nla_nest_end(skb, vf); } @@ -1238,6 +1247,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ivs->setting); break; } + case IFLA_VF_LINK_STATE: { + struct ifla_vf_link_state *ivl; + ivl = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + break; + } default: err = -EINVAL; break; -- cgit From 8a00a61b0ef2bfd1b468dd20c0d0b1a94a8f7475 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Wed, 29 May 2013 15:35:02 +0200 Subject: NFC: Add basic NCI over SPI The NFC Forum defines a transport interface based on Serial Peripheral Interface (SPI) for the NFC Controller Interface (NCI). This module implements the SPI transport of NCI, calling SPI module directly to read/write data to NFC controller (NFCC). NFCC driver should provide functions performing device open and close. It should also provide functions asserting/de-asserting interruption to prevent TX/RX race conditions. NFCC driver can also fix a delay between transactions if needed by the hardware. Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/Kconfig | 10 ++++ net/nfc/nci/Makefile | 4 +- net/nfc/nci/spi.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 net/nfc/nci/spi.c (limited to 'net') diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig index 6d69b5f0f19b..2a2416080b4f 100644 --- a/net/nfc/nci/Kconfig +++ b/net/nfc/nci/Kconfig @@ -8,3 +8,13 @@ config NFC_NCI Say Y here to compile NCI support into the kernel or say M to compile it as module (nci). + +config NFC_NCI_SPI + depends on NFC_NCI && SPI + bool "NCI over SPI protocol support" + default n + help + NCI (NFC Controller Interface) is a communication protocol between + an NFC Controller (NFCC) and a Device Host (DH). + + Say yes if you use an NCI driver that requires SPI link layer. diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index cdb3a2e44471..7aeedc43187d 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -4,4 +4,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o -nci-objs := core.o data.o lib.o ntf.o rsp.o \ No newline at end of file +nci-objs := core.o data.o lib.o ntf.o rsp.o + +nci-$(CONFIG_NFC_NCI_SPI) += spi.o diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c new file mode 100644 index 000000000000..ebcdba51418c --- /dev/null +++ b/net/nfc/nci/spi.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2013 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#define pr_fmt(fmt) "nci_spi: %s: " fmt, __func__ + +#include +#include +#include +#include + +#define NCI_SPI_HDR_LEN 4 +#define NCI_SPI_CRC_LEN 2 + +static int nci_spi_open(struct nci_dev *nci_dev) +{ + struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); + + return ndev->ops->open(ndev); +} + +static int nci_spi_close(struct nci_dev *nci_dev) +{ + struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); + + return ndev->ops->close(ndev); +} + +static int nci_spi_send(struct nci_dev *nci_dev, struct sk_buff *skb) +{ + return 0; +} + +static struct nci_ops nci_spi_ops = { + .open = nci_spi_open, + .close = nci_spi_close, + .send = nci_spi_send, +}; + +/* ---- Interface to NCI SPI drivers ---- */ + +/** + * nci_spi_allocate_device - allocate a new nci spi device + * + * @spi: SPI device + * @ops: device operations + * @supported_protocols: NFC protocols supported by the device + * @supported_se: NFC Secure Elements supported by the device + * @acknowledge_mode: Acknowledge mode used by the device + * @delay: delay between transactions in us + */ +struct nci_spi_dev *nci_spi_allocate_device(struct spi_device *spi, + struct nci_spi_ops *ops, + u32 supported_protocols, + u32 supported_se, + u8 acknowledge_mode, + unsigned int delay) +{ + struct nci_spi_dev *ndev; + int tailroom = 0; + + if (!ops->open || !ops->close || !ops->assert_int || !ops->deassert_int) + return NULL; + + if (!supported_protocols) + return NULL; + + ndev = devm_kzalloc(&spi->dev, sizeof(struct nci_dev), GFP_KERNEL); + if (!ndev) + return NULL; + + ndev->ops = ops; + ndev->acknowledge_mode = acknowledge_mode; + ndev->xfer_udelay = delay; + + if (acknowledge_mode == NCI_SPI_CRC_ENABLED) + tailroom += NCI_SPI_CRC_LEN; + + ndev->nci_dev = nci_allocate_device(&nci_spi_ops, supported_protocols, + supported_se, NCI_SPI_HDR_LEN, + tailroom); + if (!ndev->nci_dev) + return NULL; + + nci_set_drvdata(ndev->nci_dev, ndev); + + return ndev; +} +EXPORT_SYMBOL_GPL(nci_spi_allocate_device); + +/** + * nci_spi_free_device - deallocate nci spi device + * + * @ndev: The nci spi device to deallocate + */ +void nci_spi_free_device(struct nci_spi_dev *ndev) +{ + nci_free_device(ndev->nci_dev); +} +EXPORT_SYMBOL_GPL(nci_spi_free_device); + +/** + * nci_spi_register_device - register a nci spi device in the nfc subsystem + * + * @pdev: The nci spi device to register + */ +int nci_spi_register_device(struct nci_spi_dev *ndev) +{ + return nci_register_device(ndev->nci_dev); +} +EXPORT_SYMBOL_GPL(nci_spi_register_device); + +/** + * nci_spi_unregister_device - unregister a nci spi device in the nfc subsystem + * + * @dev: The nci spi device to unregister + */ +void nci_spi_unregister_device(struct nci_spi_dev *ndev) +{ + nci_unregister_device(ndev->nci_dev); +} +EXPORT_SYMBOL_GPL(nci_spi_unregister_device); -- cgit From ee9596d467e4d05c77a8c883aeeb5b74d1a3cd31 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Wed, 29 May 2013 15:35:03 +0200 Subject: NFC: Add NCI over SPI send Before any operation, driver interruption is de-asserted to prevent race condition between TX and RX. The NCI over SPI header is added in front of NCI packet. If acknowledged mode is set, CRC-16-CCITT is added to the packet. Then the packet is forwarded to SPI module to be sent. A delay after the transaction is added. This delay is determined by the driver during nci_spi_allocate_device() call and can be 0. After data has been sent, driver interruption is re-asserted. If acknowledged mode is set, nci_spi_send will block until acknowledgment is received. Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/spi.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index ebcdba51418c..6258461e6998 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -20,12 +20,25 @@ #include #include +#include #include #include #define NCI_SPI_HDR_LEN 4 #define NCI_SPI_CRC_LEN 2 +#define NCI_SPI_SEND_TIMEOUT (NCI_CMD_TIMEOUT > NCI_DATA_TIMEOUT ? \ + NCI_CMD_TIMEOUT : NCI_DATA_TIMEOUT) + +#define NCI_SPI_DIRECT_WRITE 0x01 +#define NCI_SPI_DIRECT_READ 0x02 + +#define ACKNOWLEDGE_NONE 0 +#define ACKNOWLEDGE_ACK 1 +#define ACKNOWLEDGE_NACK 2 + +#define CRC_INIT 0xFFFF + static int nci_spi_open(struct nci_dev *nci_dev) { struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); @@ -40,9 +53,65 @@ static int nci_spi_close(struct nci_dev *nci_dev) return ndev->ops->close(ndev); } +static int __nci_spi_send(struct nci_spi_dev *ndev, struct sk_buff *skb) +{ + struct spi_message m; + struct spi_transfer t; + + t.tx_buf = skb->data; + t.len = skb->len; + t.cs_change = 0; + t.delay_usecs = ndev->xfer_udelay; + + spi_message_init(&m); + spi_message_add_tail(&t, &m); + + return spi_sync(ndev->spi, &m); +} + static int nci_spi_send(struct nci_dev *nci_dev, struct sk_buff *skb) { - return 0; + struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); + unsigned int payload_len = skb->len; + unsigned char *hdr; + int ret; + long completion_rc; + + ndev->ops->deassert_int(ndev); + + /* add the NCI SPI header to the start of the buffer */ + hdr = skb_push(skb, NCI_SPI_HDR_LEN); + hdr[0] = NCI_SPI_DIRECT_WRITE; + hdr[1] = ndev->acknowledge_mode; + hdr[2] = payload_len >> 8; + hdr[3] = payload_len & 0xFF; + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) { + u16 crc; + + crc = crc_ccitt(CRC_INIT, skb->data, skb->len); + *skb_put(skb, 1) = crc >> 8; + *skb_put(skb, 1) = crc & 0xFF; + } + + ret = __nci_spi_send(ndev, skb); + + kfree_skb(skb); + ndev->ops->assert_int(ndev); + + if (ret != 0 || ndev->acknowledge_mode == NCI_SPI_CRC_DISABLED) + goto done; + + init_completion(&ndev->req_completion); + completion_rc = + wait_for_completion_interruptible_timeout(&ndev->req_completion, + NCI_SPI_SEND_TIMEOUT); + + if (completion_rc <= 0 || ndev->req_result == ACKNOWLEDGE_NACK) + ret = -EIO; + +done: + return ret; } static struct nci_ops nci_spi_ops = { -- cgit From 391d8a2da787257aeaf952c974405b53926e3fb3 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Wed, 29 May 2013 15:35:04 +0200 Subject: NFC: Add NCI over SPI receive Before any operation, driver interruption is de-asserted to prevent race condition between TX and RX. Transaction starts by emitting "Direct read" and acknowledged mode bytes. Then packet length is read allowing to allocate correct NCI socket buffer. After that payload is retrieved. A delay after the transaction can be added. This delay is determined by the driver during nci_spi_allocate_device() call and can be 0. If acknowledged mode is set: - CRC of header and payload is checked - if frame reception fails (CRC error): NACK is sent - if received frame has ACK or NACK flag: unblock nci_spi_send() Payload is passed to NCI module. At the end, driver interruption is re asserted. Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/spi.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) (limited to 'net') diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index 6258461e6998..70afc387a965 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -26,6 +26,8 @@ #define NCI_SPI_HDR_LEN 4 #define NCI_SPI_CRC_LEN 2 +#define NCI_SPI_ACK_SHIFT 6 +#define NCI_SPI_MSB_PAYLOAD_MASK 0x3F #define NCI_SPI_SEND_TIMEOUT (NCI_CMD_TIMEOUT > NCI_DATA_TIMEOUT ? \ NCI_CMD_TIMEOUT : NCI_DATA_TIMEOUT) @@ -203,3 +205,175 @@ void nci_spi_unregister_device(struct nci_spi_dev *ndev) nci_unregister_device(ndev->nci_dev); } EXPORT_SYMBOL_GPL(nci_spi_unregister_device); + +static int send_acknowledge(struct nci_spi_dev *ndev, u8 acknowledge) +{ + struct sk_buff *skb; + unsigned char *hdr; + u16 crc; + int ret; + + skb = nci_skb_alloc(ndev->nci_dev, 0, GFP_KERNEL); + + /* add the NCI SPI header to the start of the buffer */ + hdr = skb_push(skb, NCI_SPI_HDR_LEN); + hdr[0] = NCI_SPI_DIRECT_WRITE; + hdr[1] = NCI_SPI_CRC_ENABLED; + hdr[2] = acknowledge << NCI_SPI_ACK_SHIFT; + hdr[3] = 0; + + crc = crc_ccitt(CRC_INIT, skb->data, skb->len); + *skb_put(skb, 1) = crc >> 8; + *skb_put(skb, 1) = crc & 0xFF; + + ret = __nci_spi_send(ndev, skb); + + kfree_skb(skb); + + return ret; +} + +static struct sk_buff *__nci_spi_recv_frame(struct nci_spi_dev *ndev) +{ + struct sk_buff *skb; + struct spi_message m; + unsigned char req[2], resp_hdr[2]; + struct spi_transfer tx, rx; + unsigned short rx_len = 0; + int ret; + + spi_message_init(&m); + req[0] = NCI_SPI_DIRECT_READ; + req[1] = ndev->acknowledge_mode; + tx.tx_buf = req; + tx.len = 2; + tx.cs_change = 0; + spi_message_add_tail(&tx, &m); + rx.rx_buf = resp_hdr; + rx.len = 2; + rx.cs_change = 1; + spi_message_add_tail(&rx, &m); + ret = spi_sync(ndev->spi, &m); + + if (ret) + return NULL; + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) + rx_len = ((resp_hdr[0] & NCI_SPI_MSB_PAYLOAD_MASK) << 8) + + resp_hdr[1] + NCI_SPI_CRC_LEN; + else + rx_len = (resp_hdr[0] << 8) | resp_hdr[1]; + + skb = nci_skb_alloc(ndev->nci_dev, rx_len, GFP_KERNEL); + if (!skb) + return NULL; + + spi_message_init(&m); + rx.rx_buf = skb_put(skb, rx_len); + rx.len = rx_len; + rx.cs_change = 0; + rx.delay_usecs = ndev->xfer_udelay; + spi_message_add_tail(&rx, &m); + ret = spi_sync(ndev->spi, &m); + + if (ret) + goto receive_error; + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) { + *skb_push(skb, 1) = resp_hdr[1]; + *skb_push(skb, 1) = resp_hdr[0]; + } + + return skb; + +receive_error: + kfree_skb(skb); + + return NULL; +} + +static int nci_spi_check_crc(struct sk_buff *skb) +{ + u16 crc_data = (skb->data[skb->len - 2] << 8) | + skb->data[skb->len - 1]; + int ret; + + ret = (crc_ccitt(CRC_INIT, skb->data, skb->len - NCI_SPI_CRC_LEN) + == crc_data); + + skb_trim(skb, skb->len - NCI_SPI_CRC_LEN); + + return ret; +} + +static u8 nci_spi_get_ack(struct sk_buff *skb) +{ + u8 ret; + + ret = skb->data[0] >> NCI_SPI_ACK_SHIFT; + + /* Remove NFCC part of the header: ACK, NACK and MSB payload len */ + skb_pull(skb, 2); + + return ret; +} + +/** + * nci_spi_recv_frame - receive frame from NCI SPI drivers + * + * @ndev: The nci spi device + * Context: can sleep + * + * This call may only be used from a context that may sleep. The sleep + * is non-interruptible, and has no timeout. + * + * It returns zero on success, else a negative error code. + */ +int nci_spi_recv_frame(struct nci_spi_dev *ndev) +{ + struct sk_buff *skb; + int ret = 0; + + ndev->ops->deassert_int(ndev); + + /* Retrieve frame from SPI */ + skb = __nci_spi_recv_frame(ndev); + if (!skb) { + ret = -EIO; + goto done; + } + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) { + if (!nci_spi_check_crc(skb)) { + send_acknowledge(ndev, ACKNOWLEDGE_NACK); + goto done; + } + + /* In case of acknowledged mode: if ACK or NACK received, + * unblock completion of latest frame sent. + */ + ndev->req_result = nci_spi_get_ack(skb); + if (ndev->req_result) + complete(&ndev->req_completion); + } + + /* If there is no payload (ACK/NACK only frame), + * free the socket buffer + */ + if (skb->len == 0) { + kfree_skb(skb); + goto done; + } + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) + send_acknowledge(ndev, ACKNOWLEDGE_ACK); + + /* Forward skb to NCI core layer */ + ret = nci_recv_frame(ndev->nci_dev, skb); + +done: + ndev->ops->assert_int(ndev); + + return ret; +} +EXPORT_SYMBOL_GPL(nci_spi_recv_frame); -- cgit From 0b456c418a5595b9d67f300c9ac6a2441e774603 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 7 May 2013 19:22:11 +0200 Subject: NFC: Remove the static supported_se field Supported secure elements are typically found during a discovery process initiated when the NFC controller is up and running. For a given NFC chipset there can be many configurations (embedded SE or not, with or without a SIM card wired to the NFC controller SWP interface, etc...) and thus driver code will never know before hand which SEs are available. So we remove this field, it will be replaced by a real SE discovery mechanism. Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 2 -- net/nfc/hci/core.c | 3 +-- net/nfc/nci/core.c | 2 -- net/nfc/nci/spi.c | 3 +-- net/nfc/netlink.c | 1 - 5 files changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index eb3cecf1764e..334954a1d6e8 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -832,7 +832,6 @@ struct nfc_dev *nfc_get_device(unsigned int idx) */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, - u32 supported_se, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; @@ -850,7 +849,6 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; - dev->supported_se = supported_se; dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index d2ef1e2ee0c6..9c8a63d341d3 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -808,7 +808,6 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, - u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, @@ -834,7 +833,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, return NULL; } - hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se, + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 8e0dbbeee9e3..145bad15e113 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -658,7 +658,6 @@ static struct nfc_ops nci_nfc_ops = { */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, - __u32 supported_se, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; @@ -681,7 +680,6 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, - supported_se, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index 70afc387a965..c7cf37ba7298 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -162,8 +162,7 @@ struct nci_spi_dev *nci_spi_allocate_device(struct spi_device *spi, tailroom += NCI_SPI_CRC_LEN; ndev->nci_dev = nci_allocate_device(&nci_spi_ops, supported_protocols, - supported_se, NCI_SPI_HDR_LEN, - tailroom); + NCI_SPI_HDR_LEN, tailroom); if (!ndev->nci_dev) return NULL; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 1deadad9a285..fdbc662c564a 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -444,7 +444,6 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || - nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; -- cgit From 0a946301c2d3eac8673e556df820c0b6023ac6c3 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 11:57:06 +0200 Subject: NFC: Extend and fix the internal secure element API Secure elements need to be discovered after enabling the NFC controller. This is typically done by the NCI core and the HCI drivers (HCI does not specify how to discover SEs, it is left to the specific drivers). Also, the SE enable/disable API explicitely takes a SE index as its argument. Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 7 +++++++ net/nfc/hci/core.c | 33 +++++++++++++++++++++++++++++++++ net/nfc/nci/core.c | 18 ++++++++++++++++++ 3 files changed, 58 insertions(+) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 334954a1d6e8..a43a56d7f4be 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -126,6 +126,13 @@ int nfc_dev_up(struct nfc_dev *dev) if (!rc) dev->dev_up = true; + /* We have to enable the device before discovering SEs */ + if (dev->ops->discover_se) { + rc = dev->ops->discover_se(dev); + if (!rc) + pr_warn("SE discovery failed\n"); + } + error: device_unlock(&dev->dev); return rc; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 9c8a63d341d3..7b1c186736eb 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -692,6 +692,36 @@ static int hci_check_presence(struct nfc_dev *nfc_dev, return hdev->ops->check_presence(hdev, target); } +static int hci_discover_se(struct nfc_dev *nfc_dev) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->discover_se) + return hdev->ops->discover_se(hdev); + + return 0; +} + +static int hci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->enable_se) + return hdev->ops->enable_se(hdev, se_idx); + + return 0; +} + +static int hci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->disable_se) + return hdev->ops->enable_se(hdev, se_idx); + + return 0; +} + static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err) { mutex_lock(&hdev->msg_tx_mutex); @@ -802,6 +832,9 @@ static struct nfc_ops hci_nfc_ops = { .tm_send = hci_tm_send, .check_presence = hci_check_presence, .fw_upload = hci_fw_upload, + .discover_se = hci_discover_se, + .enable_se = hci_enable_se, + .disable_se = hci_disable_se, }; struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 145bad15e113..b943d46a1644 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -636,6 +636,21 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, return rc; } +static int nci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + return 0; +} + +static int nci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + return 0; +} + +static int nci_discover_se(struct nfc_dev *nfc_dev) +{ + return 0; +} + static struct nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, @@ -646,6 +661,9 @@ static struct nfc_ops nci_nfc_ops = { .activate_target = nci_activate_target, .deactivate_target = nci_deactivate_target, .im_transceive = nci_transceive, + .enable_se = nci_enable_se, + .disable_se = nci_disable_se, + .discover_se = nci_discover_se, }; /* ---- Interface to NCI drivers ---- */ -- cgit From fed7c25ec0d4894edfc36bbe5c5231e52f45483a Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 15:28:38 +0200 Subject: NFC: Add secure elements addition and removal API This API will allow NFC drivers to add and remove the secure elements they know about or detect. Typically this should be called (asynchronously or not) from the driver or the host interface stack detect_se hook. Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index a43a56d7f4be..dacadfbcacea 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -760,6 +760,49 @@ inline void nfc_driver_failure(struct nfc_dev *dev, int err) } EXPORT_SYMBOL(nfc_driver_failure); +int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) +{ + struct nfc_se *se, *n; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) + if (se->idx == se_idx) + return -EALREADY; + + se = kzalloc(sizeof(struct nfc_se), GFP_KERNEL); + if (!se) + return -ENOMEM; + + se->idx = se_idx; + se->type = type; + se->state = NFC_SE_DISABLED; + INIT_LIST_HEAD(&se->list); + + list_add(&se->list, &dev->secure_elements); + + return 0; +} +EXPORT_SYMBOL(nfc_add_se); + +int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) +{ + struct nfc_se *se, *n; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) + if (se->idx == se_idx) { + list_del(&se->list); + kfree(se); + + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL(nfc_remove_se); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); @@ -856,9 +899,9 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; - dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; + INIT_LIST_HEAD(&dev->secure_elements); nfc_genl_data_init(&dev->genl_data); -- cgit From 2757c3723c3d2b13e3a8bfaa034826f64e9cca43 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 15:47:37 +0200 Subject: NFC: Send netlink events for secure elements additions and removals When an NFC driver or host controller stack discovers a secure element, it will call nfc_add_se(). In order for userspace applications to use these secure elements, a netlink event will then be sent with the SE index and its type. With that information userspace applications can decide wether or not to enable SEs, through their indexes. Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 14 +++++++++++++ net/nfc/netlink.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 3 +++ 3 files changed, 80 insertions(+) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index dacadfbcacea..bb5f16cfc201 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -763,6 +763,7 @@ EXPORT_SYMBOL(nfc_driver_failure); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) { struct nfc_se *se, *n; + int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); @@ -781,6 +782,14 @@ int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) list_add(&se->list, &dev->secure_elements); + rc = nfc_genl_se_added(dev, se_idx, type); + if (rc < 0) { + list_del(&se->list); + kfree(se); + + return rc; + } + return 0; } EXPORT_SYMBOL(nfc_add_se); @@ -788,11 +797,16 @@ EXPORT_SYMBOL(nfc_add_se); int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) { struct nfc_se *se, *n; + int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); list_for_each_entry_safe(se, n, &dev->secure_elements, list) if (se->idx == se_idx) { + rc = nfc_genl_se_removed(dev, se_idx); + if (rc < 0) + return rc; + list_del(&se->list); kfree(se); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index fdbc662c564a..8a11a3a27e69 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -426,6 +426,69 @@ free_msg: return rc; } +int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_ADDED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, type)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + +int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_REMOVED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index cf0c48165996..a6aeee094aa4 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -94,6 +94,9 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev); int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); +int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); +int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); + struct nfc_dev *nfc_get_device(unsigned int idx); static inline void nfc_put_device(struct nfc_dev *dev) -- cgit From ee656e9d0993144f4e4ad261aefeeaab9554cd3f Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 15:53:29 +0200 Subject: NFC: Remove and free all SEs when releasing an NFC device Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index bb5f16cfc201..5b60b9ddfc8f 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -820,11 +820,19 @@ EXPORT_SYMBOL(nfc_remove_se); static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); + struct nfc_se *se, *n; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); nfc_genl_data_exit(&dev->genl_data); kfree(dev->targets); + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) { + nfc_genl_se_removed(dev, se->idx); + list_del(&se->list); + kfree(se); + } + kfree(dev); } -- cgit From c531c9ec2969860c98a8a47f501c4874278388d3 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 16:15:32 +0200 Subject: NFC: Add secure element enablement internal API Called via netlink, this API will enable or disable a specific secure element. When a secure element is enabled, it will handle card emulation and more generically ISO-DEP target mode, i.e. all target mode cases except for p2p target mode. Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- net/nfc/nfc.h | 3 ++ 2 files changed, 109 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 5b60b9ddfc8f..dc96a83aa6ab 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -528,6 +528,108 @@ error: return rc; } +static struct nfc_se *find_se(struct nfc_dev *dev, u32 se_idx) +{ + struct nfc_se *se, *n; + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) + if (se->idx == se_idx) + return se; + + return NULL; +} + +int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) +{ + + struct nfc_se *se; + int rc; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (!dev->dev_up) { + rc = -ENODEV; + goto error; + } + + if (dev->polling) { + rc = -EBUSY; + goto error; + } + + if (!dev->ops->enable_se || !dev->ops->disable_se) { + rc = -EOPNOTSUPP; + goto error; + } + + se = find_se(dev, se_idx); + if (!se) { + rc = -EINVAL; + goto error; + } + + if (se->type == NFC_SE_ENABLED) { + rc = -EALREADY; + goto error; + } + + rc = dev->ops->enable_se(dev, se_idx); + +error: + device_unlock(&dev->dev); + return rc; +} + +int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) +{ + + struct nfc_se *se; + int rc; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (!dev->dev_up) { + rc = -ENODEV; + goto error; + } + + if (!dev->ops->enable_se || !dev->ops->disable_se) { + rc = -EOPNOTSUPP; + goto error; + } + + se = find_se(dev, se_idx); + if (!se) { + rc = -EINVAL; + goto error; + } + + if (se->type == NFC_SE_DISABLED) { + rc = -EALREADY; + goto error; + } + + rc = dev->ops->disable_se(dev, se_idx); + +error: + device_unlock(&dev->dev); + return rc; +} + int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len) { pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len); @@ -762,14 +864,14 @@ EXPORT_SYMBOL(nfc_driver_failure); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) { - struct nfc_se *se, *n; + struct nfc_se *se; int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); - list_for_each_entry_safe(se, n, &dev->secure_elements, list) - if (se->idx == se_idx) - return -EALREADY; + se = find_se(dev, se_idx); + if (se) + return -EALREADY; se = kzalloc(sizeof(struct nfc_se), GFP_KERNEL); if (!se) diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index a6aeee094aa4..ee85a1fc1b24 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -147,4 +147,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx); int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context); +int nfc_enable_se(struct nfc_dev *dev, u32 se_idx); +int nfc_disable_se(struct nfc_dev *dev, u32 se_idx); + #endif /* __LOCAL_NFC_H */ -- cgit From be0856535c64697685af47c5bf2be9f36ab5ca08 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 17:07:32 +0200 Subject: NFC: Add secure element enablement netlink API Enabling or disabling an NFC accessible secure element through netlink requires giving both an NFC controller and a secure element indexes. Once enabled the secure element will handle card emulation once polling starts. Signed-off-by: Samuel Ortiz --- net/nfc/netlink.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'net') diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 8a11a3a27e69..b05ad909778f 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1145,6 +1145,52 @@ free_msg: return -EMSGSIZE; } +static int nfc_genl_enable_se(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx, se_idx; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_SE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + rc = nfc_enable_se(dev, se_idx); + + nfc_put_device(dev); + return rc; +} + +static int nfc_genl_disable_se(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx, se_idx; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_SE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + rc = nfc_disable_se(dev, se_idx); + + nfc_put_device(dev); + return rc; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -1209,6 +1255,16 @@ static struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_fw_upload, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_ENABLE_SE, + .doit = nfc_genl_enable_se, + .policy = nfc_genl_policy, + }, + { + .cmd = NFC_CMD_DISABLE_SE, + .doit = nfc_genl_disable_se, + .policy = nfc_genl_policy, + }, }; -- cgit From 58e3dd1558f56e95e7077a63340bb33e7aa42946 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Tue, 4 Jun 2013 11:34:50 +0200 Subject: NFC: Rename nfc_llcp_disconnect() to nfc_llcp_send_disconnect() nfc_llcp_send_disconnect() already exists but is not used. nfc_llcp_disconnect() naming is not consistent with other PDU sending functions. This patch removes nfc_llcp_send_disconnect() and renames nfc_llcp_disconnect() Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- net/nfc/llcp.h | 1 - net/nfc/llcp_commands.c | 22 +--------------------- net/nfc/llcp_sock.c | 4 ++-- 3 files changed, 3 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index ff8c434f7df8..ac16ebe3069d 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -246,7 +246,6 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head); void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); -int nfc_llcp_disconnect(struct nfc_llcp_sock *sock); int nfc_llcp_send_symm(struct nfc_dev *dev); int nfc_llcp_send_connect(struct nfc_llcp_sock *sock); int nfc_llcp_send_cc(struct nfc_llcp_sock *sock); diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index c1b23eef83ca..1017894807c0 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -339,7 +339,7 @@ static struct sk_buff *llcp_allocate_pdu(struct nfc_llcp_sock *sock, return skb; } -int nfc_llcp_disconnect(struct nfc_llcp_sock *sock) +int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) { struct sk_buff *skb; struct nfc_dev *dev; @@ -630,26 +630,6 @@ int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason) return 0; } -int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) -{ - struct sk_buff *skb; - struct nfc_llcp_local *local; - - pr_debug("Send DISC\n"); - - local = sock->local; - if (local == NULL) - return -ENODEV; - - skb = llcp_allocate_pdu(sock, LLCP_PDU_DISC, 0); - if (skb == NULL) - return -ENOMEM; - - skb_queue_head(&local->tx_queue, skb); - - return 0; -} - int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, struct msghdr *msg, size_t len) { diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 380253eccb74..03fd3162cee5 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -603,7 +603,7 @@ static int llcp_sock_release(struct socket *sock) /* Send a DISC */ if (sk->sk_state == LLCP_CONNECTED) - nfc_llcp_disconnect(llcp_sock); + nfc_llcp_send_disconnect(llcp_sock); if (sk->sk_state == LLCP_LISTEN) { struct nfc_llcp_sock *lsk, *n; @@ -614,7 +614,7 @@ static int llcp_sock_release(struct socket *sock) accept_sk = &lsk->sk; lock_sock(accept_sk); - nfc_llcp_disconnect(lsk); + nfc_llcp_send_disconnect(lsk); nfc_llcp_accept_unlink(accept_sk); release_sock(accept_sk); -- cgit From 17f7ae16aef1f58bc4af4c7a16b8778a91a30255 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Tue, 4 Jun 2013 11:34:51 +0200 Subject: NFC: Keep socket alive until the DISC PDU is actually sent This patch keeps the socket alive and therefore does not remove it from the sockets list in the local until the DISC PDU has been actually sent. Otherwise we would reply with DM PDUs before sending the DISC one. Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- net/nfc/llcp.h | 1 + net/nfc/llcp_core.c | 7 +++++++ net/nfc/llcp_sock.c | 7 +++++++ 3 files changed, 15 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index ac16ebe3069d..71f649e5ef49 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -19,6 +19,7 @@ enum llcp_state { LLCP_CONNECTED = 1, /* wait_for_packet() wants that */ + LLCP_DISCONNECTING, LLCP_CLOSED, LLCP_BOUND, LLCP_LISTEN, diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 158bdbf668cc..1c4c048e0a1b 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -730,6 +730,13 @@ static void nfc_llcp_tx_work(struct work_struct *work) DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); + if (ptype == LLCP_PDU_DISC && sk != NULL && + sk->sk_state == LLCP_DISCONNECTING) { + nfc_llcp_sock_unlink(&local->sockets, sk); + sock_orphan(sk); + sock_put(sk); + } + if (ptype == LLCP_PDU_I) copy_skb = skb_copy(skb, GFP_ATOMIC); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 03fd3162cee5..47e7acfc0236 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -626,6 +626,13 @@ static int llcp_sock_release(struct socket *sock) release_sock(sk); + /* Keep this sock alive and therefore do not remove it from the sockets + * list until the DISC PDU has been actually sent. Otherwise we would + * reply with DM PDUs before sending the DISC one. + */ + if (sk->sk_state == LLCP_DISCONNECTING) + return err; + if (sock->type == SOCK_RAW) nfc_llcp_sock_unlink(&local->raw_sockets, sk); else -- cgit From f1b79dc8915ebf176d6f1fcfc4fee001b6d5ca46 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Wed, 5 Jun 2013 17:15:59 +0200 Subject: NFC: Fix a potential memory leak In nfc_llcp_tx_work() the sk_buff is not freed when the llcp_sock is null and the PDU is an I one. Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- net/nfc/llcp_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 1c4c048e0a1b..44730f0edfd8 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -719,6 +719,7 @@ static void nfc_llcp_tx_work(struct work_struct *work) llcp_sock = nfc_llcp_sock(sk); if (llcp_sock == NULL && nfc_llcp_ptype(skb) == LLCP_PDU_I) { + kfree_skb(skb); nfc_llcp_send_symm(local->dev); } else { struct sk_buff *copy_skb = NULL; -- cgit From b4011239a08e7e6c2c6e970dfa9e8ecb73139261 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 3 May 2013 18:29:30 +0200 Subject: NFC: llcp: Fix non blocking sockets connections Without the new LLCP_CONNECTING state, non blocking sockets will be woken up with a POLLHUP right after calling connect() because their state is stuck at LLCP_CLOSED. That prevents userspace from implementing any proper non blocking socket based NFC p2p client. Cc: stable@vger.kernel.org Signed-off-by: Samuel Ortiz --- net/nfc/llcp.h | 1 + net/nfc/llcp_sock.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index 71f649e5ef49..f4d48b57ea11 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -19,6 +19,7 @@ enum llcp_state { LLCP_CONNECTED = 1, /* wait_for_packet() wants that */ + LLCP_CONNECTING, LLCP_DISCONNECTING, LLCP_CLOSED, LLCP_BOUND, diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 47e7acfc0236..d308402b67d8 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -571,7 +571,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; - if (sock_writeable(sk)) + if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -729,14 +729,16 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, if (ret) goto sock_unlink; + sk->sk_state = LLCP_CONNECTING; + ret = sock_wait_state(sk, LLCP_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); - if (ret) + if (ret && ret != -EINPROGRESS) goto sock_unlink; release_sock(sk); - return 0; + return ret; sock_unlink: nfc_llcp_put_ssap(local, llcp_sock->ssap); -- cgit From 2635a4bdfa8d513c531fa7d7a0ccafc1d6a9ff85 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 28 May 2013 15:03:17 +0200 Subject: NFC: llcp: Do not send pending Tx frames when the remote is not ready When we receive a RNR, the remote is busy processing the last received frame. We set a local flag for that, and we should send a SYMM when it is set instead of sending any pending frame. Signed-off-by: Samuel Ortiz --- net/nfc/llcp_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 44730f0edfd8..47746a088f8f 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -721,6 +721,9 @@ static void nfc_llcp_tx_work(struct work_struct *work) if (llcp_sock == NULL && nfc_llcp_ptype(skb) == LLCP_PDU_I) { kfree_skb(skb); nfc_llcp_send_symm(local->dev); + } else if (llcp_sock && !llcp_sock->remote_ready) { + skb_queue_head(&local->tx_queue, skb); + nfc_llcp_send_symm(local->dev); } else { struct sk_buff *copy_skb = NULL; u8 ptype = nfc_llcp_ptype(skb); -- cgit From f768b34017cbe6e7690686514f682f076bb1f477 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 28 May 2013 15:41:32 +0200 Subject: NFC: llcp: Set the LLC Link Management well known service bit In order to advertise our LLCP support properly and to follow the LLCP specs requirements, we need to initialize the WKS (Well-Known Services) bitfield to 1 as SAP 0 is the only mandatory supported service. Signed-off-by: Samuel Ortiz --- net/nfc/llcp_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 47746a088f8f..d45bcbbc9f78 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1590,6 +1590,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) local->lto = 150; /* 1500 ms */ local->rw = LLCP_MAX_RW; local->miux = cpu_to_be16(LLCP_MAX_MIUX); + local->local_wks = 0x1; /* LLC Link Management */ nfc_llcp_build_gb(local); -- cgit From 4ca546e5545b7345b69e9331ecd53a1e4c6f7fe1 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 3 Jun 2013 12:10:04 +0200 Subject: NFC: llcp: Fix the well known services endianness The WKS (Well Known Services) bitmask should be transmitted in big endian order. Picky implementations will refuse to establish an LLCP link when the WKS bit 0 is not set to 1. The vast majority of implementations out there are not that picky though... Signed-off-by: Samuel Ortiz --- net/nfc/llcp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index d45bcbbc9f78..81cd3416c7d4 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -537,6 +537,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) u8 *lto_tlv, lto_length; u8 *wks_tlv, wks_length; u8 *miux_tlv, miux_length; + __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; int ret = 0; @@ -549,8 +550,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) gb_len += lto_length; pr_debug("Local wks 0x%lx\n", local->local_wks); - wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&local->local_wks, 2, - &wks_length); + wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length); gb_len += wks_length; miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, -- cgit From f44f340883388b57fe03edfb0982e038e57a992c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 13 May 2013 08:15:26 -0700 Subject: openvswitch: Immediately exit on error in ovs_vport_cmd_set(). It is an error to try to change the type of a vport using the set command. However, while we check that this is an error, we still proceed to allocate memory which then gets freed immediately. This stops processing after noticing the error, which does not actually fix a bug but is more correct. Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d12d6b8b5e8b..748aa97cbfb2 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1812,10 +1812,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - err = 0; if (a[OVS_VPORT_ATTR_TYPE] && - nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) + nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; + goto exit_unlock; + } reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!reply) { @@ -1823,10 +1824,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - if (!err && a[OVS_VPORT_ATTR_OPTIONS]) + if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); - if (err) - goto exit_free; + if (err) + goto exit_free; + } if (a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); -- cgit From cbd531bebb02bc6c0fc3619a2cfc32f7d8843b18 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 13 May 2013 08:16:29 -0700 Subject: openvswitch: Remove unused get_config vport op. The get_config vport op is left over from old compatibility code, it is neither used nor implemented any more. Signed-off-by: Jesse Gross --- net/openvswitch/vport-netdev.h | 1 - net/openvswitch/vport.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index a3cb3a32cd77..dd298b5c5cdb 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -39,6 +39,5 @@ netdev_vport_priv(const struct vport *vport) } const char *ovs_netdev_get_name(const struct vport *); -const char *ovs_netdev_get_config(const struct vport *); #endif /* vport_netdev.h */ diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 68a377bc0841..26c594b1a470 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -123,8 +123,6 @@ struct vport_parms { * existing vport to a &struct sk_buff. May be %NULL for a vport that does not * have any configuration. * @get_name: Get the device's name. - * @get_config: Get the device's configuration. - * May be null if the device does not have an ifindex. * @send: Send a packet on the device. Returns the length of the packet sent. */ struct vport_ops { @@ -139,7 +137,6 @@ struct vport_ops { /* Called with rcu_read_lock or ovs_mutex. */ const char *(*get_name)(const struct vport *); - void (*get_config)(const struct vport *, void *); int (*send)(struct vport *, struct sk_buff *); }; -- cgit From 91b7514cdff406ad8f63d09b74f664c37bed2e01 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 13 May 2013 08:22:34 -0700 Subject: openvswitch: Unify vport error stats handling. Following patch changes vport->send return type so that vport layer can do error accounting. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/vport-netdev.c | 5 ++--- net/openvswitch/vport.c | 9 +++++++-- net/openvswitch/vport.h | 3 ++- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 4f01c6d2ffa4..43712217a372 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -170,7 +170,7 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", netdev_vport->dev->name, packet_length(skb), mtu); - goto error; + goto drop; } skb->dev = netdev_vport->dev; @@ -179,9 +179,8 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) return len; -error: +drop: kfree_skb(skb); - ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); return 0; } diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 720623190eaa..7f20f6d1be94 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -351,7 +351,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) { int sent = vport->ops->send(vport, skb); - if (likely(sent)) { + if (likely(sent > 0)) { struct pcpu_tstats *stats; stats = this_cpu_ptr(vport->percpu_stats); @@ -360,7 +360,12 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) stats->tx_packets++; stats->tx_bytes += sent; u64_stats_update_end(&stats->syncp); - } + } else if (sent < 0) { + ovs_vport_record_error(vport, VPORT_E_TX_ERROR); + kfree_skb(skb); + } else + ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); + return sent; } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 26c594b1a470..1cef5cd3be47 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -123,7 +123,8 @@ struct vport_parms { * existing vport to a &struct sk_buff. May be %NULL for a vport that does not * have any configuration. * @get_name: Get the device's name. - * @send: Send a packet on the device. Returns the length of the packet sent. + * @send: Send a packet on the device. Returns the length of the packet sent, + * zero for dropped packets or negative for error. */ struct vport_ops { enum ovs_vport_type type; -- cgit From 34d94f2102fb361030569eb983751ed80742f6e9 Mon Sep 17 00:00:00 2001 From: Lorand Jakab Date: Mon, 3 Jun 2013 10:01:14 -0700 Subject: openvswitch: fix variable names in comment Signed-off-by: Lorand Jakab Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b15321a2228c..33df0913358d 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -590,10 +590,10 @@ out: * - skb->network_header: just past the Ethernet header, or just past the * VLAN header, to the first byte of the Ethernet payload. * - * - skb->transport_header: If key->dl_type is ETH_P_IP or ETH_P_IPV6 + * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 * on output, then just past the IP header, if one is present and * of a correct length, otherwise the same as skb->network_header. - * For other key->dl_type values it is left untouched. + * For other key->eth.type values it is left untouched. */ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, int *key_lenp) -- cgit From af7841636bb29575fe27faaeb351146e87217717 Mon Sep 17 00:00:00 2001 From: Andy Hill Date: Fri, 7 Jun 2013 16:53:50 -0700 Subject: openvswitch: Fix misspellings in comments and docs. Flagged with: https://github.com/lyda/misspell-check Run with: git ls-files | misspellings -f - Signed-off-by: Andy Hill Signed-off-by: Jesse Gross --- net/openvswitch/vport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 7f20f6d1be94..176d449351eb 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -376,7 +376,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) * @err_type: one of enum vport_err_type types to indicate the error type * * If using the vport generic stats layer indicate that an error of the given - * type has occured. + * type has occurred. */ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) { -- cgit From b34df5e805a6e98cae0bc5bc80c1b52d9ff811de Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 13 Jun 2013 11:11:44 -0700 Subject: openvswitch: make skb->csum consistent with rest of networking stack. Following patch keeps skb->csum correct across ovs. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 4 ++++ net/openvswitch/flow.c | 3 +++ net/openvswitch/vport-internal_dev.c | 1 + net/openvswitch/vport-netdev.c | 2 ++ net/openvswitch/vport.h | 7 +++++++ 5 files changed, 17 insertions(+) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 894b6cbdd929..596d6373399d 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -130,9 +130,13 @@ static int set_eth_addr(struct sk_buff *skb, if (unlikely(err)) return err; + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); + ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + return 0; } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 33df0913358d..fca483360ce2 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -618,6 +618,9 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); __skb_pull(skb, 2 * ETH_ALEN); + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. + */ if (vlan_tx_tag_present(skb)) key->eth.tci = htons(skb->vlan_tci); diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 84e0a0379186..e284c7e1fec4 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -221,6 +221,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) skb->dev = netdev; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); netif_rx(skb); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 43712217a372..40de815b4213 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -49,6 +49,8 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) return; skb_push(skb, ETH_HLEN); + ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + ovs_vport_receive(vport, skb); return; diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 1cef5cd3be47..293278c4c2df 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -192,4 +192,11 @@ void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); extern const struct vport_ops ovs_netdev_vport_ops; extern const struct vport_ops ovs_internal_vport_ops; +static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); +} + #endif /* vport.h */ -- cgit From 93d8fd1514b6862c3370ea92be3f3b4216e0bf8f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 13 Jun 2013 11:11:32 -0700 Subject: openvswitch: Simplify interface ovs_flow_metadata_from_nlattrs() This is not functional change, this is just code cleanup. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 5 +---- net/openvswitch/flow.c | 22 ++++++++++------------ net/openvswitch/flow.h | 4 ++-- 3 files changed, 13 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 748aa97cbfb2..0f783d9fa00d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -739,10 +739,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, - &flow->key.phy.skb_mark, - &flow->key.phy.in_port, - a[OVS_PACKET_ATTR_KEY]); + err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index fca483360ce2..093c191d4fc2 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -1125,10 +1125,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, /** * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. - * @priority: receives the skb priority - * @mark: receives the skb mark - * @in_port: receives the extracted input port. - * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * @flow: Receives extracted in_port, priority, tun_key and skb_mark. + * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. * * This parses a series of Netlink attributes that form a flow key, which must @@ -1136,15 +1134,15 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. */ -int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, - const struct nlattr *attr) +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, + const struct nlattr *attr) { const struct nlattr *nla; int rem; - *in_port = DP_MAX_PORTS; - *priority = 0; - *mark = 0; + flow->key.phy.in_port = DP_MAX_PORTS; + flow->key.phy.priority = 0; + flow->key.phy.skb_mark = 0; nla_for_each_nested(nla, attr, rem) { int type = nla_type(nla); @@ -1155,17 +1153,17 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, switch (type) { case OVS_KEY_ATTR_PRIORITY: - *priority = nla_get_u32(nla); + flow->key.phy.priority = nla_get_u32(nla); break; case OVS_KEY_ATTR_IN_PORT: if (nla_get_u32(nla) >= DP_MAX_PORTS) return -EINVAL; - *in_port = nla_get_u32(nla); + flow->key.phy.in_port = nla_get_u32(nla); break; case OVS_KEY_ATTR_SKB_MARK: - *mark = nla_get_u32(nla); + flow->key.phy.skb_mark = nla_get_u32(nla); break; } } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 0875fde65b9c..2a83e2141f08 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -141,8 +141,8 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, - const struct nlattr *); +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, + const struct nlattr *attr); #define MAX_ACTIONS_BUFSIZE (16 * 1024) #define TBL_MIN_BUCKETS 1024 -- cgit From f9e42b853523cda0732022c2e0473c183f7aec65 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Jun 2013 18:24:04 +0200 Subject: net: sctp: sideeffect: throw BUG if primary_path is NULL This clearly states a BUG somewhere in the SCTP code as e.g. fixed once in f28156335 ("sctp: Use correct sideffect command in duplicate cookie handling"). If this ever happens, throw a trace in the sideeffect engine where assocs clearly must have a primary_path assigned. When in sctp_seq_dump_local_addrs() also throw a WARN and bail out since we do not need to panic for printing this one asterisk. Also, it will avoid the not so obvious case when primary != NULL test passes and at a later point in time triggering a NULL ptr dereference caused by primary. While at it, also fix up the white space. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/proc.c | 12 +++++++++--- net/sctp/sm_sideeffect.c | 5 ++++- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 4e45ee35d0db..0c83162a6bf8 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -134,9 +134,15 @@ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_commo struct sctp_af *af; if (epb->type == SCTP_EP_TYPE_ASSOCIATION) { - asoc = sctp_assoc(epb); - peer = asoc->peer.primary_path; - primary = &peer->saddr; + asoc = sctp_assoc(epb); + + peer = asoc->peer.primary_path; + if (unlikely(peer == NULL)) { + WARN(1, "Association %p with NULL primary path!", asoc); + return; + } + + primary = &peer->saddr; } rcu_read_lock(); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8aab894aeabe..ff91f47b0239 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -864,6 +864,7 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; + BUG_ON(asoc->peer.primary_path == NULL); sctp_unhash_established(asoc); sctp_association_free(asoc); } @@ -1274,8 +1275,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_outq_uncork(&asoc->outqueue); local_cork = 0; } - asoc = cmd->obj.asoc; + /* Register with the endpoint. */ + asoc = cmd->obj.asoc; + BUG_ON(asoc->peer.primary_path == NULL); sctp_endpoint_add_asoc(ep, asoc); sctp_hash_established(asoc); break; -- cgit From 405426f6ca8ac2d8d5b1f8eb9285452d44222781 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Jun 2013 18:24:05 +0200 Subject: net: sctp: sctp_sf_do_prm_asoc: do SCTP_CMD_INIT_CHOOSE_TRANSPORT first While this currently cannot trigger any NULL pointer dereference in sctp_seq_dump_local_addrs(), better change the order of commands to prevent a future bug to happen. Although we first add SCTP_CMD_NEW_ASOC and then set the SCTP_CMD_INIT_CHOOSE_TRANSPORT, it is okay for now, since this primitive is only called by sctp_connect() or sctp_sendmsg() with sctp_assoc_add_peer() set first. However, lets do this precaution and first set the transport and then add it to the association hashlist to prevent in future something to possibly triggering this. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index de1a0138317f..b3d186856513 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4632,16 +4632,16 @@ sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, if (!repl) goto nomem; + /* Choose transport for INIT. */ + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, + SCTP_CHUNK(repl)); + /* Cast away the const modifier, as we want to just * rerun it through as a sideffect. */ my_asoc = (struct sctp_association *)asoc; sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(my_asoc)); - /* Choose transport for INIT. */ - sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, - SCTP_CHUNK(repl)); - /* After sending the INIT, "A" starts the T1-init timer and * enters the COOKIE-WAIT state. */ -- cgit From c164b8381496ca797c37671fe510a264412ccae5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Jun 2013 18:24:06 +0200 Subject: net: sctp: minor: remove variable in sctp_init_sock It's only used at this one time, so we could remove it as well. This is valid and also makes it more explicit/obvious that in case of error the sp->ep is NULL here, i.e. for the sctp_destroy_sock() check that was recently added. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f631c5ff4dbf..510dc79a32a1 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3862,7 +3862,6 @@ out: SCTP_STATIC int sctp_init_sock(struct sock *sk) { struct net *net = sock_net(sk); - struct sctp_endpoint *ep; struct sctp_sock *sp; SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk); @@ -3971,11 +3970,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) * change the data structure relationships, this may still * be useful for storing pre-connect address information. */ - ep = sctp_endpoint_new(sk, GFP_KERNEL); - if (!ep) + sp->ep = sctp_endpoint_new(sk, GFP_KERNEL); + if (!sp->ep) return -ENOMEM; - sp->ep = ep; sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); -- cgit From 2e0c9e7911465b29daf85f7de97949004bf7b31c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Jun 2013 18:24:07 +0200 Subject: net: sctp: sctp_association_init: put refs in reverse order In case we need to bail out for whatever reason during assoc init, we call sctp_endpoint_put() and then sock_put(), however, we've hold both refs in reverse, non-symmetric order, so first sctp_endpoint_hold() and then sock_hold(). Reverse this, so that in an error case we have sock_put() and then sctp_endpoint_put(). Actually shouldn't matter too much, since both cleanup paths do the right thing, but that way, it is more consistent with the rest of the code. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 91cfd8f94a19..756025c98e85 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -86,10 +86,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; - sctp_endpoint_hold(asoc->ep); - - /* Hold the sock. */ asoc->base.sk = (struct sock *)sk; + + sctp_endpoint_hold(asoc->ep); sock_hold(asoc->base.sk); /* Initialize the common base substructure. */ @@ -343,8 +342,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; fail_init: - sctp_endpoint_put(asoc->ep); sock_put(asoc->base.sk); + sctp_endpoint_put(asoc->ep); return NULL; } -- cgit From eb6db622825b2028df74f490b8c36887cf3c2f50 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Fri, 14 Jun 2013 16:33:25 +0300 Subject: net: change sysctl_net_ll_poll into an unsigned int There is no reason for sysctl_net_ll_poll to be an unsigned long. Change it into an unsigned int. Fix the proc handler. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/core/sysctl_net_core.c | 4 ++-- net/socket.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 78c746e016ae..62702c2053de 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -302,9 +302,9 @@ static struct ctl_table net_core_table[] = { { .procname = "low_latency_poll", .data = &sysctl_net_ll_poll, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_doulongvec_minmax + .proc_handler = proc_dointvec }, #endif #endif /* CONFIG_NET */ diff --git a/net/socket.c b/net/socket.c index 21fd29f63ed2..caaffa14e87e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -107,7 +107,7 @@ #include #ifdef CONFIG_NET_LL_RX_POLL -unsigned long sysctl_net_ll_poll __read_mostly; +unsigned int sysctl_net_ll_poll __read_mostly; EXPORT_SYMBOL_GPL(sysctl_net_ll_poll); #endif -- cgit From 9a3c71aa802499e0b1db2788ccc75a56c5f00555 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Fri, 14 Jun 2013 16:33:35 +0300 Subject: net: convert low latency sockets to sched_clock() Use sched_clock() instead of get_cycles(). We can use sched_clock() because we don't care much about accuracy. Remove the dependency on X86_TSC Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index d6a9ce6e1800..e591668fb38f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -245,7 +245,6 @@ config NETPRIO_CGROUP config NET_LL_RX_POLL bool "Low Latency Receive Poll" - depends on X86_TSC default n ---help--- Support Low Latency Receive Queue Poll. -- cgit From 89bf1b5a683df497c572c4d3bd3f9c9aa919d773 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Fri, 14 Jun 2013 16:33:46 +0300 Subject: net: remove NET_LL_RX_POLL config menue Remove NET_LL_RX_POLL from the config menu. Change default to y. Busy polling still needs to be enabled at run time. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/Kconfig | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index e591668fb38f..51da83943847 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -244,15 +244,8 @@ config NETPRIO_CGROUP a per-interface basis config NET_LL_RX_POLL - bool "Low Latency Receive Poll" - default n - ---help--- - Support Low Latency Receive Queue Poll. - (For network card drivers which support this option.) - When waiting for data in read or poll call directly into the the device driver - to flush packets which may be pending on the device queues into the stack. - - If unsure, say N. + boolean + default y config BQL boolean -- cgit From dafcc4380deec21d160c31411f33c8813f67f517 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Fri, 14 Jun 2013 16:33:57 +0300 Subject: net: add socket option for low latency polling adds a socket option for low latency polling. This allows overriding the global sysctl value with a per-socket one. Unexport sysctl_net_ll_poll since for now it's not needed in modules. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/core/sock.c | 20 ++++++++++++++++++++ net/socket.c | 1 - 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 788c0da5eed1..1e744b12fda3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -913,6 +913,19 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; +#ifdef CONFIG_NET_LL_RX_POLL + case SO_LL: + /* allow unprivileged users to decrease the value */ + if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) + ret = -EPERM; + else { + if (val < 0) + ret = -EINVAL; + else + sk->sk_ll_usec = val; + } + break; +#endif default: ret = -ENOPROTOOPT; break; @@ -1170,6 +1183,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; +#ifdef CONFIG_NET_LL_RX_POLL + case SO_LL: + v.val = sk->sk_ll_usec; + break; +#endif + default: return -ENOPROTOOPT; } @@ -2288,6 +2307,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) #ifdef CONFIG_NET_LL_RX_POLL sk->sk_napi_id = 0; + sk->sk_ll_usec = sysctl_net_ll_poll; #endif /* diff --git a/net/socket.c b/net/socket.c index caaffa14e87e..3eec3f76b49c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -108,7 +108,6 @@ #ifdef CONFIG_NET_LL_RX_POLL unsigned int sysctl_net_ll_poll __read_mostly; -EXPORT_SYMBOL_GPL(sysctl_net_ll_poll); #endif static int sock_no_open(struct inode *irrelevant, struct file *dontcare); -- cgit From cc79dd1ba9c1021c2ac6ae200a65ec38ee8db351 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:37 -0400 Subject: tipc: change socket buffer overflow control to respect sk_rcvbuf As per feedback from the netdev community, we change the buffer overflow protection algorithm in receiving sockets so that it always respects the nominal upper limit set in sk_rcvbuf. Instead of scaling up from a small sk_rcvbuf value, which leads to violation of the configured sk_rcvbuf limit, we now calculate the weighted per-message limit by scaling down from a much bigger value, still in the same field, according to the importance priority of the received message. To allow for administrative tunability of the socket receive buffer size, we create a tipc_rmem sysctl variable to allow the user to configure an even bigger value via sysctl command. It is a size of three (min/default/max) to be consistent with things like tcp_rmem. By default, the value initialized in tipc_rmem[1] is equal to the receive socket size needed by a TIPC_CRITICAL_IMPORTANCE message. This value is also set as the default value of sk_rcvbuf. Originally-by: Jon Maloy Cc: Neil Horman Cc: Jon Maloy [Ying: added sysctl variation to Jon's original patch] Signed-off-by: Ying Xue [PG: don't compile sysctl.c if not config'd; add Documentation] Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/Makefile | 1 + net/tipc/core.c | 12 +++++++++-- net/tipc/core.h | 9 ++++++++ net/tipc/port.h | 2 ++ net/tipc/socket.c | 19 +++++++++-------- net/tipc/sysctl.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 96 insertions(+), 11 deletions(-) create mode 100644 net/tipc/sysctl.c (limited to 'net') diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 4df8e02d9008..02636d0a90cf 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -11,3 +11,4 @@ tipc-y += addr.o bcast.o bearer.o config.o \ socket.o log.o eth_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o +tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/core.c b/net/tipc/core.c index 7ec2c1eb94f1..b0e42a087291 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -39,6 +39,7 @@ #include "name_table.h" #include "subscr.h" #include "config.h" +#include "port.h" #include @@ -50,7 +51,7 @@ u32 tipc_own_addr __read_mostly; int tipc_max_ports __read_mostly; int tipc_net_id __read_mostly; int tipc_remote_management __read_mostly; - +int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ /** * tipc_buf_acquire - creates a TIPC message buffer @@ -118,6 +119,7 @@ static void tipc_core_stop(void) tipc_nametbl_stop(); tipc_ref_table_stop(); tipc_socket_stop(); + tipc_unregister_sysctl(); } /** @@ -142,13 +144,14 @@ static int tipc_core_start(void) res = tipc_netlink_start(); if (!res) res = tipc_socket_init(); + if (!res) + res = tipc_register_sysctl(); if (res) tipc_core_stop(); return res; } - static int __init tipc_init(void) { int res; @@ -160,6 +163,11 @@ static int __init tipc_init(void) tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; + sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 << + TIPC_CRITICAL_IMPORTANCE; + sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT; + res = tipc_core_start(); if (res) pr_err("Unable to start in single node mode\n"); diff --git a/net/tipc/core.h b/net/tipc/core.h index 0207db04179a..fe7f2b7c19fe 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -80,6 +80,7 @@ extern u32 tipc_own_addr __read_mostly; extern int tipc_max_ports __read_mostly; extern int tipc_net_id __read_mostly; extern int tipc_remote_management __read_mostly; +extern int sysctl_tipc_rmem[3] __read_mostly; /* * Other global variables @@ -97,6 +98,14 @@ extern void tipc_netlink_stop(void); extern int tipc_socket_init(void); extern void tipc_socket_stop(void); +#ifdef CONFIG_SYSCTL +extern int tipc_register_sysctl(void); +extern void tipc_unregister_sysctl(void); +#else +#define tipc_register_sysctl() 0 +#define tipc_unregister_sysctl() +#endif + /* * TIPC timer and signal code */ diff --git a/net/tipc/port.h b/net/tipc/port.h index fb66e2e5f4d1..2485649c408a 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -43,6 +43,8 @@ #include "node_subscr.h" #define TIPC_FLOW_CONTROL_WIN 512 +#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ + SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, struct sk_buff **buf, unsigned char const *data, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 515ce38e4f4c..aba4255f297b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -43,8 +43,6 @@ #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ -#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ struct tipc_sock { @@ -203,6 +201,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; + sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tipc_sk(sk)->p = tp_ptr; @@ -1233,10 +1232,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) * For all connectionless messages, by default new queue limits are * as belows: * - * TIPC_LOW_IMPORTANCE (5MB) - * TIPC_MEDIUM_IMPORTANCE (10MB) - * TIPC_HIGH_IMPORTANCE (20MB) - * TIPC_CRITICAL_IMPORTANCE (40MB) + * TIPC_LOW_IMPORTANCE (4 MB) + * TIPC_MEDIUM_IMPORTANCE (8 MB) + * TIPC_HIGH_IMPORTANCE (16 MB) + * TIPC_CRITICAL_IMPORTANCE (32 MB) * * Returns overload limit according to corresponding message importance */ @@ -1246,9 +1245,10 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) unsigned int limit; if (msg_connected(msg)) - limit = CONN_OVERLOAD_LIMIT; + limit = sysctl_tipc_rmem[2]; else - limit = sk->sk_rcvbuf << (msg_importance(msg) + 5); + limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << + msg_importance(msg); return limit; } @@ -1847,7 +1847,8 @@ static const struct net_proto_family tipc_family_ops = { static struct proto tipc_proto = { .name = "TIPC", .owner = THIS_MODULE, - .obj_size = sizeof(struct tipc_sock) + .obj_size = sizeof(struct tipc_sock), + .sysctl_rmem = sysctl_tipc_rmem }; /** diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c new file mode 100644 index 000000000000..f3fef93325a8 --- /dev/null +++ b/net/tipc/sysctl.c @@ -0,0 +1,64 @@ +/* + * net/tipc/sysctl.c: sysctl interface to TIPC subsystem + * + * Copyright (c) 2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" + +#include + +static struct ctl_table_header *tipc_ctl_hdr; + +static struct ctl_table tipc_table[] = { + { + .procname = "tipc_rmem", + .data = &sysctl_tipc_rmem, + .maxlen = sizeof(sysctl_tipc_rmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; + +int tipc_register_sysctl(void) +{ + tipc_ctl_hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table); + if (tipc_ctl_hdr == NULL) + return -ENOMEM; + return 0; +} + +void tipc_unregister_sysctl(void) +{ + unregister_net_sysctl_table(tipc_ctl_hdr); +} -- cgit From 5d21cb70db0122507cd18f58b4a9112583c1e075 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 17 Jun 2013 10:54:38 -0400 Subject: tipc: allow implicit connect for stream sockets TIPC's implied connect feature, aka piggyback connect, allows applications to save one syscall and all SYN/SYN-ACK signalling overhead when setting up a connection. Until now, this has only been supported for SEQPACKET sockets. Here, we make it possible to use this feature even with stream sockets. At the connecting side, the connection is completed when the first data message arrives from the accepting peer. This means that we must allow the connecting user to call blocking recv() before the socket has reached state SS_CONNECTED. So we must must relax the state machine check at recv_stream(), and allow the recv() call even if socket is in state SS_CONNECTING. Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/socket.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index aba4255f297b..d5fa708f037d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -518,8 +518,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, res = -EISCONN; goto exit; } - if ((tport->published) || - ((sock->type == SOCK_STREAM) && (total_len != 0))) { + if (tport->published) { res = -EOPNOTSUPP; goto exit; } @@ -1010,8 +1009,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, lock_sock(sk); - if (unlikely((sock->state == SS_UNCONNECTED) || - (sock->state == SS_CONNECTING))) { + if (unlikely((sock->state == SS_UNCONNECTED))) { res = -ENOTCONN; goto exit; } -- cgit From c5fa7b3cf3cb22e4ac60485fc2dc187fe012910f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:39 -0400 Subject: tipc: introduce new TIPC server infrastructure TIPC has two internal servers, one providing a subscription service for topology events, and another providing the configuration interface. These servers have previously been running in BH context, accessing the TIPC-port (aka native) API directly. Apart from these servers, even the TIPC socket implementation is partially built on this API. As this API may simultaneously be called via different paths and in different contexts, a complex and costly lock policiy is required in order to protect TIPC internal resources. To eliminate the need for this complex lock policiy, we introduce a new, generic service API that uses kernel sockets for message passing instead of the native API. Once the toplogy and configuration servers are converted to use this new service, all code pertaining to the native API can be removed. This entails a significant reduction in code amount and complexity, and opens up for a complete rework of the locking policy in TIPC. The new service also solves another problem: As the current topology server works in BH context, it cannot easily be blocked when sending of events fails due to congestion. In such cases events may have to be silently dropped, something that is unacceptable. Therefore, the new service keeps a dedicated outbound queue receiving messages from BH context. Once messages are inserted into this queue, we will immediately schedule a work from a special workqueue. This way, messages/events from the topology server are in reality sent in process context, and the server can block if necessary. Analogously, there is a new workqueue for receiving messages. Once a notification about an arriving message is received in BH context, we schedule a work from the receive workqueue to do the job of receiving the message in process context. As both sending and receive messages are now finished in processes, subscribed events cannot be dropped any more. As of this commit, this new server infrastructure is built, but not actually yet called by the existing TIPC code, but since the conversion changes required in order to use it are significant, the addition is kept here as a separate commit. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/Makefile | 2 +- net/tipc/core.h | 8 +- net/tipc/server.c | 596 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/tipc/server.h | 94 +++++++++ net/tipc/socket.c | 99 ++++++++- 5 files changed, 789 insertions(+), 10 deletions(-) create mode 100644 net/tipc/server.c create mode 100644 net/tipc/server.h (limited to 'net') diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 02636d0a90cf..b282f7130d2b 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -8,7 +8,7 @@ tipc-y += addr.o bcast.o bearer.o config.o \ core.o handler.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o node.o node_subscr.o port.o ref.o \ - socket.o log.o eth_media.o + socket.o log.o eth_media.o server.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/core.h b/net/tipc/core.h index fe7f2b7c19fe..be72f8cebc53 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -1,8 +1,8 @@ /* * net/tipc/core.h: Include file for TIPC global declarations * - * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005-2007, 2010-2011, Wind River Systems + * Copyright (c) 2005-2006, 2013 Ericsson AB + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -97,6 +97,10 @@ extern int tipc_netlink_start(void); extern void tipc_netlink_stop(void); extern int tipc_socket_init(void); extern void tipc_socket_stop(void); +extern int tipc_sock_create_local(int type, struct socket **res); +extern void tipc_sock_release_local(struct socket *sock); +extern int tipc_sock_accept_local(struct socket *sock, + struct socket **newsock, int flags); #ifdef CONFIG_SYSCTL extern int tipc_register_sysctl(void); diff --git a/net/tipc/server.c b/net/tipc/server.c new file mode 100644 index 000000000000..19da5abe0fa6 --- /dev/null +++ b/net/tipc/server.c @@ -0,0 +1,596 @@ +/* + * net/tipc/server.c: TIPC server infrastructure + * + * Copyright (c) 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" +#include "core.h" +#include + +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 +#define MAX_RECV_MSG_COUNT 25 +#define CF_CONNECTED 1 + +#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) + +/** + * struct tipc_conn - TIPC connection structure + * @kref: reference counter to connection object + * @conid: connection identifier + * @sock: socket handler associated with connection + * @flags: indicates connection state + * @server: pointer to connected server + * @rwork: receive work item + * @usr_data: user-specified field + * @rx_action: what to do when connection socket is active + * @outqueue: pointer to first outbound message in queue + * @outqueue_lock: controll access to the outqueue + * @outqueue: list of connection objects for its server + * @swork: send work item + */ +struct tipc_conn { + struct kref kref; + int conid; + struct socket *sock; + unsigned long flags; + struct tipc_server *server; + struct work_struct rwork; + int (*rx_action) (struct tipc_conn *con); + void *usr_data; + struct list_head outqueue; + spinlock_t outqueue_lock; + struct work_struct swork; +}; + +/* An entry waiting to be sent */ +struct outqueue_entry { + struct list_head list; + struct kvec iov; + struct sockaddr_tipc dest; +}; + +static void tipc_recv_work(struct work_struct *work); +static void tipc_send_work(struct work_struct *work); +static void tipc_clean_outqueues(struct tipc_conn *con); + +static void tipc_conn_kref_release(struct kref *kref) +{ + struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct tipc_server *s = con->server; + + if (con->sock) { + tipc_sock_release_local(con->sock); + con->sock = NULL; + } + + tipc_clean_outqueues(con); + + if (con->conid) + s->tipc_conn_shutdown(con->conid, con->usr_data); + + kfree(con); +} + +static void conn_put(struct tipc_conn *con) +{ + kref_put(&con->kref, tipc_conn_kref_release); +} + +static void conn_get(struct tipc_conn *con) +{ + kref_get(&con->kref); +} + +static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) +{ + struct tipc_conn *con; + + spin_lock_bh(&s->idr_lock); + con = idr_find(&s->conn_idr, conid); + if (con) + conn_get(con); + spin_unlock_bh(&s->idr_lock); + return con; +} + +static void sock_data_ready(struct sock *sk, int unused) +{ + struct tipc_conn *con; + + read_lock(&sk->sk_callback_lock); + con = sock2con(sk); + if (con && test_bit(CF_CONNECTED, &con->flags)) { + conn_get(con); + if (!queue_work(con->server->rcv_wq, &con->rwork)) + conn_put(con); + } + read_unlock(&sk->sk_callback_lock); +} + +static void sock_write_space(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock(&sk->sk_callback_lock); + con = sock2con(sk); + if (con && test_bit(CF_CONNECTED, &con->flags)) { + conn_get(con); + if (!queue_work(con->server->send_wq, &con->swork)) + conn_put(con); + } + read_unlock(&sk->sk_callback_lock); +} + +static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) +{ + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_data_ready = sock_data_ready; + sk->sk_write_space = sock_write_space; + sk->sk_user_data = con; + + con->sock = sock; + + write_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_unregister_callbacks(struct tipc_conn *con) +{ + struct sock *sk = con->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_close_conn(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + + if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); + + tipc_unregister_callbacks(con); + + /* We shouldn't flush pending works as we may be in the + * thread. In fact the races with pending rx/tx work structs + * are harmless for us here as we have already deleted this + * connection from server connection list and set + * sk->sk_user_data to 0 before releasing connection object. + */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + + conn_put(con); + } +} + +static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) +{ + struct tipc_conn *con; + int ret; + + con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC); + if (!con) + return ERR_PTR(-ENOMEM); + + kref_init(&con->kref); + INIT_LIST_HEAD(&con->outqueue); + spin_lock_init(&con->outqueue_lock); + INIT_WORK(&con->swork, tipc_send_work); + INIT_WORK(&con->rwork, tipc_recv_work); + + spin_lock_bh(&s->idr_lock); + ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); + if (ret < 0) { + kfree(con); + spin_unlock_bh(&s->idr_lock); + return ERR_PTR(-ENOMEM); + } + con->conid = ret; + s->idr_in_use++; + spin_unlock_bh(&s->idr_lock); + + set_bit(CF_CONNECTED, &con->flags); + con->server = s; + + return con; +} + +static int tipc_receive_from_sock(struct tipc_conn *con) +{ + struct msghdr msg = {}; + struct tipc_server *s = con->server; + struct sockaddr_tipc addr; + struct kvec iov; + void *buf; + int ret; + + buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC); + if (!buf) { + ret = -ENOMEM; + goto out_close; + } + + iov.iov_base = buf; + iov.iov_len = s->max_rcvbuf_size; + msg.msg_name = &addr; + ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, + MSG_DONTWAIT); + if (ret <= 0) { + kmem_cache_free(s->rcvbuf_cache, buf); + goto out_close; + } + + s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret); + + kmem_cache_free(s->rcvbuf_cache, buf); + + return 0; + +out_close: + if (ret != -EWOULDBLOCK) + tipc_close_conn(con); + else if (ret == 0) + /* Don't return success if we really got EOF */ + ret = -EAGAIN; + + return ret; +} + +static int tipc_accept_from_sock(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + struct socket *sock = con->sock; + struct socket *newsock; + struct tipc_conn *newcon; + int ret; + + ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK); + if (ret < 0) + return ret; + + newcon = tipc_alloc_conn(con->server); + if (IS_ERR(newcon)) { + ret = PTR_ERR(newcon); + sock_release(newsock); + return ret; + } + + newcon->rx_action = tipc_receive_from_sock; + tipc_register_callbacks(newsock, newcon); + + /* Notify that new connection is incoming */ + newcon->usr_data = s->tipc_conn_new(newcon->conid); + + /* Wake up receive process in case of 'SYN+' message */ + newsock->sk->sk_data_ready(newsock->sk, 0); + return ret; +} + +static struct socket *tipc_create_listen_sock(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + struct socket *sock = NULL; + int ret; + + ret = tipc_sock_create_local(s->type, &sock); + if (ret < 0) + return NULL; + ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, + (char *)&s->imp, sizeof(s->imp)); + if (ret < 0) + goto create_err; + ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr)); + if (ret < 0) + goto create_err; + + switch (s->type) { + case SOCK_STREAM: + case SOCK_SEQPACKET: + con->rx_action = tipc_accept_from_sock; + + ret = kernel_listen(sock, 0); + if (ret < 0) + goto create_err; + break; + case SOCK_DGRAM: + case SOCK_RDM: + con->rx_action = tipc_receive_from_sock; + break; + default: + pr_err("Unknown socket type %d\n", s->type); + goto create_err; + } + return sock; + +create_err: + sock_release(sock); + con->sock = NULL; + return NULL; +} + +static int tipc_open_listening_sock(struct tipc_server *s) +{ + struct socket *sock; + struct tipc_conn *con; + + con = tipc_alloc_conn(s); + if (IS_ERR(con)) + return PTR_ERR(con); + + sock = tipc_create_listen_sock(con); + if (!sock) + return -EINVAL; + + tipc_register_callbacks(sock, con); + return 0; +} + +static struct outqueue_entry *tipc_alloc_entry(void *data, int len) +{ + struct outqueue_entry *entry; + void *buf; + + entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC); + if (!entry) + return NULL; + + buf = kmalloc(len, GFP_ATOMIC); + if (!buf) { + kfree(entry); + return NULL; + } + + memcpy(buf, data, len); + entry->iov.iov_base = buf; + entry->iov.iov_len = len; + + return entry; +} + +static void tipc_free_entry(struct outqueue_entry *e) +{ + kfree(e->iov.iov_base); + kfree(e); +} + +static void tipc_clean_outqueues(struct tipc_conn *con) +{ + struct outqueue_entry *e, *safe; + + spin_lock_bh(&con->outqueue_lock); + list_for_each_entry_safe(e, safe, &con->outqueue, list) { + list_del(&e->list); + tipc_free_entry(e); + } + spin_unlock_bh(&con->outqueue_lock); +} + +int tipc_conn_sendmsg(struct tipc_server *s, int conid, + struct sockaddr_tipc *addr, void *data, size_t len) +{ + struct outqueue_entry *e; + struct tipc_conn *con; + + con = tipc_conn_lookup(s, conid); + if (!con) + return -EINVAL; + + e = tipc_alloc_entry(data, len); + if (!e) { + conn_put(con); + return -ENOMEM; + } + + if (addr) + memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc)); + + spin_lock_bh(&con->outqueue_lock); + list_add_tail(&e->list, &con->outqueue); + spin_unlock_bh(&con->outqueue_lock); + + if (test_bit(CF_CONNECTED, &con->flags)) + if (!queue_work(s->send_wq, &con->swork)) + conn_put(con); + + return 0; +} + +void tipc_conn_terminate(struct tipc_server *s, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(s, conid); + if (con) { + tipc_close_conn(con); + conn_put(con); + } +} + +static void tipc_send_to_sock(struct tipc_conn *con) +{ + int count = 0; + struct tipc_server *s = con->server; + struct outqueue_entry *e; + struct msghdr msg; + int ret; + + spin_lock_bh(&con->outqueue_lock); + while (1) { + e = list_entry(con->outqueue.next, struct outqueue_entry, + list); + if ((struct list_head *) e == &con->outqueue) + break; + spin_unlock_bh(&con->outqueue_lock); + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + + if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { + msg.msg_name = &e->dest; + msg.msg_namelen = sizeof(struct sockaddr_tipc); + } + ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, + e->iov.iov_len); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + goto out; + } else if (ret < 0) { + goto send_err; + } + + /* Don't starve users filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { + cond_resched(); + count = 0; + } + + spin_lock_bh(&con->outqueue_lock); + list_del(&e->list); + tipc_free_entry(e); + } + spin_unlock_bh(&con->outqueue_lock); +out: + return; + +send_err: + tipc_close_conn(con); +} + +static void tipc_recv_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); + int count = 0; + + while (test_bit(CF_CONNECTED, &con->flags)) { + if (con->rx_action(con)) + break; + + /* Don't flood Rx machine */ + if (++count >= MAX_RECV_MSG_COUNT) { + cond_resched(); + count = 0; + } + } + conn_put(con); +} + +static void tipc_send_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, swork); + + if (test_bit(CF_CONNECTED, &con->flags)) + tipc_send_to_sock(con); + + conn_put(con); +} + +static void tipc_work_stop(struct tipc_server *s) +{ + destroy_workqueue(s->rcv_wq); + destroy_workqueue(s->send_wq); +} + +static int tipc_work_start(struct tipc_server *s) +{ + s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1); + if (!s->rcv_wq) { + pr_err("can't start tipc receive workqueue\n"); + return -ENOMEM; + } + + s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1); + if (!s->send_wq) { + pr_err("can't start tipc send workqueue\n"); + destroy_workqueue(s->rcv_wq); + return -ENOMEM; + } + + return 0; +} + +int tipc_server_start(struct tipc_server *s) +{ + int ret; + + spin_lock_init(&s->idr_lock); + idr_init(&s->conn_idr); + s->idr_in_use = 0; + + s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!s->rcvbuf_cache) + return -ENOMEM; + + ret = tipc_work_start(s); + if (ret < 0) { + kmem_cache_destroy(s->rcvbuf_cache); + return ret; + } + s->enabled = 1; + + return tipc_open_listening_sock(s); +} + +void tipc_server_stop(struct tipc_server *s) +{ + struct tipc_conn *con; + int total = 0; + int id; + + if (!s->enabled) + return; + + s->enabled = 0; + spin_lock_bh(&s->idr_lock); + for (id = 0; total < s->idr_in_use; id++) { + con = idr_find(&s->conn_idr, id); + if (con) { + total++; + spin_unlock_bh(&s->idr_lock); + tipc_close_conn(con); + spin_lock_bh(&s->idr_lock); + } + } + spin_unlock_bh(&s->idr_lock); + + tipc_work_stop(s); + kmem_cache_destroy(s->rcvbuf_cache); + idr_destroy(&s->conn_idr); +} diff --git a/net/tipc/server.h b/net/tipc/server.h new file mode 100644 index 000000000000..98b23f20bc0f --- /dev/null +++ b/net/tipc/server.h @@ -0,0 +1,94 @@ +/* + * net/tipc/server.h: Include file for TIPC server code + * + * Copyright (c) 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_SERVER_H +#define _TIPC_SERVER_H + +#include "core.h" + +#define TIPC_SERVER_NAME_LEN 32 + +/** + * struct tipc_server - TIPC server structure + * @conn_idr: identifier set of connection + * @idr_lock: protect the connection identifier set + * @idr_in_use: amount of allocated identifier entry + * @rcvbuf_cache: memory cache of server receive buffer + * @rcv_wq: receive workqueue + * @send_wq: send workqueue + * @max_rcvbuf_size: maximum permitted receive message length + * @tipc_conn_new: callback will be called when new connection is incoming + * @tipc_conn_shutdown: callback will be called when connection is shut down + * @tipc_conn_recvmsg: callback will be called when message arrives + * @saddr: TIPC server address + * @name: server name + * @imp: message importance + * @type: socket type + * @enabled: identify whether server is launched or not + */ +struct tipc_server { + struct idr conn_idr; + spinlock_t idr_lock; + int idr_in_use; + struct kmem_cache *rcvbuf_cache; + struct workqueue_struct *rcv_wq; + struct workqueue_struct *send_wq; + int max_rcvbuf_size; + void *(*tipc_conn_new) (int conid); + void (*tipc_conn_shutdown) (int conid, void *usr_data); + void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len); + struct sockaddr_tipc *saddr; + const char name[TIPC_SERVER_NAME_LEN]; + int imp; + int type; + int enabled; +}; + +int tipc_conn_sendmsg(struct tipc_server *s, int conid, + struct sockaddr_tipc *addr, void *data, size_t len); + +/** + * tipc_conn_terminate - terminate connection with server + * + * Note: Must call it in process context since it might sleep + */ +void tipc_conn_terminate(struct tipc_server *s, int conid); + +int tipc_server_start(struct tipc_server *s); + +void tipc_server_stop(struct tipc_server *s); + +#endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d5fa708f037d..bd8e2cdeceef 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2,7 +2,7 @@ * net/tipc/socket.c: TIPC socket API * * Copyright (c) 2001-2007, 2012 Ericsson AB - * Copyright (c) 2004-2008, 2010-2012, Wind River Systems + * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -63,12 +63,15 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); static void tipc_data_ready(struct sock *sk, int len); static void tipc_write_space(struct sock *sk); +static int release(struct socket *sock); +static int accept(struct socket *sock, struct socket *new_sock, int flags); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; static struct proto tipc_proto; +static struct proto tipc_proto_kern; static int sockets_enabled; @@ -141,7 +144,7 @@ static void reject_rx_queue(struct sock *sk) } /** - * tipc_create - create a TIPC socket + * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) * @sock: pre-allocated socket structure * @protocol: protocol indicator (must be 0) @@ -152,8 +155,8 @@ static void reject_rx_queue(struct sock *sk) * * Returns 0 on success, errno otherwise */ -static int tipc_create(struct net *net, struct socket *sock, int protocol, - int kern) +static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, + int kern) { const struct proto_ops *ops; socket_state state; @@ -183,7 +186,11 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, } /* Allocate socket's protocol area */ - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + if (!kern) + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + else + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern); + if (sk == NULL) return -ENOMEM; @@ -218,6 +225,78 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, return 0; } +/** + * tipc_sock_create_local - create TIPC socket from inside TIPC module + * @type: socket type - SOCK_RDM or SOCK_SEQPACKET + * + * We cannot use sock_creat_kern here because it bumps module user count. + * Since socket owner and creator is the same module we must make sure + * that module count remains zero for module local sockets, otherwise + * we cannot do rmmod. + * + * Returns 0 on success, errno otherwise + */ +int tipc_sock_create_local(int type, struct socket **res) +{ + int rc; + struct sock *sk; + + rc = sock_create_lite(AF_TIPC, type, 0, res); + if (rc < 0) { + pr_err("Failed to create kernel socket\n"); + return rc; + } + tipc_sk_create(&init_net, *res, 0, 1); + + sk = (*res)->sk; + + return 0; +} + +/** + * tipc_sock_release_local - release socket created by tipc_sock_create_local + * @sock: the socket to be released. + * + * Module reference count is not incremented when such sockets are created, + * so we must keep it from being decremented when they are released. + */ +void tipc_sock_release_local(struct socket *sock) +{ + release(sock); + sock->ops = NULL; + sock_release(sock); +} + +/** + * tipc_sock_accept_local - accept a connection on a socket created + * with tipc_sock_create_local. Use this function to avoid that + * module reference count is inadvertently incremented. + * + * @sock: the accepting socket + * @newsock: reference to the new socket to be created + * @flags: socket flags + */ + +int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, + int flags) +{ + struct sock *sk = sock->sk; + int ret; + + ret = sock_create_lite(sk->sk_family, sk->sk_type, + sk->sk_protocol, newsock); + if (ret < 0) + return ret; + + ret = accept(sock, *newsock, flags); + if (ret < 0) { + sock_release(*newsock); + return ret; + } + (*newsock)->ops = sock->ops; + return ret; +} + /** * release - destroy a TIPC socket * @sock: socket to destroy @@ -1529,7 +1608,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_create(sock_net(sock->sk), new_sock, 0, 0); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; @@ -1839,7 +1918,7 @@ static const struct proto_ops stream_ops = { static const struct net_proto_family tipc_family_ops = { .owner = THIS_MODULE, .family = AF_TIPC, - .create = tipc_create + .create = tipc_sk_create }; static struct proto tipc_proto = { @@ -1849,6 +1928,12 @@ static struct proto tipc_proto = { .sysctl_rmem = sysctl_tipc_rmem }; +static struct proto tipc_proto_kern = { + .name = "TIPC", + .obj_size = sizeof(struct tipc_sock), + .sysctl_rmem = sysctl_tipc_rmem +}; + /** * tipc_socket_init - initialize TIPC socket interface * -- cgit From 13a2e89873506d64d7e52f17b571da371a3e25a4 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:40 -0400 Subject: tipc: convert topology server to use new server facility As the new TIPC server infrastructure has been introduced, we can now convert the TIPC topology server to it. We get two benefits from doing this: 1) It simplifies the topology server locking policy. In the original locking policy, we placed one spin lock pointer in the tipc_subscriber structure to reuse the lock of the subscriber's server port, controlling access to members of tipc_subscriber instance. That is, we only used one lock to ensure both tipc_port and tipc_subscriber members were safely accessed. Now we introduce another spin lock for tipc_subscriber structure only protecting themselves, to get a finer granularity locking policy. Moreover, the change will allow us to make the topology server code more readable and maintainable. 2) It fixes a bug where sent subscription events may be lost when the topology port is congested. Using the new service, the topology server now queues sent events into an outgoing buffer, and then wakes up a sender process which has been blocked in workqueue context. The process will keep picking events from the buffer and send them to their respective subscribers, using the kernel socket interface, until the buffer is empty. Even if the socket is congested during transmission there is no risk that events may be dropped, since the sender process may block when needed. Some minor reordering of initialization is done, since we now have a scenario where the topology server must be started after socket initialization has taken place, as the former depends on the latter. And overall, we see a simplification of the TIPC subscriber code in making this changeover. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/core.c | 6 +- net/tipc/socket.c | 3 +- net/tipc/subscr.c | 334 +++++++++++++++--------------------------------------- net/tipc/subscr.h | 8 +- 4 files changed, 104 insertions(+), 247 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index b0e42a087291..15bbe99b609d 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -2,7 +2,7 @@ * net/tipc/core.c: TIPC module code * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2006, 2010-2011, Wind River Systems + * Copyright (c) 2005-2006, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -136,8 +136,6 @@ static int tipc_core_start(void) res = tipc_ref_table_init(tipc_max_ports, tipc_random); if (!res) res = tipc_nametbl_init(); - if (!res) - res = tipc_subscr_start(); if (!res) res = tipc_cfg_init(); if (!res) @@ -146,6 +144,8 @@ static int tipc_core_start(void) res = tipc_socket_init(); if (!res) res = tipc_register_sysctl(); + if (!res) + res = tipc_subscr_start(); if (res) tipc_core_stop(); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index bd8e2cdeceef..d0254157a30d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -402,7 +402,8 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) else if (addr->addrtype != TIPC_ADDR_NAMESEQ) return -EAFNOSUPPORT; - if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) + if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && + (addr->addr.nameseq.type != TIPC_TOP_SRV)) return -EACCES; return (addr->scope > 0) ? diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 6b42d47029af..f6be92a6973a 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -2,7 +2,7 @@ * net/tipc/subscr.c: TIPC network topology service * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005-2007, 2010-2011, Wind River Systems + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,33 +41,42 @@ /** * struct tipc_subscriber - TIPC network topology subscriber - * @port_ref: object reference to server port connecting to subscriber - * @lock: pointer to spinlock controlling access to subscriber's server port - * @subscriber_list: adjacent subscribers in top. server's list of subscribers + * @conid: connection identifier to server connecting to subscriber + * @lock: controll access to subscriber * @subscription_list: list of subscription objects for this subscriber */ struct tipc_subscriber { - u32 port_ref; - spinlock_t *lock; - struct list_head subscriber_list; + int conid; + spinlock_t lock; struct list_head subscription_list; }; -/** - * struct top_srv - TIPC network topology subscription service - * @setup_port: reference to TIPC port that handles subscription requests - * @subscription_count: number of active subscriptions (not subscribers!) - * @subscriber_list: list of ports subscribing to service - * @lock: spinlock govering access to subscriber list - */ -struct top_srv { - u32 setup_port; - atomic_t subscription_count; - struct list_head subscriber_list; - spinlock_t lock; +static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len); +static void *subscr_named_msg_event(int conid); +static void subscr_conn_shutdown_event(int conid, void *usr_data); + +static atomic_t subscription_count = ATOMIC_INIT(0); + +static struct sockaddr_tipc topsrv_addr __read_mostly = { + .family = AF_TIPC, + .addrtype = TIPC_ADDR_NAMESEQ, + .addr.nameseq.type = TIPC_TOP_SRV, + .addr.nameseq.lower = TIPC_TOP_SRV, + .addr.nameseq.upper = TIPC_TOP_SRV, + .scope = TIPC_NODE_SCOPE }; -static struct top_srv topsrv; +static struct tipc_server topsrv __read_mostly = { + .saddr = &topsrv_addr, + .imp = TIPC_CRITICAL_IMPORTANCE, + .type = SOCK_SEQPACKET, + .max_rcvbuf_size = sizeof(struct tipc_subscr), + .name = "topology_server", + .tipc_conn_recvmsg = subscr_conn_msg_event, + .tipc_conn_new = subscr_named_msg_event, + .tipc_conn_shutdown = subscr_conn_shutdown_event, +}; /** * htohl - convert value to endianness used by destination @@ -81,20 +90,13 @@ static u32 htohl(u32 in, int swap) return swap ? swab32(in) : in; } -/** - * subscr_send_event - send a message containing a tipc_event to the subscriber - * - * Note: Must not hold subscriber's server port lock, since tipc_send() will - * try to take the lock if the message is rejected and returned! - */ -static void subscr_send_event(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, +static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, u32 node) { - struct iovec msg_sect; + struct tipc_subscriber *subscriber = sub->subscriber; + struct kvec msg_sect; + int ret; msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); @@ -104,7 +106,10 @@ static void subscr_send_event(struct tipc_subscription *sub, sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - tipc_send(sub->server_ref, 1, &msg_sect, msg_sect.iov_len); + ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, + msg_sect.iov_base, msg_sect.iov_len); + if (ret < 0) + pr_err("Sending subscription event failed, no memory\n"); } /** @@ -147,21 +152,24 @@ void tipc_subscr_report_overlap(struct tipc_subscription *sub, subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } -/** - * subscr_timeout - subscription timeout has occurred - */ static void subscr_timeout(struct tipc_subscription *sub) { - struct tipc_port *server_port; + struct tipc_subscriber *subscriber = sub->subscriber; - /* Validate server port reference (in case subscriber is terminating) */ - server_port = tipc_port_lock(sub->server_ref); - if (server_port == NULL) + /* The spin lock per subscriber is used to protect its members */ + spin_lock_bh(&subscriber->lock); + + /* Validate if the connection related to the subscriber is + * closed (in case subscriber is terminating) + */ + if (subscriber->conid == 0) { + spin_unlock_bh(&subscriber->lock); return; + } /* Validate timeout (in case subscription is being cancelled) */ if (sub->timeout == TIPC_WAIT_FOREVER) { - tipc_port_unlock(server_port); + spin_unlock_bh(&subscriber->lock); return; } @@ -171,8 +179,7 @@ static void subscr_timeout(struct tipc_subscription *sub) /* Unlink subscription from subscriber */ list_del(&sub->subscription_list); - /* Release subscriber's server port */ - tipc_port_unlock(server_port); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -181,64 +188,54 @@ static void subscr_timeout(struct tipc_subscription *sub) /* Now destroy subscription */ k_term_timer(&sub->timer); kfree(sub); - atomic_dec(&topsrv.subscription_count); + atomic_dec(&subscription_count); } /** * subscr_del - delete a subscription within a subscription list * - * Called with subscriber port locked. + * Called with subscriber lock held. */ static void subscr_del(struct tipc_subscription *sub) { tipc_nametbl_unsubscribe(sub); list_del(&sub->subscription_list); kfree(sub); - atomic_dec(&topsrv.subscription_count); + atomic_dec(&subscription_count); } /** * subscr_terminate - terminate communication with a subscriber * - * Called with subscriber port locked. Routine must temporarily release lock - * to enable subscription timeout routine(s) to finish without deadlocking; - * the lock is then reclaimed to allow caller to release it upon return. - * (This should work even in the unlikely event some other thread creates - * a new object reference in the interim that uses this lock; this routine will - * simply wait for it to be released, then claim it.) + * Note: Must call it in process context since it might sleep. */ static void subscr_terminate(struct tipc_subscriber *subscriber) { - u32 port_ref; + tipc_conn_terminate(&topsrv, subscriber->conid); +} + +static void subscr_release(struct tipc_subscriber *subscriber) +{ struct tipc_subscription *sub; struct tipc_subscription *sub_temp; - /* Invalidate subscriber reference */ - port_ref = subscriber->port_ref; - subscriber->port_ref = 0; - spin_unlock_bh(subscriber->lock); + spin_lock_bh(&subscriber->lock); - /* Sever connection to subscriber */ - tipc_shutdown(port_ref); - tipc_deleteport(port_ref); + /* Invalidate subscriber reference */ + subscriber->conid = 0; /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { if (sub->timeout != TIPC_WAIT_FOREVER) { + spin_unlock_bh(&subscriber->lock); k_cancel_timer(&sub->timer); k_term_timer(&sub->timer); + spin_lock_bh(&subscriber->lock); } subscr_del(sub); } - - /* Remove subscriber from topology server's subscriber list */ - spin_lock_bh(&topsrv.lock); - list_del(&subscriber->subscriber_list); - spin_unlock_bh(&topsrv.lock); - - /* Reclaim subscriber lock */ - spin_lock_bh(subscriber->lock); + spin_unlock_bh(&subscriber->lock); /* Now destroy subscriber */ kfree(subscriber); @@ -247,7 +244,7 @@ static void subscr_terminate(struct tipc_subscriber *subscriber) /** * subscr_cancel - handle subscription cancellation request * - * Called with subscriber port locked. Routine must temporarily release lock + * Called with subscriber lock held. Routine must temporarily release lock * to enable the subscription timeout routine to finish without deadlocking; * the lock is then reclaimed to allow caller to release it upon return. * @@ -274,10 +271,10 @@ static void subscr_cancel(struct tipc_subscr *s, /* Cancel subscription timer (if used), then delete subscription */ if (sub->timeout != TIPC_WAIT_FOREVER) { sub->timeout = TIPC_WAIT_FOREVER; - spin_unlock_bh(subscriber->lock); + spin_unlock_bh(&subscriber->lock); k_cancel_timer(&sub->timer); k_term_timer(&sub->timer); - spin_lock_bh(subscriber->lock); + spin_lock_bh(&subscriber->lock); } subscr_del(sub); } @@ -285,7 +282,7 @@ static void subscr_cancel(struct tipc_subscr *s, /** * subscr_subscribe - create subscription for subscriber * - * Called with subscriber port locked. + * Called with subscriber lock held. */ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, struct tipc_subscriber *subscriber) @@ -304,7 +301,7 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { + if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); subscr_terminate(subscriber); @@ -335,10 +332,10 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); - sub->server_ref = subscriber->port_ref; + sub->subscriber = subscriber; sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - atomic_inc(&topsrv.subscription_count); + atomic_inc(&subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { k_init_timer(&sub->timer, (Handler)subscr_timeout, (unsigned long)sub); @@ -348,196 +345,51 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, return sub; } -/** - * subscr_conn_shutdown_event - handle termination request from subscriber - * - * Called with subscriber's server port unlocked. - */ -static void subscr_conn_shutdown_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - unsigned char const *data, - unsigned int size, - int reason) +/* Handle one termination request for the subscriber */ +static void subscr_conn_shutdown_event(int conid, void *usr_data) { - struct tipc_subscriber *subscriber = usr_handle; - spinlock_t *subscriber_lock; - - if (tipc_port_lock(port_ref) == NULL) - return; - - subscriber_lock = subscriber->lock; - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); + subscr_release((struct tipc_subscriber *)usr_data); } -/** - * subscr_conn_msg_event - handle new subscription request from subscriber - * - * Called with subscriber's server port unlocked. - */ -static void subscr_conn_msg_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - const unchar *data, - u32 size) +/* Handle one request to create a new subscription for the subscriber */ +static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len) { - struct tipc_subscriber *subscriber = usr_handle; - spinlock_t *subscriber_lock; + struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub; - /* - * Lock subscriber's server port (& make a local copy of lock pointer, - * in case subscriber is deleted while processing subscription request) - */ - if (tipc_port_lock(port_ref) == NULL) - return; - - subscriber_lock = subscriber->lock; - - if (size != sizeof(struct tipc_subscr)) { - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); - } else { - sub = subscr_subscribe((struct tipc_subscr *)data, subscriber); - spin_unlock_bh(subscriber_lock); - if (sub != NULL) { - - /* - * We must release the server port lock before adding a - * subscription to the name table since TIPC needs to be - * able to (re)acquire the port lock if an event message - * issued by the subscription process is rejected and - * returned. The subscription cannot be deleted while - * it is being added to the name table because: - * a) the single-threading of the native API port code - * ensures the subscription cannot be cancelled and - * the subscriber connection cannot be broken, and - * b) the name table lock ensures the subscription - * timeout code cannot delete the subscription, - * so the subscription object is still protected. - */ - tipc_nametbl_subscribe(sub); - } - } + spin_lock_bh(&subscriber->lock); + sub = subscr_subscribe((struct tipc_subscr *)buf, subscriber); + if (sub) + tipc_nametbl_subscribe(sub); + spin_unlock_bh(&subscriber->lock); } -/** - * subscr_named_msg_event - handle request to establish a new subscriber - */ -static void subscr_named_msg_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - const unchar *data, - u32 size, - u32 importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest) + +/* Handle one request to establish a new subscriber */ +static void *subscr_named_msg_event(int conid) { struct tipc_subscriber *subscriber; - u32 server_port_ref; /* Create subscriber object */ subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC); if (subscriber == NULL) { pr_warn("Subscriber rejected, no memory\n"); - return; + return NULL; } INIT_LIST_HEAD(&subscriber->subscription_list); - INIT_LIST_HEAD(&subscriber->subscriber_list); - - /* Create server port & establish connection to subscriber */ - tipc_createport(subscriber, - importance, - NULL, - NULL, - subscr_conn_shutdown_event, - NULL, - NULL, - subscr_conn_msg_event, - NULL, - &subscriber->port_ref); - if (subscriber->port_ref == 0) { - pr_warn("Subscriber rejected, unable to create port\n"); - kfree(subscriber); - return; - } - tipc_connect(subscriber->port_ref, orig); - - /* Lock server port (& save lock address for future use) */ - subscriber->lock = tipc_port_lock(subscriber->port_ref)->lock; - - /* Add subscriber to topology server's subscriber list */ - spin_lock_bh(&topsrv.lock); - list_add(&subscriber->subscriber_list, &topsrv.subscriber_list); - spin_unlock_bh(&topsrv.lock); - - /* Unlock server port */ - server_port_ref = subscriber->port_ref; - spin_unlock_bh(subscriber->lock); - - /* Send an ACK- to complete connection handshaking */ - tipc_send(server_port_ref, 0, NULL, 0); + subscriber->conid = conid; + spin_lock_init(&subscriber->lock); - /* Handle optional subscription request */ - if (size != 0) { - subscr_conn_msg_event(subscriber, server_port_ref, - buf, data, size); - } + return (void *)subscriber; } int tipc_subscr_start(void) { - struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV}; - int res; - - spin_lock_init(&topsrv.lock); - INIT_LIST_HEAD(&topsrv.subscriber_list); - - res = tipc_createport(NULL, - TIPC_CRITICAL_IMPORTANCE, - NULL, - NULL, - NULL, - NULL, - subscr_named_msg_event, - NULL, - NULL, - &topsrv.setup_port); - if (res) - goto failed; - - res = tipc_publish(topsrv.setup_port, TIPC_NODE_SCOPE, &seq); - if (res) { - tipc_deleteport(topsrv.setup_port); - topsrv.setup_port = 0; - goto failed; - } - - return 0; - -failed: - pr_err("Failed to create subscription service\n"); - return res; + return tipc_server_start(&topsrv); } void tipc_subscr_stop(void) { - struct tipc_subscriber *subscriber; - struct tipc_subscriber *subscriber_temp; - spinlock_t *subscriber_lock; - - if (topsrv.setup_port) { - tipc_deleteport(topsrv.setup_port); - topsrv.setup_port = 0; - - list_for_each_entry_safe(subscriber, subscriber_temp, - &topsrv.subscriber_list, - subscriber_list) { - subscriber_lock = subscriber->lock; - spin_lock_bh(subscriber_lock); - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); - } - } + tipc_server_stop(&topsrv); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 218d2e07f0cc..43e6d6332a02 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -2,7 +2,7 @@ * net/tipc/subscr.h: Include file for TIPC network topology service * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2007, 2012-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,10 +37,14 @@ #ifndef _TIPC_SUBSCR_H #define _TIPC_SUBSCR_H +#include "server.h" + struct tipc_subscription; +struct tipc_subscriber; /** * struct tipc_subscription - TIPC network topology subscription object + * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription @@ -52,13 +56,13 @@ struct tipc_subscription; * @evt: template for events generated by subscription */ struct tipc_subscription { + struct tipc_subscriber *subscriber; struct tipc_name_seq seq; u32 timeout; u32 filter; struct timer_list timer; struct list_head nameseq_list; struct list_head subscription_list; - u32 server_ref; int swap; struct tipc_event evt; }; -- cgit From 7d0ab17b74330e39a68ba33099ccda27f794f519 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:41 -0400 Subject: tipc: convert configuration server to use new server facility As the new socket-based TIPC server infrastructure has been introduced, we can now convert the configuration server to use it. Then we can take future steps to simplify the configuration server locking policy. Some minor reordering of initialization is done, due to the dependency on having tipc_socket_init completed. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/config.c | 102 ++++++++++++++++++++++++------------------------------ net/tipc/core.c | 4 +-- net/tipc/socket.c | 3 +- 3 files changed, 49 insertions(+), 60 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index f67866c765dd..4887ae04f3a5 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2012, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,12 +38,12 @@ #include "port.h" #include "name_table.h" #include "config.h" +#include "server.h" #define REPLY_TRUNCATED "\n" -static u32 config_port_ref; - static DEFINE_SPINLOCK(config_lock); +static struct tipc_server cfgsrv; static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ @@ -381,33 +381,27 @@ exit: return rep_tlv_buf; } -static void cfg_named_msg_event(void *userdata, - u32 port_ref, - struct sk_buff **buf, - const unchar *msg, - u32 size, - u32 importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest) +static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len) { struct tipc_cfg_msg_hdr *req_hdr; struct tipc_cfg_msg_hdr *rep_hdr; struct sk_buff *rep_buf; + int ret; /* Validate configuration message header (ignore invalid message) */ - req_hdr = (struct tipc_cfg_msg_hdr *)msg; - if ((size < sizeof(*req_hdr)) || - (size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || + req_hdr = (struct tipc_cfg_msg_hdr *)buf; + if ((len < sizeof(*req_hdr)) || + (len != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) { pr_warn("Invalid configuration message discarded\n"); return; } /* Generate reply for request (if can't, return request) */ - rep_buf = tipc_cfg_do_cmd(orig->node, - ntohs(req_hdr->tcm_type), - msg + sizeof(*req_hdr), - size - sizeof(*req_hdr), + rep_buf = tipc_cfg_do_cmd(addr->addr.id.node, ntohs(req_hdr->tcm_type), + buf + sizeof(*req_hdr), + len - sizeof(*req_hdr), BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr)); if (rep_buf) { skb_push(rep_buf, sizeof(*rep_hdr)); @@ -415,57 +409,51 @@ static void cfg_named_msg_event(void *userdata, memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr)); rep_hdr->tcm_len = htonl(rep_buf->len); rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST); - } else { - rep_buf = *buf; - *buf = NULL; - } - /* NEED TO ADD CODE TO HANDLE FAILED SEND (SUCH AS CONGESTION) */ - tipc_send_buf2port(port_ref, orig, rep_buf, rep_buf->len); + ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, + rep_buf->len); + if (ret < 0) + pr_err("Sending cfg reply message failed, no memory\n"); + + kfree_skb(rep_buf); + } } +static struct sockaddr_tipc cfgsrv_addr __read_mostly = { + .family = AF_TIPC, + .addrtype = TIPC_ADDR_NAMESEQ, + .addr.nameseq.type = TIPC_CFG_SRV, + .addr.nameseq.lower = 0, + .addr.nameseq.upper = 0, + .scope = TIPC_ZONE_SCOPE +}; + +static struct tipc_server cfgsrv __read_mostly = { + .saddr = &cfgsrv_addr, + .imp = TIPC_CRITICAL_IMPORTANCE, + .type = SOCK_RDM, + .max_rcvbuf_size = 64 * 1024, + .name = "cfg_server", + .tipc_conn_recvmsg = cfg_conn_msg_event, + .tipc_conn_new = NULL, + .tipc_conn_shutdown = NULL +}; + int tipc_cfg_init(void) { - struct tipc_name_seq seq; - int res; - - res = tipc_createport(NULL, TIPC_CRITICAL_IMPORTANCE, - NULL, NULL, NULL, - NULL, cfg_named_msg_event, NULL, - NULL, &config_port_ref); - if (res) - goto failed; - - seq.type = TIPC_CFG_SRV; - seq.lower = seq.upper = tipc_own_addr; - res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq); - if (res) - goto failed; - - return 0; - -failed: - pr_err("Unable to create configuration service\n"); - return res; + return tipc_server_start(&cfgsrv); } void tipc_cfg_reinit(void) { - struct tipc_name_seq seq; - int res; - - seq.type = TIPC_CFG_SRV; - seq.lower = seq.upper = 0; - tipc_withdraw(config_port_ref, TIPC_ZONE_SCOPE, &seq); + tipc_server_stop(&cfgsrv); - seq.lower = seq.upper = tipc_own_addr; - res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq); - if (res) - pr_err("Unable to reinitialize configuration service\n"); + cfgsrv_addr.addr.nameseq.lower = tipc_own_addr; + cfgsrv_addr.addr.nameseq.upper = tipc_own_addr; + tipc_server_start(&cfgsrv); } void tipc_cfg_stop(void) { - tipc_deleteport(config_port_ref); - config_port_ref = 0; + tipc_server_stop(&cfgsrv); } diff --git a/net/tipc/core.c b/net/tipc/core.c index 15bbe99b609d..fd4eeeaa972a 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -136,8 +136,6 @@ static int tipc_core_start(void) res = tipc_ref_table_init(tipc_max_ports, tipc_random); if (!res) res = tipc_nametbl_init(); - if (!res) - res = tipc_cfg_init(); if (!res) res = tipc_netlink_start(); if (!res) @@ -146,6 +144,8 @@ static int tipc_core_start(void) res = tipc_register_sysctl(); if (!res) res = tipc_subscr_start(); + if (!res) + res = tipc_cfg_init(); if (res) tipc_core_stop(); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d0254157a30d..9510fe8acf45 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -403,7 +403,8 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) return -EAFNOSUPPORT; if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && - (addr->addr.nameseq.type != TIPC_TOP_SRV)) + (addr->addr.nameseq.type != TIPC_TOP_SRV) && + (addr->addr.nameseq.type != TIPC_CFG_SRV)) return -EACCES; return (addr->scope > 0) ? -- cgit From 198d73b82bf78739f8f11cf7ff567a2e0da1dbef Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:42 -0400 Subject: tipc: delete code orphaned by new server infrastructure Having completed the conversion of the topology server and configuration server to use the new server infrastructure, the following functions become unused, and can be deleted: - tipc_createport() - port_wakeup_sh() - port_dispatcher() - port_dispatcher_sigh() - tipc_send_buf_fast() - tipc_send_buf2port Additionally, the following variables become orphaned, and can be deleted: - tipc_msg_err_event - tipc_named_msg_err_event - tipc_conn_shutdown_event - tipc_msg_event - tipc_named_msg_event - tipc_conn_msg_event - tipc_continue_event - msg_queue_head - msg_queue_tail - queue_lock Deletion is done here in a separate commit in order to allow the actual conversion changes to be more easily viewed. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/link.c | 36 +------ net/tipc/port.c | 299 +------------------------------------------------------- net/tipc/port.h | 53 +--------- 3 files changed, 3 insertions(+), 385 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index a80feee5197a..0a4c3a1bb9cc 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2,7 +2,7 @@ * net/tipc/link.c: TIPC link code * * Copyright (c) 1996-2007, 2012, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1056,40 +1056,6 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, return tipc_link_send_buf(l_ptr, buf); /* All other cases */ } -/* - * tipc_send_buf_fast: Entry for data messages where the - * destination node is known and the header is complete, - * inclusive total message length. - * Returns user data length. - */ -int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) -{ - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - int res; - u32 selector = msg_origport(buf_msg(buf)) & 1; - u32 dummy; - - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(destnode); - if (likely(n_ptr)) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector]; - if (likely(l_ptr)) { - res = link_send_buf_fast(l_ptr, buf, &dummy); - tipc_node_unlock(n_ptr); - read_unlock_bh(&tipc_net_lock); - return res; - } - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); - res = msg_data_sz(buf_msg(buf)); - tipc_reject_msg(buf, TIPC_ERR_NO_NODE); - return res; -} - - /* * tipc_link_send_sections_fast: Entry for messages where the * destination processor is known and the header is complete, diff --git a/net/tipc/port.c b/net/tipc/port.c index 18098cac62f2..0651522c9435 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -2,7 +2,7 @@ * net/tipc/port.c: TIPC port code * * Copyright (c) 1992-2007, Ericsson AB - * Copyright (c) 2004-2008, 2010-2011, Wind River Systems + * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,11 +46,7 @@ #define MAX_REJECT_SIZE 1024 -static struct sk_buff *msg_queue_head; -static struct sk_buff *msg_queue_tail; - DEFINE_SPINLOCK(tipc_port_list_lock); -static DEFINE_SPINLOCK(queue_lock); static LIST_HEAD(ports); static void port_handle_node_down(unsigned long ref); @@ -668,215 +664,6 @@ void tipc_port_reinit(void) spin_unlock_bh(&tipc_port_list_lock); } - -/* - * port_dispatcher_sigh(): Signal handler for messages destinated - * to the tipc_port interface. - */ -static void port_dispatcher_sigh(void *dummy) -{ - struct sk_buff *buf; - - spin_lock_bh(&queue_lock); - buf = msg_queue_head; - msg_queue_head = NULL; - spin_unlock_bh(&queue_lock); - - while (buf) { - struct tipc_port *p_ptr; - struct user_port *up_ptr; - struct tipc_portid orig; - struct tipc_name_seq dseq; - void *usr_handle; - int connected; - int peer_invalid; - int published; - u32 message_type; - - struct sk_buff *next = buf->next; - struct tipc_msg *msg = buf_msg(buf); - u32 dref = msg_destport(msg); - - message_type = msg_type(msg); - if (message_type > TIPC_DIRECT_MSG) - goto reject; /* Unsupported message type */ - - p_ptr = tipc_port_lock(dref); - if (!p_ptr) - goto reject; /* Port deleted while msg in queue */ - - orig.ref = msg_origport(msg); - orig.node = msg_orignode(msg); - up_ptr = p_ptr->user_port; - usr_handle = up_ptr->usr_handle; - connected = p_ptr->connected; - peer_invalid = connected && !tipc_port_peer_msg(p_ptr, msg); - published = p_ptr->published; - - if (unlikely(msg_errcode(msg))) - goto err; - - switch (message_type) { - - case TIPC_CONN_MSG:{ - tipc_conn_msg_event cb = up_ptr->conn_msg_cb; - u32 dsz; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb)) - goto reject; - if (unlikely(!connected)) { - if (tipc_connect(dref, &orig)) - goto reject; - } else if (peer_invalid) - goto reject; - dsz = msg_data_sz(msg); - if (unlikely(dsz && - (++p_ptr->conn_unacked >= - TIPC_FLOW_CONTROL_WIN))) - tipc_acknowledge(dref, - p_ptr->conn_unacked); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), dsz); - break; - } - case TIPC_DIRECT_MSG:{ - tipc_msg_event cb = up_ptr->msg_cb; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb || connected)) - goto reject; - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_importance(msg), - &orig); - break; - } - case TIPC_MCAST_MSG: - case TIPC_NAMED_MSG:{ - tipc_named_msg_event cb = up_ptr->named_msg_cb; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb || connected || !published)) - goto reject; - dseq.type = msg_nametype(msg); - dseq.lower = msg_nameinst(msg); - dseq.upper = (message_type == TIPC_NAMED_MSG) - ? dseq.lower : msg_nameupper(msg); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_importance(msg), - &orig, &dseq); - break; - } - } - if (buf) - kfree_skb(buf); - buf = next; - continue; -err: - switch (message_type) { - - case TIPC_CONN_MSG:{ - tipc_conn_shutdown_event cb = - up_ptr->conn_err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || !connected || peer_invalid) - break; - tipc_disconnect(dref); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg)); - break; - } - case TIPC_DIRECT_MSG:{ - tipc_msg_err_event cb = up_ptr->err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || connected) - break; - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg), &orig); - break; - } - case TIPC_MCAST_MSG: - case TIPC_NAMED_MSG:{ - tipc_named_msg_err_event cb = - up_ptr->named_err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || connected) - break; - dseq.type = msg_nametype(msg); - dseq.lower = msg_nameinst(msg); - dseq.upper = (message_type == TIPC_NAMED_MSG) - ? dseq.lower : msg_nameupper(msg); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg), &dseq); - break; - } - } - if (buf) - kfree_skb(buf); - buf = next; - continue; -reject: - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); - buf = next; - } -} - -/* - * port_dispatcher(): Dispatcher for messages destinated - * to the tipc_port interface. Called with port locked. - */ -static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf) -{ - buf->next = NULL; - spin_lock_bh(&queue_lock); - if (msg_queue_head) { - msg_queue_tail->next = buf; - msg_queue_tail = buf; - } else { - msg_queue_tail = msg_queue_head = buf; - tipc_k_signal((Handler)port_dispatcher_sigh, 0); - } - spin_unlock_bh(&queue_lock); - return 0; -} - -/* - * Wake up port after congestion: Called with port locked - */ -static void port_wakeup_sh(unsigned long ref) -{ - struct tipc_port *p_ptr; - struct user_port *up_ptr; - tipc_continue_event cb = NULL; - void *uh = NULL; - - p_ptr = tipc_port_lock(ref); - if (p_ptr) { - up_ptr = p_ptr->user_port; - if (up_ptr) { - cb = up_ptr->continue_event_cb; - uh = up_ptr->usr_handle; - } - tipc_port_unlock(p_ptr); - } - if (cb) - cb(uh, ref); -} - - -static void port_wakeup(struct tipc_port *p_ptr) -{ - tipc_k_signal((Handler)port_wakeup_sh, p_ptr->ref); -} - void tipc_acknowledge(u32 ref, u32 ack) { struct tipc_port *p_ptr; @@ -893,50 +680,6 @@ void tipc_acknowledge(u32 ref, u32 ack) tipc_net_route_msg(buf); } -/* - * tipc_createport(): user level call. - */ -int tipc_createport(void *usr_handle, - unsigned int importance, - tipc_msg_err_event error_cb, - tipc_named_msg_err_event named_error_cb, - tipc_conn_shutdown_event conn_error_cb, - tipc_msg_event msg_cb, - tipc_named_msg_event named_msg_cb, - tipc_conn_msg_event conn_msg_cb, - tipc_continue_event continue_event_cb, /* May be zero */ - u32 *portref) -{ - struct user_port *up_ptr; - struct tipc_port *p_ptr; - - up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); - if (!up_ptr) { - pr_warn("Port creation failed, no memory\n"); - return -ENOMEM; - } - p_ptr = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, - importance); - if (!p_ptr) { - kfree(up_ptr); - return -ENOMEM; - } - - p_ptr->user_port = up_ptr; - up_ptr->usr_handle = usr_handle; - up_ptr->ref = p_ptr->ref; - up_ptr->err_cb = error_cb; - up_ptr->named_err_cb = named_error_cb; - up_ptr->conn_err_cb = conn_error_cb; - up_ptr->msg_cb = msg_cb; - up_ptr->named_msg_cb = named_msg_cb; - up_ptr->conn_msg_cb = conn_msg_cb; - up_ptr->continue_event_cb = continue_event_cb; - *portref = p_ptr->ref; - tipc_port_unlock(p_ptr); - return 0; -} - int tipc_portimportance(u32 ref, unsigned int *importance) { struct tipc_port *p_ptr; @@ -1322,43 +1065,3 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest, } return -ELINKCONG; } - -/** - * tipc_send_buf2port - send message buffer to port identity - */ -int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, - struct sk_buff *buf, unsigned int dsz) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg; - int res; - - p_ptr = (struct tipc_port *)tipc_ref_deref(ref); - if (!p_ptr || p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_destnode(msg, dest->node); - msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - msg_set_size(msg, BASIC_H_SIZE + dsz); - if (skb_cow(buf, BASIC_H_SIZE)) - return -ENOMEM; - - skb_push(buf, BASIC_H_SIZE); - skb_copy_to_linear_data(buf, msg, BASIC_H_SIZE); - - if (in_own_node(dest->node)) - res = tipc_port_recv_msg(buf); - else - res = tipc_send_buf_fast(buf, dest->node); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (port_unreliable(p_ptr)) - return dsz; - return -ELINKCONG; -} diff --git a/net/tipc/port.h b/net/tipc/port.h index 2485649c408a..7fd37c202ce6 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -2,7 +2,7 @@ * net/tipc/port.h: Include file for TIPC port code * * Copyright (c) 1994-2007, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,37 +46,6 @@ #define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) -typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason, - struct tipc_portid const *attmpt_destid); - -typedef void (*tipc_named_msg_err_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason, - struct tipc_name_seq const *attmpt_dest); - -typedef void (*tipc_conn_shutdown_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason); - -typedef void (*tipc_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, unsigned int importance, - struct tipc_portid const *origin); - -typedef void (*tipc_named_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, unsigned int importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest); - -typedef void (*tipc_conn_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size); - -typedef void (*tipc_continue_event) (void *usr_handle, u32 portref); - /** * struct user_port - TIPC user port (used with native API) * @usr_handle: user-specified field @@ -87,13 +56,6 @@ typedef void (*tipc_continue_event) (void *usr_handle, u32 portref); struct user_port { void *usr_handle; u32 ref; - tipc_msg_err_event err_cb; - tipc_named_msg_err_event named_err_cb; - tipc_conn_shutdown_event conn_err_cb; - tipc_msg_event msg_cb; - tipc_named_msg_event named_msg_cb; - tipc_conn_msg_event conn_msg_cb; - tipc_continue_event continue_event_cb; }; /** @@ -164,18 +126,8 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, int tipc_reject_msg(struct sk_buff *buf, u32 err); -int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode); - void tipc_acknowledge(u32 port_ref, u32 ack); -int tipc_createport(void *usr_handle, - unsigned int importance, tipc_msg_err_event error_cb, - tipc_named_msg_err_event named_error_cb, - tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb, - tipc_named_msg_event named_msg_cb, - tipc_conn_msg_event conn_msg_cb, - tipc_continue_event continue_event_cb, u32 *portref); - int tipc_deleteport(u32 portref); int tipc_portimportance(u32 portref, unsigned int *importance); @@ -222,9 +174,6 @@ int tipc_send2port(u32 portref, struct tipc_portid const *dest, unsigned int num_sect, struct iovec const *msg_sect, unsigned int total_len); -int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest, - struct sk_buff *buf, unsigned int dsz); - int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, unsigned int section_count, struct iovec const *msg, unsigned int total_len); -- cgit From f1733d7580ff94deb8ea071a293c23939ae0d450 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:43 -0400 Subject: tipc: remove user_port instance from tipc_port structure After the native API has been completely removed, the 'user_port' field in struct tipc_port becomes unused, and can be removed. As a consequence, the "usrmem" argument in tipc_msg_build() is no longer needed, and so we remove that one too. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/link.c | 18 +++++++----------- net/tipc/msg.c | 15 +++++---------- net/tipc/msg.h | 4 ++-- net/tipc/port.c | 8 +++----- net/tipc/port.h | 14 -------------- 5 files changed, 17 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 0a4c3a1bb9cc..d34429d03c16 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1081,7 +1081,7 @@ again: * (Must not hold any locks while building message.) */ res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, - sender->max_pkt, !sender->user_port, &buf); + sender->max_pkt, &buf); read_lock_bh(&tipc_net_lock); node = tipc_node_find(destaddr); @@ -1216,18 +1216,14 @@ again: else sz = fragm_rest; - if (likely(!sender->user_port)) { - if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { + if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { error: - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } - return -EFAULT; + for (; buf_chain; buf_chain = buf) { + buf = buf_chain->next; + kfree_skb(buf_chain); } - } else - skb_copy_to_linear_data_offset(buf, fragm_crs, - sect_crs, sz); + return -EFAULT; + } sect_crs += sz; sect_rest -= sz; fragm_crs += sz; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index f2db8a87d9c5..c2a261322515 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -73,8 +73,8 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, * Returns message data size or errno */ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - int max_size, int usrmem, struct sk_buff **buf) + u32 num_sect, unsigned int total_len, int max_size, + struct sk_buff **buf) { int dsz, sz, hsz, pos, res, cnt; @@ -92,14 +92,9 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, return -ENOMEM; skb_copy_to_linear_data(*buf, hdr, hsz); for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { - if (likely(usrmem)) - res = !copy_from_user((*buf)->data + pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); - else - skb_copy_to_linear_data_offset(*buf, pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); + skb_copy_to_linear_data_offset(*buf, pos, + msg_sect[cnt].iov_base, + msg_sect[cnt].iov_len); pos += msg_sect[cnt].iov_len; } if (likely(res)) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ba2a72beea68..511019a77c9c 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -722,6 +722,6 @@ u32 tipc_msg_tot_importance(struct tipc_msg *m); void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - int max_size, int usrmem, struct sk_buff **buf); + u32 num_sect, unsigned int total_len, int max_size, + struct sk_buff **buf); #endif diff --git a/net/tipc/port.c b/net/tipc/port.c index 0651522c9435..f628c84a8f61 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -115,7 +115,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, msg_set_nameupper(hdr, seq->upper); msg_set_hdr_sz(hdr, MCAST_H_SIZE); res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - !oport->user_port, &buf); + &buf); if (unlikely(!buf)) return res; @@ -234,7 +234,6 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); p_ptr->dispatcher = dispatcher; p_ptr->wakeup = wakeup; - p_ptr->user_port = NULL; k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref); INIT_LIST_HEAD(&p_ptr->publications); INIT_LIST_HEAD(&p_ptr->port_list); @@ -271,7 +270,6 @@ int tipc_deleteport(u32 ref) buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); } - kfree(p_ptr->user_port); spin_lock_bh(&tipc_port_list_lock); list_del(&p_ptr->port_list); @@ -444,7 +442,7 @@ int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, int res; res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - !p_ptr->user_port, &buf); + &buf); if (!buf) return res; @@ -927,7 +925,7 @@ static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_se int res; res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len, - MAX_MSG_SIZE, !sender->user_port, &buf); + MAX_MSG_SIZE, &buf); if (likely(buf)) tipc_port_recv_msg(buf); return res; diff --git a/net/tipc/port.h b/net/tipc/port.h index 7fd37c202ce6..4779f0a82234 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -46,18 +46,6 @@ #define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) -/** - * struct user_port - TIPC user port (used with native API) - * @usr_handle: user-specified field - * @ref: object reference to associated TIPC port - * - * - */ -struct user_port { - void *usr_handle; - u32 ref; -}; - /** * struct tipc_port - TIPC port structure * @usr_handle: pointer to additional user-defined information about port @@ -74,7 +62,6 @@ struct user_port { * @port_list: adjacent ports in TIPC's global list of ports * @dispatcher: ptr to routine which handles received messages * @wakeup: ptr to routine to call when port is no longer congested - * @user_port: ptr to user port associated with port (if any) * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: * @sent: # of non-empty messages sent by port @@ -101,7 +88,6 @@ struct tipc_port { struct list_head port_list; u32 (*dispatcher)(struct tipc_port *, struct sk_buff *); void (*wakeup)(struct tipc_port *); - struct user_port *user_port; struct list_head wait_list; u32 waiting_pkts; u32 sent; -- cgit From 3c5db8e4eca36e4f312b49bba99f4c1f6ce0563a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:44 -0400 Subject: tipc: rename tipc_createport_raw to tipc_createport After the removal of the native API, there is now only one way to to create a TIPC port instance -- the function tipc_createport_raw(). We make it more readable by renaming it to tipc_createport(). Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/port.c | 4 ++-- net/tipc/port.h | 2 +- net/tipc/socket.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tipc/port.c b/net/tipc/port.c index f628c84a8f61..84b2a574f161 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -202,11 +202,11 @@ exit: } /** - * tipc_createport_raw - create a generic TIPC port + * tipc_createport - create a generic TIPC port * * Returns pointer to (locked) TIPC port, or NULL if unable to create it */ -struct tipc_port *tipc_createport_raw(void *usr_handle, +struct tipc_port *tipc_createport(void *usr_handle, u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), void (*wakeup)(struct tipc_port *), const u32 importance) diff --git a/net/tipc/port.h b/net/tipc/port.h index 4779f0a82234..45838826f2f8 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -106,7 +106,7 @@ struct tipc_port_list; /* * TIPC port manipulation routines */ -struct tipc_port *tipc_createport_raw(void *usr_handle, +struct tipc_port *tipc_createport(void *usr_handle, u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), void (*wakeup)(struct tipc_port *), const u32 importance); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9510fe8acf45..67f4e1fbf5a1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -195,8 +195,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, return -ENOMEM; /* Allocate TIPC port for socket to use */ - tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch, - TIPC_LOW_IMPORTANCE); + tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch, + TIPC_LOW_IMPORTANCE); if (unlikely(!tp_ptr)) { sk_free(sk); return -ENOMEM; -- cgit From 28e5297281ab85d636aa814a9b65cfb99375d092 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:45 -0400 Subject: tipc: convert config_lock from spinlock to mutex As the configuration server is now running under process context, it's unnecessary for us to have a spinlock serializing the TIPC configuration process. Instead, we replace it with a mutex lock, which gives us more freedom. For instance, we can now call pre-emptable functions within the protected area. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/config.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 4887ae04f3a5..c301a9a592d8 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -42,7 +42,7 @@ #define REPLY_TRUNCATED "\n" -static DEFINE_SPINLOCK(config_lock); +static DEFINE_MUTEX(config_mutex); static struct tipc_server cfgsrv; static const void *req_tlv_area; /* request message TLV area */ @@ -181,18 +181,7 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - - /* - * Must temporarily release configuration spinlock while switching into - * networking mode as it calls tipc_eth_media_start(), which may sleep. - * Releasing the lock is harmless as other locally-issued configuration - * commands won't occur until this one completes, and remotely-issued - * configuration commands can't be received until a local configuration - * command to enable the first bearer is received and processed. - */ - spin_unlock_bh(&config_lock); tipc_core_start_net(addr); - spin_lock_bh(&config_lock); return tipc_cfg_reply_none(); } @@ -248,7 +237,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area { struct sk_buff *rep_tlv_buf; - spin_lock_bh(&config_lock); + mutex_lock(&config_mutex); /* Save request and reply details in a well-known location */ req_tlv_area = request_area; @@ -377,7 +366,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area /* Return reply buffer */ exit: - spin_unlock_bh(&config_lock); + mutex_unlock(&config_mutex); return rep_tlv_buf; } -- cgit From c0fee8aca7206264d5e3dcc4e60aaf86501f4ea1 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:46 -0400 Subject: tipc: save sock structure pointer instead of void pointer to tipc_port Directly save sock structure pointer instead of void pointer to avoid unnecessary cast conversions. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/port.c | 4 ++-- net/tipc/port.h | 6 +++--- net/tipc/socket.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/tipc/port.c b/net/tipc/port.c index 84b2a574f161..0bb185a3ed4a 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -206,7 +206,7 @@ exit: * * Returns pointer to (locked) TIPC port, or NULL if unable to create it */ -struct tipc_port *tipc_createport(void *usr_handle, +struct tipc_port *tipc_createport(struct sock *sk, u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), void (*wakeup)(struct tipc_port *), const u32 importance) @@ -227,7 +227,7 @@ struct tipc_port *tipc_createport(void *usr_handle, return NULL; } - p_ptr->usr_handle = usr_handle; + p_ptr->sk = sk; p_ptr->max_pkt = MAX_PKT_DEFAULT; p_ptr->ref = ref; INIT_LIST_HEAD(&p_ptr->wait_list); diff --git a/net/tipc/port.h b/net/tipc/port.h index 45838826f2f8..241f529db942 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -48,7 +48,7 @@ /** * struct tipc_port - TIPC port structure - * @usr_handle: pointer to additional user-defined information about port + * @sk: pointer to socket handle * @lock: pointer to spinlock for controlling access to port * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established @@ -74,7 +74,7 @@ * @subscription: "node down" subscription used to terminate failed connections */ struct tipc_port { - void *usr_handle; + struct sock *sk; spinlock_t *lock; int connected; u32 conn_type; @@ -106,7 +106,7 @@ struct tipc_port_list; /* * TIPC port manipulation routines */ -struct tipc_port *tipc_createport(void *usr_handle, +struct tipc_port *tipc_createport(struct sock *sk, u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), void (*wakeup)(struct tipc_port *), const u32 importance); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 67f4e1fbf5a1..14ed54e961b6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1406,7 +1406,7 @@ static int backlog_rcv(struct sock *sk, struct sk_buff *buf) */ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) { - struct sock *sk = (struct sock *)tport->usr_handle; + struct sock *sk = tport->sk; u32 res; /* @@ -1437,7 +1437,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) */ static void wakeupdispatch(struct tipc_port *tport) { - struct sock *sk = (struct sock *)tport->usr_handle; + struct sock *sk = tport->sk; sk->sk_write_space(sk); } -- cgit From ae8509c420122866344bde1241e31858d0aa2fbc Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 17 Jun 2013 10:54:47 -0400 Subject: tipc: cosmetic realignment of function arguments No runtime code changes here. Just a realign of the function arguments to start where the 1st one was, and fit as many args as can be put in an 80 char line. Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/bcast.c | 3 +-- net/tipc/bcast.h | 3 ++- net/tipc/discover.c | 7 +++---- net/tipc/eth_media.c | 2 +- net/tipc/link.c | 18 +++++++----------- net/tipc/msg.c | 4 ++-- net/tipc/msg.h | 4 ++-- net/tipc/name_table.c | 10 +++++----- net/tipc/name_table.h | 11 ++++++----- net/tipc/node_subscr.c | 2 +- net/tipc/port.c | 7 ++++--- net/tipc/port.h | 10 ++++++---- net/tipc/socket.c | 12 ++++++------ net/tipc/subscr.c | 14 ++++---------- net/tipc/subscr.h | 13 ++++--------- 15 files changed, 54 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e5f3da507823..716de1ac6cb5 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -578,8 +578,7 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) * Returns 0 (packet sent successfully) under all circumstances, * since the broadcast link's pseudo-bearer never blocks */ -static int tipc_bcbearer_send(struct sk_buff *buf, - struct tipc_bearer *unused1, +static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int bp_index; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a93306557e00..6ee587b469fd 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -75,7 +75,8 @@ void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); /** * tipc_nmap_equal - test for equality of node maps */ -static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b) +static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, + struct tipc_node_map *nm_b) { return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index eedff58d0387..ecc758c6eacf 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -70,8 +70,7 @@ struct tipc_link_req { * @dest_domain: network domain of node(s) which should respond to message * @b_ptr: ptr to bearer issuing message */ -static struct sk_buff *tipc_disc_init_msg(u32 type, - u32 dest_domain, +static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain, struct tipc_bearer *b_ptr) { struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); @@ -346,8 +345,8 @@ exit: * * Returns 0 if successful, otherwise -errno. */ -int tipc_disc_create(struct tipc_bearer *b_ptr, - struct tipc_media_addr *dest, u32 dest_domain) +int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, + u32 dest_domain) { struct tipc_link_req *req; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index fc60bea63169..c1aa37fdca2f 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -62,7 +62,7 @@ static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; static int eth_started; static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv); + void *dv); /* * Network device notifier info */ diff --git a/net/tipc/link.c b/net/tipc/link.c index d34429d03c16..b852c94a784e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -771,8 +771,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) * link_bundle_buf(): Append contents of a buffer to * the tail of an existing one. */ -static int link_bundle_buf(struct tipc_link *l_ptr, - struct sk_buff *bundler, +static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler, struct sk_buff *buf) { struct tipc_msg *bundler_msg = buf_msg(bundler); @@ -1064,8 +1063,7 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, */ int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, - const u32 num_sect, - unsigned int total_len, + const u32 num_sect, unsigned int total_len, u32 destaddr) { struct tipc_msg *hdr = &sender->phdr; @@ -1155,8 +1153,7 @@ exit: */ static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, - u32 num_sect, - unsigned int total_len, + u32 num_sect, unsigned int total_len, u32 destaddr) { struct tipc_link *l_ptr; @@ -1408,7 +1405,7 @@ static void link_reset_all(unsigned long addr) } static void link_retransmit_failure(struct tipc_link *l_ptr, - struct sk_buff *buf) + struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); @@ -1863,8 +1860,8 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, * Send protocol message to the other endpoint. */ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, - int probe_msg, u32 gap, u32 tolerance, - u32 priority, u32 ack_mtu) + int probe_msg, u32 gap, u32 tolerance, + u32 priority, u32 ack_mtu) { struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; @@ -2107,8 +2104,7 @@ exit: * another bearer. Owner node is locked. */ static void tipc_link_tunnel(struct tipc_link *l_ptr, - struct tipc_msg *tunnel_hdr, - struct tipc_msg *msg, + struct tipc_msg *tunnel_hdr, struct tipc_msg *msg, u32 selector) { struct tipc_link *tunnel; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index c2a261322515..ced60e2fc4f7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -51,8 +51,8 @@ u32 tipc_msg_tot_importance(struct tipc_msg *m) } -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 hsize, u32 destnode) +void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, + u32 destnode) { memset(m, 0, hsize); msg_set_version(m); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 511019a77c9c..5e4ccf5c27df 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -719,8 +719,8 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) } u32 tipc_msg_tot_importance(struct tipc_msg *m); -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 hsize, u32 destnode); +void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, + u32 destnode); int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, unsigned int total_len, int max_size, struct sk_buff **buf); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 24b167914311..09dcd54b04e1 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -440,7 +440,7 @@ found: * sequence overlapping with the requested sequence */ static void tipc_nameseq_subscribe(struct name_seq *nseq, - struct tipc_subscription *s) + struct tipc_subscription *s) { struct sub_seq *sseq = nseq->sseqs; @@ -662,7 +662,7 @@ exit: * tipc_nametbl_publish - add name publication to network name tables */ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key) + u32 scope, u32 port_ref, u32 key) { struct publication *publ; @@ -753,7 +753,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) * subseq_list - print specified sub-sequence contents into the given buffer */ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, - u32 index) + u32 index) { char portIdStr[27]; const char *scope_str[] = {"", " zone", " cluster", " node"}; @@ -792,7 +792,7 @@ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, * nameseq_list - print specified name sequence contents into the given buffer */ static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth, - u32 type, u32 lowbound, u32 upbound, u32 index) + u32 type, u32 lowbound, u32 upbound, u32 index) { struct sub_seq *sseq; char typearea[11]; @@ -849,7 +849,7 @@ static int nametbl_header(char *buf, int len, u32 depth) * nametbl_list - print specified name table contents into the given buffer */ static int nametbl_list(char *buf, int len, u32 depth_info, - u32 type, u32 lowbound, u32 upbound) + u32 type, u32 lowbound, u32 upbound) { struct hlist_head *seq_head; struct name_seq *seq; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 71cb4dc712df..f02f48b9a216 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -87,14 +87,15 @@ extern rwlock_t tipc_nametbl_lock; struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports); + struct tipc_port_list *dports); struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key); + u32 scope, u32 port_ref, u32 key); int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key); struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 ref, u32 key); -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, - u32 node, u32 ref, u32 key); + u32 scope, u32 node, u32 ref, + u32 key); +struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node, + u32 ref, u32 key); void tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(void); diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 5e34b015da45..8a7384c04add 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -42,7 +42,7 @@ * tipc_nodesub_subscribe - create "node down" subscription for specified node */ void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, - void *usr_handle, net_ev_handler handle_down) + void *usr_handle, net_ev_handler handle_down) { if (in_own_node(addr)) { node_sub->node = NULL; diff --git a/net/tipc/port.c b/net/tipc/port.c index 0bb185a3ed4a..b3ed2fcab4fb 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -207,9 +207,10 @@ exit: * Returns pointer to (locked) TIPC port, or NULL if unable to create it */ struct tipc_port *tipc_createport(struct sock *sk, - u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), - void (*wakeup)(struct tipc_port *), - const u32 importance) + u32 (*dispatcher)(struct tipc_port *, + struct sk_buff *), + void (*wakeup)(struct tipc_port *), + const u32 importance) { struct tipc_port *p_ptr; struct tipc_msg *msg; diff --git a/net/tipc/port.h b/net/tipc/port.h index 241f529db942..5a7026b9c345 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -107,8 +107,10 @@ struct tipc_port_list; * TIPC port manipulation routines */ struct tipc_port *tipc_createport(struct sock *sk, - u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), - void (*wakeup)(struct tipc_port *), const u32 importance); + u32 (*dispatcher)(struct tipc_port *, + struct sk_buff *), + void (*wakeup)(struct tipc_port *), + const u32 importance); int tipc_reject_msg(struct sk_buff *buf, u32 err); @@ -126,9 +128,9 @@ int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); int tipc_publish(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); + struct tipc_name_seq const *name_seq); int tipc_withdraw(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); + struct tipc_name_seq const *name_seq); int tipc_connect(u32 portref, struct tipc_portid const *port); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 14ed54e961b6..ce8249c76827 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -278,7 +278,7 @@ void tipc_sock_release_local(struct socket *sock) */ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags) + int flags) { struct sock *sk = sock->sk; int ret; @@ -889,7 +889,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) * Returns 0 if successful, otherwise errno */ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, - struct tipc_port *tport) + struct tipc_port *tport) { u32 anc_data[3]; u32 err; @@ -1736,8 +1736,8 @@ restart: * * Returns 0 on success, errno otherwise */ -static int setsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, unsigned int ol) +static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, + unsigned int ol) { struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); @@ -1795,8 +1795,8 @@ static int setsockopt(struct socket *sock, * * Returns 0 on success, errno otherwise */ -static int getsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, int __user *ol) +static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, + int __user *ol) { struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index f6be92a6973a..d38bb45d82e9 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -117,10 +117,8 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, * * Returns 1 if there is overlap, otherwise 0. */ -int tipc_subscr_overlap(struct tipc_subscription *sub, - u32 found_lower, +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper) - { if (found_lower < sub->seq.lower) found_lower = sub->seq.lower; @@ -136,13 +134,9 @@ int tipc_subscr_overlap(struct tipc_subscription *sub, * * Protected by nameseq.lock in name_table.c */ -void tipc_subscr_report_overlap(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, - u32 node, - int must) +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must) { if (!tipc_subscr_overlap(sub, found_lower, found_upper)) return; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 43e6d6332a02..393e417bee3f 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -67,17 +67,12 @@ struct tipc_subscription { struct tipc_event evt; }; -int tipc_subscr_overlap(struct tipc_subscription *sub, - u32 found_lower, +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper); -void tipc_subscr_report_overlap(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, - u32 node, - int must_report); +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must); int tipc_subscr_start(void); -- cgit From 796c75d0d3ef13cd1df00779abb8b27edb630504 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:48 -0400 Subject: tipc: enhance priority of link protocol packet pfifo_fast is set as default traffic class queueing discipline. This queue has three so called "bands". Within each band, FIFO rules apply. However, as long as there are packets waiting in band 0, band 1 won't be processed. Now all kind of TIPC type packet priorities are never set, that is, their priorities are 0, so they are mapped to band 1 of pfifo_fast qdisc. But, especially during link congestion, if link protocol packet can be sent out as earlier as possible than other type of packets so that protocol packet can arrive at peer endpoint in time, the peer will timely reset its link timeout timer to keep the link alive. So enhancing the priority of link protocol packets can meet the specific demand to avoid unnecessary link reset due to a transient link congestion. Signed-off-by: Ying Xue Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/link.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index b852c94a784e..b6de1aa059f4 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -41,6 +41,8 @@ #include "discover.h" #include "config.h" +#include + /* * Error message prefixes */ @@ -1947,6 +1949,7 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, return; skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); + buf->priority = TC_PRIO_CONTROL; /* Defer message if bearer is already blocked */ if (tipc_bearer_blocked(l_ptr->b_ptr)) { -- cgit From 7410f967ba9bdc14b1e336e5d235929ed878cbfc Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:49 -0400 Subject: tipc: make tipc_link_send_sections_fast exit earlier Once message build request function returns invalid code, the process of sending message cannot continue. So in case of message build failure, tipc_link_send_sections_fast() should return immediately. Signed-off-by: Ying Xue Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/link.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index b6de1aa059f4..b6ffa9fab244 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1082,6 +1082,9 @@ again: */ res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, sender->max_pkt, &buf); + /* Exit if build request was invalid */ + if (unlikely(res < 0)) + return res; read_lock_bh(&tipc_net_lock); node = tipc_node_find(destaddr); @@ -1098,10 +1101,6 @@ exit: return res; } - /* Exit if build request was invalid */ - if (unlikely(res < 0)) - goto exit; - /* Exit if link (or bearer) is congested */ if (link_congested(l_ptr) || tipc_bearer_blocked(l_ptr->b_ptr)) { -- cgit From 126c0524648631a0f6fba4d016586b236209fe6f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:50 -0400 Subject: tipc: fix wrong return value for link_send_sections_long routine When skb buffer cannot be allocated in link_send_sections_long(), -ENOMEM error code instead of -EFAULT should be returned to its caller. Signed-off-by: Ying Xue Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/link.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index b6ffa9fab244..0cc3d9015c5d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1168,6 +1168,7 @@ static int link_send_sections_long(struct tipc_port *sender, const unchar *sect_crs; int curr_sect; u32 fragm_no; + int res = 0; again: fragm_no = 1; @@ -1215,12 +1216,13 @@ again: sz = fragm_rest; if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { + res = -EFAULT; error: for (; buf_chain; buf_chain = buf) { buf = buf_chain->next; kfree_skb(buf_chain); } - return -EFAULT; + return res; } sect_crs += sz; sect_rest -= sz; @@ -1241,8 +1243,10 @@ error: msg_set_fragm_no(&fragm_hdr, ++fragm_no); prev = buf; buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (!buf) + if (!buf) { + res = -ENOMEM; goto error; + } buf->next = NULL; prev->next = buf; -- cgit From 2537af9dcabbdd6c93c041a955d3a9ae42c0c008 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:51 -0400 Subject: tipc: remove dev_base_lock use from enable_bearer Convert enable_bearer() to RCU locking with dev_get_by_name(). Based on a similar changeset in commit 840a185d ["aoe: remove dev_base_lock use from aoecmd_cfg_pkts()"] -- quoting that: "dev_base_lock is the legacy way to lock the device list, and is planned to disappear. (writers hold RTNL, readers hold RCU lock)" Signed-off-by: Ying Xue Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/eth_media.c | 13 ++----------- net/tipc/ib_media.c | 13 ++----------- 2 files changed, 4 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index c1aa37fdca2f..40ea40cf6204 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -162,8 +162,7 @@ static void setup_bearer(struct work_struct *work) */ static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = NULL; - struct net_device *pdev = NULL; + struct net_device *dev; struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; @@ -178,15 +177,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) } /* Find device with specified name */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, pdev) { - if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { - dev = pdev; - dev_hold(dev); - break; - } - } - read_unlock(&dev_base_lock); + dev = dev_get_by_name(&init_net, driver_name); if (!dev) return -ENODEV; diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index baa9df4327d9..ad2e1ec4117e 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -155,8 +155,7 @@ static void setup_bearer(struct work_struct *work) */ static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = NULL; - struct net_device *pdev = NULL; + struct net_device *dev; struct ib_bearer *ib_ptr = &ib_bearers[0]; struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; @@ -171,15 +170,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) } /* Find device with specified name */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, pdev) { - if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { - dev = pdev; - dev_hold(dev); - break; - } - } - read_unlock(&dev_base_lock); + dev = dev_get_by_name(&init_net, driver_name); if (!dev) return -ENODEV; -- cgit From 939cfa75a0cea97aa60cb88e3722baefdceb4e72 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 17 Jun 2013 11:40:04 +0200 Subject: net: sctp: get rid of t_new macro for kzalloc t_new rather obfuscates things where everyone else is using actual function names instead of that macro, so replace it with kzalloc, which is the function t_new wraps. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 +- net/sctp/bind_addr.c | 2 +- net/sctp/endpointola.c | 3 ++- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/sctp/transport.c | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 756025c98e85..bf6e6bd553c0 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -355,7 +355,7 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, { struct sctp_association *asoc; - asoc = t_new(struct sctp_association, gfp); + asoc = kzalloc(sizeof(*asoc), gfp); if (!asoc) goto fail; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 41145fe31813..64977ea0f9c5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -162,7 +162,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, struct sctp_sockaddr_entry *addr; /* Add the address to the bind address list. */ - addr = t_new(struct sctp_sockaddr_entry, gfp); + addr = kzalloc(sizeof(*addr), gfp); if (!addr) return -ENOMEM; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 5fbd7bc6bb11..a8b26741c0af 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -192,9 +192,10 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) struct sctp_endpoint *ep; /* Build a local endpoint. */ - ep = t_new(struct sctp_endpoint, gfp); + ep = kzalloc(sizeof(*ep), gfp); if (!ep) goto fail; + if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index fffc7b62a9a8..4f3e13b31fcc 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -402,7 +402,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { /* Add the address to the local list. */ - addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index eaee00c61139..fad7d1b67be5 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -153,7 +153,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { /* Add the address to the local list. */ - addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 098f1d5f769e..5d3c71bbd197 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -116,7 +116,7 @@ struct sctp_transport *sctp_transport_new(struct net *net, { struct sctp_transport *transport; - transport = t_new(struct sctp_transport, gfp); + transport = kzalloc(sizeof(*transport), gfp); if (!transport) goto fail; -- cgit From dda9192851dcf904b4d1095480834f2a4f814ae3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 17 Jun 2013 11:40:05 +0200 Subject: net: sctp: remove SCTP_STATIC macro SCTP_STATIC is just another define for the static keyword. It's use is inconsistent in the SCTP code anyway and it was introduced in the initial implementation of SCTP in 2.5. We have a regression suite in lksctp-tools, but this is for user space only, so noone makes use of this macro anymore. The kernel test suite for 2.5 is incompatible with the current SCTP code anyway. So simply Remove it, to be more consistent with the rest of the kernel code. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/chunk.c | 2 +- net/sctp/input.c | 4 +-- net/sctp/ipv6.c | 4 +-- net/sctp/protocol.c | 4 +-- net/sctp/sm_make_chunk.c | 10 +++---- net/sctp/socket.c | 78 ++++++++++++++++++++++-------------------------- net/sctp/tsnmap.c | 10 +++---- net/sctp/ulpevent.c | 10 +++---- 8 files changed, 57 insertions(+), 65 deletions(-) (limited to 'net') diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 69ce21e3716f..7135fc0c087a 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -66,7 +66,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) } /* Allocate and initialize datamsg. */ -SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) +static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) { struct sctp_datamsg *msg; msg = kmalloc(sizeof(struct sctp_datamsg), gfp); diff --git a/net/sctp/input.c b/net/sctp/input.c index 6533d81a638d..4cfc74699a3f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -903,11 +903,11 @@ hit: } /* Look up an association. BH-safe. */ -SCTP_STATIC +static struct sctp_association *sctp_lookup_association(struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp) { struct sctp_association *asoc; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 4f3e13b31fcc..adeaa0e64f52 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -145,8 +145,8 @@ static struct notifier_block sctp_inet6addr_notifier = { }; /* ICMP error handler. */ -SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { struct inet6_dev *idev; struct sock *sk; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index fad7d1b67be5..57b568c38ef6 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1312,7 +1312,7 @@ static struct pernet_operations sctp_net_ops = { }; /* Initialize the universe into something sensible. */ -SCTP_STATIC __init int sctp_init(void) +static __init int sctp_init(void) { int i; int status = -EINVAL; @@ -1499,7 +1499,7 @@ err_chunk_cachep: } /* Exit handler for the SCTP protocol. */ -SCTP_STATIC __exit void sctp_exit(void) +static __exit void sctp_exit(void) { /* BUG. This should probably do something useful like clean * up all the remaining associations and all that memory. diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index cf579e71cff0..fc8548743ed5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -68,9 +68,8 @@ #include #include -SCTP_STATIC -struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, - __u8 type, __u8 flags, int paylen); +static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, + __u8 type, __u8 flags, int paylen); static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *init_chunk, @@ -1353,9 +1352,8 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk) /* Create a new chunk, setting the type and flags headers from the * arguments, reserving enough space for a 'paylen' byte payload. */ -SCTP_STATIC -struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, - __u8 type, __u8 flags, int paylen) +static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, + __u8 type, __u8 flags, int paylen) { struct sctp_chunk *retval; sctp_chunkhdr_t *chunk_hdr; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 510dc79a32a1..75fe92ac2e9c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -84,11 +84,6 @@ #include #include -/* WARNING: Please do not remove the SCTP_STATIC attribute to - * any of the functions below as they are used to export functions - * used by a project regression testsuite. - */ - /* Forward declarations for internal helper functions. */ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); @@ -279,7 +274,7 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, * sockaddr_in6 [RFC 2553]), * addr_len - the size of the address structure. */ -SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) +static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) { int retval = 0; @@ -333,7 +328,7 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, } /* Bind a local address either to an endpoint or to an association. */ -SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) +static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) { struct net *net = sock_net(sk); struct sctp_sock *sp = sctp_sk(sk); @@ -964,9 +959,9 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) * * Returns 0 if ok, <0 errno code on error. */ -SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size, int op) +static int sctp_setsockopt_bindx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size, int op) { struct sockaddr *kaddrs; int err; @@ -1312,7 +1307,7 @@ out_free: * * Returns >=0 if ok, <0 errno code on error. */ -SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, +static int __sctp_setsockopt_connectx(struct sock* sk, struct sockaddr __user *addrs, int addrs_size, sctp_assoc_t *assoc_id) @@ -1350,9 +1345,9 @@ SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, * This is an older interface. It's kept for backward compatibility * to the option that doesn't provide association id. */ -SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size) +static int sctp_setsockopt_connectx_old(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) { return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL); } @@ -1363,9 +1358,9 @@ SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, * indication to the call. Error is always negative and association id is * always positive. */ -SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size) +static int sctp_setsockopt_connectx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) { sctp_assoc_t assoc_id = 0; int err = 0; @@ -1386,9 +1381,9 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, * addrs_num structure member. That way we can re-use the existing * code. */ -SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, - char __user *optval, - int __user *optlen) +static int sctp_getsockopt_connectx3(struct sock* sk, int len, + char __user *optval, + int __user *optlen) { struct sctp_getaddrs_old param; sctp_assoc_t assoc_id = 0; @@ -1464,7 +1459,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, * shutdown phase does not finish during this period, close() will * return but the graceful shutdown phase continues in the system. */ -SCTP_STATIC void sctp_close(struct sock *sk, long timeout) +static void sctp_close(struct sock *sk, long timeout) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; @@ -1573,10 +1568,10 @@ static int sctp_error(struct sock *sk, int flags, int err) */ /* BUG: We do not implement the equivalent of sk_stream_wait_memory(). */ -SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); +static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); -SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t msg_len) +static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t msg_len) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -2034,9 +2029,9 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) */ static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); -SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); @@ -3565,8 +3560,8 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, * optval - the buffer to store the value of the option. * optlen - the size of the buffer. */ -SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) +static int sctp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) { int retval = 0; @@ -3725,8 +3720,8 @@ out_nounlock: * * len: the size of the address. */ -SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, - int addr_len) +static int sctp_connect(struct sock *sk, struct sockaddr *addr, + int addr_len) { int err = 0; struct sctp_af *af; @@ -3752,7 +3747,7 @@ SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, } /* FIXME: Write comments. */ -SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) +static int sctp_disconnect(struct sock *sk, int flags) { return -EOPNOTSUPP; /* STUB */ } @@ -3764,7 +3759,7 @@ SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) * descriptor will be returned from accept() to represent the newly * formed association. */ -SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) +static struct sock *sctp_accept(struct sock *sk, int flags, int *err) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -3817,7 +3812,7 @@ out: } /* The SCTP ioctl handler. */ -SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) { int rc = -ENOTCONN; @@ -3859,7 +3854,7 @@ out: * initialized the SCTP-specific portion of the sock. * The sock structure should already be zero-filled memory. */ -SCTP_STATIC int sctp_init_sock(struct sock *sk) +static int sctp_init_sock(struct sock *sk) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -3993,7 +3988,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) } /* Cleanup any SCTP per socket resources. */ -SCTP_STATIC void sctp_destroy_sock(struct sock *sk) +static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; @@ -4028,7 +4023,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) * Disables further send and receive operations * and initiates the SCTP shutdown sequence. */ -SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) +static void sctp_shutdown(struct sock *sk, int how) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; @@ -5700,8 +5695,8 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, return 0; } -SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +static int sctp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { int retval = 0; int len; @@ -6046,7 +6041,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) /* * Move a socket to LISTENING state. */ -SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) +static int sctp_listen_start(struct sock *sk, int backlog) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; @@ -6333,8 +6328,7 @@ static int sctp_autobind(struct sock *sk) * msg_control * points here */ -SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg, - sctp_cmsgs_t *cmsgs) +static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) { struct cmsghdr *cmsg; struct msghdr *my_msg = (struct msghdr *)msg; diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 396c45174e5b..b46019568a86 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -161,8 +161,8 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, /* Initialize a Gap Ack Block iterator from memory being provided. */ -SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, - struct sctp_tsnmap_iter *iter) +static void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, + struct sctp_tsnmap_iter *iter) { /* Only start looking one past the Cumulative TSN Ack Point. */ iter->start = map->cumulative_tsn_ack_point + 1; @@ -171,9 +171,9 @@ SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, /* Get the next Gap Ack Blocks. Returns 0 if there was not another block * to get. */ -SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, - struct sctp_tsnmap_iter *iter, - __u16 *start, __u16 *end) +static int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, + struct sctp_tsnmap_iter *iter, + __u16 *start, __u16 *end) { int ended = 0; __u16 start_ = 0, end_ = 0, offset; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 10c018a5b9fe..44a45dbee4df 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -57,9 +57,9 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event); /* Initialize an ULP event from an given skb. */ -SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, - int msg_flags, - unsigned int len) +static void sctp_ulpevent_init(struct sctp_ulpevent *event, + int msg_flags, + unsigned int len) { memset(event, 0, sizeof(struct sctp_ulpevent)); event->msg_flags = msg_flags; @@ -67,8 +67,8 @@ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, } /* Create a new sctp_ulpevent. */ -SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, - gfp_t gfp) +static struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, + gfp_t gfp) { struct sctp_ulpevent *event; struct sk_buff *skb; -- cgit From a1193be83b4bb173228f04870afd6a4174b19130 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 14 Jun 2013 14:15:19 +0200 Subject: nl80211: use attributes to parse beacons only the attributes are required and not the whole netlink info, as the function accesses the attributes only anyway. This makes it easier to parse nested beacon IEs later. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 53 ++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e4028197b75d..1c4f7daea6c7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2882,61 +2882,58 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) return err; } -static int nl80211_parse_beacon(struct genl_info *info, +static int nl80211_parse_beacon(struct nlattr *attrs[], struct cfg80211_beacon_data *bcn) { bool haveinfo = false; - if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL]) || - !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]) || - !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_PROBE_RESP]) || - !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_ASSOC_RESP])) + if (!is_valid_ie_attr(attrs[NL80211_ATTR_BEACON_TAIL]) || + !is_valid_ie_attr(attrs[NL80211_ATTR_IE]) || + !is_valid_ie_attr(attrs[NL80211_ATTR_IE_PROBE_RESP]) || + !is_valid_ie_attr(attrs[NL80211_ATTR_IE_ASSOC_RESP])) return -EINVAL; memset(bcn, 0, sizeof(*bcn)); - if (info->attrs[NL80211_ATTR_BEACON_HEAD]) { - bcn->head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]); - bcn->head_len = nla_len(info->attrs[NL80211_ATTR_BEACON_HEAD]); + if (attrs[NL80211_ATTR_BEACON_HEAD]) { + bcn->head = nla_data(attrs[NL80211_ATTR_BEACON_HEAD]); + bcn->head_len = nla_len(attrs[NL80211_ATTR_BEACON_HEAD]); if (!bcn->head_len) return -EINVAL; haveinfo = true; } - if (info->attrs[NL80211_ATTR_BEACON_TAIL]) { - bcn->tail = nla_data(info->attrs[NL80211_ATTR_BEACON_TAIL]); - bcn->tail_len = - nla_len(info->attrs[NL80211_ATTR_BEACON_TAIL]); + if (attrs[NL80211_ATTR_BEACON_TAIL]) { + bcn->tail = nla_data(attrs[NL80211_ATTR_BEACON_TAIL]); + bcn->tail_len = nla_len(attrs[NL80211_ATTR_BEACON_TAIL]); haveinfo = true; } if (!haveinfo) return -EINVAL; - if (info->attrs[NL80211_ATTR_IE]) { - bcn->beacon_ies = nla_data(info->attrs[NL80211_ATTR_IE]); - bcn->beacon_ies_len = nla_len(info->attrs[NL80211_ATTR_IE]); + if (attrs[NL80211_ATTR_IE]) { + bcn->beacon_ies = nla_data(attrs[NL80211_ATTR_IE]); + bcn->beacon_ies_len = nla_len(attrs[NL80211_ATTR_IE]); } - if (info->attrs[NL80211_ATTR_IE_PROBE_RESP]) { + if (attrs[NL80211_ATTR_IE_PROBE_RESP]) { bcn->proberesp_ies = - nla_data(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); + nla_data(attrs[NL80211_ATTR_IE_PROBE_RESP]); bcn->proberesp_ies_len = - nla_len(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); + nla_len(attrs[NL80211_ATTR_IE_PROBE_RESP]); } - if (info->attrs[NL80211_ATTR_IE_ASSOC_RESP]) { + if (attrs[NL80211_ATTR_IE_ASSOC_RESP]) { bcn->assocresp_ies = - nla_data(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); + nla_data(attrs[NL80211_ATTR_IE_ASSOC_RESP]); bcn->assocresp_ies_len = - nla_len(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); + nla_len(attrs[NL80211_ATTR_IE_ASSOC_RESP]); } - if (info->attrs[NL80211_ATTR_PROBE_RESP]) { - bcn->probe_resp = - nla_data(info->attrs[NL80211_ATTR_PROBE_RESP]); - bcn->probe_resp_len = - nla_len(info->attrs[NL80211_ATTR_PROBE_RESP]); + if (attrs[NL80211_ATTR_PROBE_RESP]) { + bcn->probe_resp = nla_data(attrs[NL80211_ATTR_PROBE_RESP]); + bcn->probe_resp_len = nla_len(attrs[NL80211_ATTR_PROBE_RESP]); } return 0; @@ -3015,7 +3012,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) !info->attrs[NL80211_ATTR_BEACON_HEAD]) return -EINVAL; - err = nl80211_parse_beacon(info, ¶ms.beacon); + err = nl80211_parse_beacon(info->attrs, ¶ms.beacon); if (err) return err; @@ -3167,7 +3164,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (!wdev->beacon_interval) return -EINVAL; - err = nl80211_parse_beacon(info, ¶ms); + err = nl80211_parse_beacon(info->attrs, ¶ms); if (err) return err; -- cgit From f81a9dedaff434604c7fc3d9c299d277b76db0a8 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 13 Jun 2013 15:54:41 -0700 Subject: mac80211: update mesh beacon on workqueue Instead of updating the mesh beacon immediately when requested (which would require the sdata_lock()), defer it to the mac80211 workqueue. Fixes yet another deadlock on calling sta_info_flush() with the sdata_lock() held from ieee80211_stop_mesh(). We could just drop the sdata_lock() around the mesh_sta_cleanup() call, but this path is also taken from several non-locked error paths. Signed-off-by: Thomas Pedersen [fix comment position] Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mesh.c | 53 +++++++++++++++++++++++++++++++++++----------- net/mac80211/mesh.h | 2 ++ 3 files changed, 44 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a4dfb0be53d7..194be3de16dd 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -544,6 +544,7 @@ struct ieee80211_if_mesh { struct timer_list mesh_path_root_timer; unsigned long wrkq_flags; + unsigned long mbss_changed; u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; size_t mesh_id_len; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6c33af482df4..d5dea94216e4 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -161,11 +161,8 @@ void mesh_sta_cleanup(struct sta_info *sta) del_timer_sync(&sta->plink_timer); } - if (changed) { - sdata_lock(sdata); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); - sdata_unlock(sdata); - } } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -719,14 +716,18 @@ ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata) void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed) { - if (sdata->vif.bss_conf.enable_beacon && - (changed & (BSS_CHANGED_BEACON | - BSS_CHANGED_HT | - BSS_CHANGED_BASIC_RATES | - BSS_CHANGED_BEACON_INT))) - if (ieee80211_mesh_rebuild_beacon(sdata)) - return; - ieee80211_bss_info_change_notify(sdata, changed); + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + unsigned long bits = changed; + u32 bit; + + if (!bits) + return; + + /* if we race with running work, worst case this work becomes a noop */ + for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE) + set_bit(bit, &ifmsh->mbss_changed); + set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) @@ -799,6 +800,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); del_timer_sync(&sdata->u.mesh.mesh_path_timer); + /* clear any mesh work (for next join) we may have accrued */ + ifmsh->wrkq_flags = 0; + ifmsh->mbss_changed = 0; + local->fif_other_bss--; atomic_dec(&local->iff_allmultis); ieee80211_configure_filter(local); @@ -965,6 +970,28 @@ out: sdata_unlock(sdata); } +static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u32 bit, changed = 0; + + for_each_set_bit(bit, &ifmsh->mbss_changed, + sizeof(changed) * BITS_PER_BYTE) { + clear_bit(bit, &ifmsh->mbss_changed); + changed |= BIT(bit); + } + + if (sdata->vif.bss_conf.enable_beacon && + (changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT))) + if (ieee80211_mesh_rebuild_beacon(sdata)) + return; + + ieee80211_bss_info_change_notify(sdata, changed); +} + void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; @@ -995,6 +1022,8 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags)) mesh_sync_adjust_tbtt(sdata); + if (test_and_clear_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags)) + mesh_bss_info_changed(sdata); out: sdata_unlock(sdata); } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 01a28bca6e9b..2bc7fd2f787d 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -58,6 +58,7 @@ enum mesh_path_flags { * @MESH_WORK_ROOT: the mesh root station needs to send a frame * @MESH_WORK_DRIFT_ADJUST: time to compensate for clock drift relative to other * mesh nodes + * @MESH_WORK_MBSS_CHANGED: rebuild beacon and notify driver of BSS changes */ enum mesh_deferred_task_flags { MESH_WORK_HOUSEKEEPING, @@ -65,6 +66,7 @@ enum mesh_deferred_task_flags { MESH_WORK_GROW_MPP_TABLE, MESH_WORK_ROOT, MESH_WORK_DRIFT_ADJUST, + MESH_WORK_MBSS_CHANGED, }; /** -- cgit From 2f301ab29e4656af824592363039d8f6bd5a9f68 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 16 May 2013 13:00:28 +0200 Subject: nl80211/cfg80211: add 5 and 10 MHz defines and wiphy flag Add defines for 5 and 10 MHz channel width and fix channel handling functions accordingly. Also check for and report the WIPHY_FLAG_SUPPORTS_5_10_MHZ capability. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer [fix spelling in comment] Signed-off-by: Johannes Berg --- net/wireless/chan.c | 57 +++++++++++++++++++++++++++++++++++++++++++------- net/wireless/nl80211.c | 21 +++++++++++++++---- 2 files changed, 66 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index fd556ac05fdb..50f6195c8b70 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -54,6 +54,8 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) control_freq = chandef->chan->center_freq; switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: if (chandef->center_freq1 != control_freq) @@ -152,6 +154,12 @@ static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) int width; switch (c->width) { + case NL80211_CHAN_WIDTH_5: + width = 5; + break; + case NL80211_CHAN_WIDTH_10: + width = 10; + break; case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: width = 20; @@ -194,6 +202,16 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, if (c1->width == c2->width) return NULL; + /* + * can't be compatible if one of them is 5 or 10 MHz, + * but they don't have the same width. + */ + if (c1->width == NL80211_CHAN_WIDTH_5 || + c1->width == NL80211_CHAN_WIDTH_10 || + c2->width == NL80211_CHAN_WIDTH_5 || + c2->width == NL80211_CHAN_WIDTH_10) + return NULL; + if (c1->width == NL80211_CHAN_WIDTH_20_NOHT || c1->width == NL80211_CHAN_WIDTH_20) return c2; @@ -264,11 +282,17 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, u32 bandwidth) { struct ieee80211_channel *c; - u32 freq; + u32 freq, start_freq, end_freq; + + if (bandwidth <= 20) { + start_freq = center_freq; + end_freq = center_freq; + } else { + start_freq = center_freq - bandwidth/2 + 10; + end_freq = center_freq + bandwidth/2 - 10; + } - for (freq = center_freq - bandwidth/2 + 10; - freq <= center_freq + bandwidth/2 - 10; - freq += 20) { + for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); if (!c) return -EINVAL; @@ -310,11 +334,17 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 prohibited_flags) { struct ieee80211_channel *c; - u32 freq; + u32 freq, start_freq, end_freq; + + if (bandwidth <= 20) { + start_freq = center_freq; + end_freq = center_freq; + } else { + start_freq = center_freq - bandwidth/2 + 10; + end_freq = center_freq + bandwidth/2 - 10; + } - for (freq = center_freq - bandwidth/2 + 10; - freq <= center_freq + bandwidth/2 - 10; - freq += 20) { + for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); if (!c) return false; @@ -349,6 +379,12 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, control_freq = chandef->chan->center_freq; switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + width = 5; + break; + case NL80211_CHAN_WIDTH_10: + width = 10; + break; case NL80211_CHAN_WIDTH_20: if (!ht_cap->ht_supported) return false; @@ -405,6 +441,11 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, if (width > 20) prohibited_flags |= IEEE80211_CHAN_NO_OFDM; + /* 5 and 10 MHz are only defined for the OFDM PHY */ + if (width < 20) + prohibited_flags |= IEEE80211_CHAN_NO_OFDM; + + if (!cfg80211_secondary_chans_ok(wiphy, chandef->center_freq1, width, prohibited_flags)) return false; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1c4f7daea6c7..4ab1ffa9df11 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1188,6 +1188,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && + nla_put_flag(msg, WIPHY_FLAG_SUPPORTS_5_10_MHZ)) + goto nla_put_failure; (*split_start)++; if (split) @@ -1731,6 +1734,11 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, IEEE80211_CHAN_DISABLED)) return -EINVAL; + if ((chandef->width == NL80211_CHAN_WIDTH_5 || + chandef->width == NL80211_CHAN_WIDTH_10) && + !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ)) + return -EINVAL; + return 0; } @@ -6280,11 +6288,16 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) return -EINVAL; - if (ibss.chandef.width > NL80211_CHAN_WIDTH_40) - return -EINVAL; - if (ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && - !(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) + switch (ibss.chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + break; + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + if (rdev->wiphy.features & NL80211_FEATURE_HT_IBSS) + break; + default: return -EINVAL; + } ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; -- cgit From 0418a445838749c51cf1e31a9c7ace6685ae87cd Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 16 May 2013 13:00:31 +0200 Subject: mac80211: fix various components for the new 5 and 10 MHz widths This is a collection of minor fixes: * don't allow HT IEs in IBSS for 5/10 MHz * don't allow HT IEs in Mesh for 5/10 MHz * don't downgrade from/to 5 and 10 MHz channels * don't try HT rates for 5 and 10 MHz channels when selecting rates Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 2 ++ net/mac80211/mesh.c | 4 +++- net/mac80211/mesh_plink.c | 8 +++++++- net/mac80211/mlme.c | 12 ++++++++++++ net/mac80211/rate.c | 8 +++++++- 5 files changed, 31 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index caa4b4f7f6e4..3789c85282a0 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -176,6 +176,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, /* add HT capability and information IEs */ if (chandef.width != NL80211_CHAN_WIDTH_20_NOHT && + chandef.width != NL80211_CHAN_WIDTH_5 && + chandef.width != NL80211_CHAN_WIDTH_10 && sband->ht_cap.ht_supported) { pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, sband->ht_cap.cap); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d5dea94216e4..447f41bbe744 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -416,7 +416,9 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[band]; if (!sband->ht_cap.ht_supported || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap)) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 09bebed99416..02c05fa15c20 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -154,8 +154,14 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) u16 ht_opmode; bool non_ht_sta = false, ht20_sta = false; - if (sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) + switch (sdata->vif.bss_conf.chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: return 0; + default: + break; + } rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 87f2d4df31f8..e0939eb79060 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -190,6 +190,12 @@ static u32 chandef_downgrade(struct cfg80211_chan_def *c) c->width = NL80211_CHAN_WIDTH_20_NOHT; ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; break; + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + WARN_ON_ONCE(1); + /* keep c->width */ + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; } WARN_ON_ONCE(!cfg80211_chandef_valid(c)); @@ -3771,6 +3777,12 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, */ ret = ieee80211_vif_use_channel(sdata, &chandef, IEEE80211_CHANCTX_SHARED); + + /* don't downgrade for 5 and 10 MHz channels, though. */ + if (chandef.width == NL80211_CHAN_WIDTH_5 || + chandef.width == NL80211_CHAN_WIDTH_10) + return ret; + while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { ifmgd->flags |= chandef_downgrade(&chandef); ret = ieee80211_vif_use_channel(sdata, &chandef, diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index d3f414fe67e0..dbbcd57b0fc6 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -397,8 +397,14 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, return; /* if HT BSS, and we handle a data frame, also try HT rates */ - if (chan_width == NL80211_CHAN_WIDTH_20_NOHT) + switch (chan_width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: return; + default: + break; + } alt_rate.idx = 0; /* keep protection flags */ -- cgit From 3aede78aad2a7e39a81b4b0caa771d40254a6787 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 16 May 2013 13:00:36 +0200 Subject: mac80211: change IBSS channel state to chandef This should make some parts cleaner and is also required for handling 5/10 MHz properly. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 22 +++++++++++----------- net/mac80211/ieee80211_i.h | 3 +-- 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 3789c85282a0..eaacfd27061c 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -81,7 +81,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - cfg80211_chandef_create(&chandef, chan, ifibss->channel_type); + chandef = ifibss->chandef; if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) { chandef.width = NL80211_CHAN_WIDTH_20; chandef.center_freq1 = chan->center_freq; @@ -516,7 +516,9 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, set_sta_flag(sta, WLAN_STA_WME); if (sta && elems->ht_operation && elems->ht_cap_elem && - sdata->u.ibss.channel_type != NL80211_CHAN_NO_HT) { + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_5 && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_10) { /* we both use HT */ struct ieee80211_ht_cap htcap_ie; struct cfg80211_chan_def chandef; @@ -531,8 +533,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * fall back to HT20 if we don't use or use * the other extension channel */ - if (cfg80211_get_chandef_type(&chandef) != - sdata->u.ibss.channel_type) + if (chandef.center_freq1 != + sdata->u.ibss.chandef.center_freq1) htcap_ie.cap_info &= cpu_to_le16(~IEEE80211_HT_CAP_SUP_WIDTH_20_40); @@ -571,7 +573,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, /* different channel */ if (sdata->u.ibss.fixed_channel && - sdata->u.ibss.channel != cbss->channel) + sdata->u.ibss.chandef.chan != cbss->channel) goto put_bss; /* different SSID */ @@ -761,7 +763,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) sdata->drop_unencrypted = 0; __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, - ifibss->channel, ifibss->basic_rates, + ifibss->chandef.chan, ifibss->basic_rates, capability, 0, true); } @@ -793,7 +795,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) if (ifibss->fixed_bssid) bssid = ifibss->bssid; if (ifibss->fixed_channel) - chan = ifibss->channel; + chan = ifibss->chandef.chan; if (!is_zero_ether_addr(ifibss->bssid)) bssid = ifibss->bssid; cbss = cfg80211_get_bss(local->hw.wiphy, chan, bssid, @@ -1060,9 +1062,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.beacon_int = params->beacon_interval; - sdata->u.ibss.channel = params->chandef.chan; - sdata->u.ibss.channel_type = - cfg80211_get_chandef_type(¶ms->chandef); + sdata->u.ibss.chandef = params->chandef; sdata->u.ibss.fixed_channel = params->channel_fixed; if (params->ie) { @@ -1121,7 +1121,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; - cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->channel, + cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, ifibss->ssid_len, WLAN_CAPABILITY_IBSS | WLAN_CAPABILITY_PRIVACY, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 194be3de16dd..1bfc3955005c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -504,8 +504,7 @@ struct ieee80211_if_ibss { u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len, ie_len; u8 *ie; - struct ieee80211_channel *channel; - enum nl80211_channel_type channel_type; + struct cfg80211_chan_def chandef; unsigned long ibss_join_req; /* probe response/beacon for IBSS */ -- cgit From 52874a5e3917dde3b081521b014d6e4b226aacff Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 18 Jun 2013 14:20:40 +0200 Subject: Revert "mac80211: in IBSS use the Auth frame to trigger STA reinsertion" This reverts commit 6d810f10325522cfcf498dc6d64b9f96e1f5153f In this way an IBSS station will not use the AUTH messages to trigger a state reinitialisation anymore. The behaviour was racy and was not working properly. It has been introduced to help wpa_supplicant to support IBSS/RSN, however all the logic is now getting moved into wpa_s itself which will also be in charge of handling the AUTH messages thanks to the mgmt frame registration. If userspace does not register for receiving AUTH frames then mac80211 will still reply by itself. At the same time, the auth frame registration counter can be removed since it is not needed anymore. Signed-off-by: Antonio Quartulli [remove unused variable] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 11 ----------- net/mac80211/ibss.c | 41 +++++++---------------------------------- net/mac80211/ieee80211_i.h | 1 - 3 files changed, 7 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 64cf294c2b96..18ba7ed3ef0b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2924,19 +2924,8 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, u16 frame_type, bool reg) { struct ieee80211_local *local = wiphy_priv(wiphy); - struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); switch (frame_type) { - case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH: - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { - struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - - if (reg) - ifibss->auth_frame_registrations++; - else - ifibss->auth_frame_registrations--; - } - break; case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ: if (reg) local->probe_req_reg++; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index eaacfd27061c..ea7b9c2c7e66 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -300,8 +300,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, tsf, false); } -static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, - bool auth) +static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta) __acquires(RCU) { struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -323,20 +322,12 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, /* If it fails, maybe we raced another insertion? */ if (sta_info_insert_rcu(sta)) return sta_info_get(sdata, addr); - if (auth && !sdata->u.ibss.auth_frame_registrations) { - ibss_dbg(sdata, - "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", - sdata->vif.addr, addr, sdata->u.ibss.bssid); - ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, 0, NULL, 0, - addr, sdata->u.ibss.bssid, NULL, 0, 0, 0); - } return sta; } static struct sta_info * -ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - const u8 *bssid, const u8 *addr, - u32 supp_rates, bool auth) +ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, + const u8 *addr, u32 supp_rates) __acquires(RCU) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; @@ -387,7 +378,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, sta->sta.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband); - return ieee80211_ibss_finish_sta(sta, auth); + return ieee80211_ibss_finish_sta(sta); } static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, @@ -409,8 +400,6 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, size_t len) { u16 auth_alg, auth_transaction; - struct sta_info *sta; - u8 deauth_frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; sdata_assert_lock(sdata); @@ -427,22 +416,6 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; - sta_info_destroy_addr(sdata, mgmt->sa); - sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); - rcu_read_unlock(); - - /* - * if we have any problem in allocating the new station, we reply with a - * DEAUTH frame to tell the other end that we had a problem - */ - if (!sta) { - ieee80211_send_deauth_disassoc(sdata, sdata->u.ibss.bssid, - IEEE80211_STYPE_DEAUTH, - WLAN_REASON_UNSPECIFIED, true, - deauth_frame_buf); - return; - } - /* * IEEE 802.11 standard does not require authentication in IBSS * networks and most implementations do not seem to use it. @@ -508,7 +481,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } else { rcu_read_unlock(); sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, - mgmt->sa, supp_rates, true); + mgmt->sa, supp_rates); } } @@ -614,7 +587,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, ieee80211_sta_join_ibss(sdata, bss); supp_rates = ieee80211_sta_get_rates(local, elems, band, NULL); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, - supp_rates, true); + supp_rates); rcu_read_unlock(); } @@ -986,7 +959,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) list_del(&sta->list); spin_unlock_bh(&ifibss->incomplete_lock); - ieee80211_ibss_finish_sta(sta, true); + ieee80211_ibss_finish_sta(sta); rcu_read_unlock(); spin_lock_bh(&ifibss->incomplete_lock); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1bfc3955005c..00d71e9a8fb8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -498,7 +498,6 @@ struct ieee80211_if_ibss { bool privacy; bool control_port; - unsigned int auth_frame_registrations; u8 bssid[ETH_ALEN] __aligned(2); u8 ssid[IEEE80211_MAX_SSID_LEN]; -- cgit From 86e8cf98de3e74bbfb0003501e0004bf1e5e2618 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Jun 2013 10:57:22 +0200 Subject: nl80211: use small state buffer for wiphy_dump Avoid parsing the original dump message again and again by allocating a small state struct that is used by the functions involved in the dump, storing this struct in cb->args[0]. This reduces the memory allocation size as well. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 210 +++++++++++++++++++++++++++---------------------- 1 file changed, 116 insertions(+), 94 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f8ffb9a59c83..7dc3343427c1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1111,10 +1111,16 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg, return 0; } +struct nl80211_dump_wiphy_state { + s64 filter_wiphy; + long start; + long split_start, band_start, chan_start; + bool split; +}; + static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, struct sk_buff *msg, u32 portid, u32 seq, - int flags, bool split, long *split_start, - long *band_start, long *chan_start) + int flags, struct nl80211_dump_wiphy_state *state) { void *hdr; struct nlattr *nl_bands, *nl_band; @@ -1125,19 +1131,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, int i; const struct ieee80211_txrx_stypes *mgmt_stypes = dev->wiphy.mgmt_stypes; - long start = 0, start_chan = 0, start_band = 0; u32 features; hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) return -ENOBUFS; - /* allow always using the variables */ - if (!split) { - split_start = &start; - band_start = &start_band; - chan_start = &start_chan; - } + if (WARN_ON(!state)) + return -EINVAL; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, @@ -1146,7 +1147,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, cfg80211_rdev_list_generation)) goto nla_put_failure; - switch (*split_start) { + switch (state->split_start) { case 0: if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, dev->wiphy.retry_short) || @@ -1192,8 +1193,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, nla_put_flag(msg, WIPHY_FLAG_SUPPORTS_5_10_MHZ)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 1: if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, @@ -1237,22 +1238,23 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } } - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 2: if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, dev->wiphy.interface_modes)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 3: nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; - for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) { + for (band = state->band_start; + band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; sband = dev->wiphy.bands[band]; @@ -1264,12 +1266,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nl_band) goto nla_put_failure; - switch (*chan_start) { + switch (state->chan_start) { case 0: if (nl80211_send_band_rateinfo(msg, sband)) goto nla_put_failure; - (*chan_start)++; - if (split) + state->chan_start++; + if (state->split) break; default: /* add frequencies */ @@ -1278,7 +1280,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nl_freqs) goto nla_put_failure; - for (i = *chan_start - 1; + for (i = state->chan_start - 1; i < sband->n_channels; i++) { nl_freq = nla_nest_start(msg, i); @@ -1287,26 +1289,27 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, chan = &sband->channels[i]; - if (nl80211_msg_put_channel(msg, chan, - split)) + if (nl80211_msg_put_channel( + msg, chan, + state->split)) goto nla_put_failure; nla_nest_end(msg, nl_freq); - if (split) + if (state->split) break; } if (i < sband->n_channels) - *chan_start = i + 2; + state->chan_start = i + 2; else - *chan_start = 0; + state->chan_start = 0; nla_nest_end(msg, nl_freqs); } nla_nest_end(msg, nl_band); - if (split) { + if (state->split) { /* start again here */ - if (*chan_start) + if (state->chan_start) band--; break; } @@ -1314,14 +1317,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, nla_nest_end(msg, nl_bands); if (band < IEEE80211_NUM_BANDS) - *band_start = band + 1; + state->band_start = band + 1; else - *band_start = 0; + state->band_start = 0; /* if bands & channels are done, continue outside */ - if (*band_start == 0 && *chan_start == 0) - (*split_start)++; - if (split) + if (state->band_start == 0 && state->chan_start == 0) + state->split_start++; + if (state->split) break; case 4: nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); @@ -1387,7 +1390,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } CMD(start_p2p_device, START_P2P_DEVICE); CMD(set_mcast_rate, SET_MCAST_RATE); - if (split) { + if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); } @@ -1411,8 +1414,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } nla_nest_end(msg, nl_cmds); - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 5: if (dev->ops->remain_on_channel && @@ -1428,29 +1431,30 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (nl80211_send_mgmt_stypes(msg, mgmt_stypes)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 6: #ifdef CONFIG_PM - if (nl80211_send_wowlan(msg, dev, split)) + if (nl80211_send_wowlan(msg, dev, state->split)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; #else - (*split_start)++; + state->split_start++; #endif case 7: if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, dev->wiphy.software_iftypes)) goto nla_put_failure; - if (nl80211_put_iface_combinations(&dev->wiphy, msg, split)) + if (nl80211_put_iface_combinations(&dev->wiphy, msg, + state->split)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 8: if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && @@ -1464,7 +1468,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, * dump is split, otherwise it makes it too big. Therefore * only advertise it in that case. */ - if (split) + if (state->split) features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) goto nla_put_failure; @@ -1491,7 +1495,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, * case we'll continue with more data in the next round, * but break unconditionally so unsplit data stops here. */ - (*split_start)++; + state->split_start++; break; case 9: if (dev->wiphy.extended_capabilities && @@ -1510,7 +1514,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, goto nla_put_failure; /* done */ - *split_start = 0; + state->split_start = 0; break; } return genlmsg_end(msg, hdr); @@ -1520,66 +1524,76 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, return -EMSGSIZE; } +static int nl80211_dump_wiphy_parse(struct sk_buff *skb, + struct netlink_callback *cb, + struct nl80211_dump_wiphy_state *state) +{ + struct nlattr **tb = nl80211_fam.attrbuf; + int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + tb, nl80211_fam.maxattr, nl80211_policy); + /* ignore parse errors for backward compatibility */ + if (ret) + return 0; + + state->split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; + if (tb[NL80211_ATTR_WIPHY]) + state->filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); + if (tb[NL80211_ATTR_WDEV]) + state->filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; + if (tb[NL80211_ATTR_IFINDEX]) { + struct net_device *netdev; + struct cfg80211_registered_device *rdev; + int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); + + netdev = dev_get_by_index(sock_net(skb->sk), ifidx); + if (!netdev) + return -ENODEV; + if (netdev->ieee80211_ptr) { + rdev = wiphy_to_dev( + netdev->ieee80211_ptr->wiphy); + state->filter_wiphy = rdev->wiphy_idx; + } + dev_put(netdev); + } + + return 0; +} + static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, ret; - int start = cb->args[0]; + struct nl80211_dump_wiphy_state *state = (void *)cb->args[0]; struct cfg80211_registered_device *dev; - s64 filter_wiphy = -1; - bool split = false; - struct nlattr **tb; - int res; - - /* will be zeroed in nlmsg_parse() */ - tb = kmalloc(sizeof(*tb) * (NL80211_ATTR_MAX + 1), GFP_KERNEL); - if (!tb) - return -ENOMEM; rtnl_lock(); - res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - tb, NL80211_ATTR_MAX, nl80211_policy); - if (res == 0) { - split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; - if (tb[NL80211_ATTR_WIPHY]) - filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); - if (tb[NL80211_ATTR_WDEV]) - filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; - if (tb[NL80211_ATTR_IFINDEX]) { - struct net_device *netdev; - int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); - - netdev = dev_get_by_index(sock_net(skb->sk), ifidx); - if (!netdev) { - rtnl_unlock(); - kfree(tb); - return -ENODEV; - } - if (netdev->ieee80211_ptr) { - dev = wiphy_to_dev( - netdev->ieee80211_ptr->wiphy); - filter_wiphy = dev->wiphy_idx; - } - dev_put(netdev); + if (!state) { + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + state->filter_wiphy = -1; + ret = nl80211_dump_wiphy_parse(skb, cb, state); + if (ret) { + kfree(state); + rtnl_unlock(); + return ret; } + cb->args[0] = (long)state; } - kfree(tb); list_for_each_entry(dev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) continue; - if (++idx <= start) + if (++idx <= state->start) continue; - if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy) + if (state->filter_wiphy != -1 && + state->filter_wiphy != dev->wiphy_idx) continue; /* attempt to fit multiple wiphy data chunks into the skb */ do { ret = nl80211_send_wiphy(dev, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI, - split, &cb->args[1], - &cb->args[2], - &cb->args[3]); + NLM_F_MULTI, state); if (ret < 0) { /* * If sending the wiphy data didn't fit (ENOBUFS @@ -1604,27 +1618,34 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) idx--; break; } - } while (cb->args[1] > 0); + } while (state->split_start > 0); break; } rtnl_unlock(); - cb->args[0] = idx; + state->start = idx; return skb->len; } +static int nl80211_dump_wiphy_done(struct netlink_callback *cb) +{ + kfree((void *)cb->args[0]); + return 0; +} + static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct cfg80211_registered_device *dev = info->user_ptr[0]; + struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(4096, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, - false, NULL, NULL, NULL) < 0) { + &state) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -8418,6 +8439,7 @@ static struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_WIPHY, .doit = nl80211_get_wiphy, .dumpit = nl80211_dump_wiphy, + .done = nl80211_dump_wiphy_done, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WIPHY | @@ -9038,13 +9060,13 @@ static struct genl_multicast_group nl80211_regulatory_mcgrp = { void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) { struct sk_buff *msg; + struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, - false, NULL, NULL, NULL) < 0) { + if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, &state) < 0) { nlmsg_free(msg); return; } -- cgit From 959867fa55d0cb55fb3d08656e5e62607167617f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Jun 2013 13:05:42 +0200 Subject: cfg80211: require passing BSS struct back to cfg80211_assoc_timeout Doing so will allow us to hold the BSS (not just ref it) over the association process, thus ensuring that it doesn't time out and gets invisible to the user (e.g. in 'iw wlan0 link'.) This also fixes a leak in mac80211 where it doesn't always release the BSS struct properly in all cases where calling this function. This leak was reported by Ben Greear. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 15 +++++++-------- net/wireless/mlme.c | 8 +++++--- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 34d54fe81483..ae31968d42d3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2795,8 +2795,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); - cfg80211_put_bss(sdata->local->hw.wiphy, bss); - cfg80211_assoc_timeout(sdata->dev, mgmt->bssid); + cfg80211_assoc_timeout(sdata->dev, bss); return; } sdata_info(sdata, "associated\n"); @@ -3513,13 +3512,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) time_after(jiffies, ifmgd->assoc_data->timeout)) { if ((ifmgd->assoc_data->need_beacon && !ifmgd->have_beacon) || ieee80211_do_assoc(sdata)) { - u8 bssid[ETH_ALEN]; - - memcpy(bssid, ifmgd->assoc_data->bss->bssid, ETH_ALEN); + struct cfg80211_bss *bss = ifmgd->assoc_data->bss; ieee80211_destroy_assoc_data(sdata, false); - - cfg80211_assoc_timeout(sdata->dev, bssid); + cfg80211_assoc_timeout(sdata->dev, bss); } } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) run_again(sdata, ifmgd->assoc_data->timeout); @@ -4445,8 +4441,11 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->chswitch_work); sdata_lock(sdata); - if (ifmgd->assoc_data) + if (ifmgd->assoc_data) { + struct cfg80211_bss *bss = ifmgd->assoc_data->bss; ieee80211_destroy_assoc_data(sdata, false); + cfg80211_assoc_timeout(sdata->dev, bss); + } if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); del_timer_sync(&ifmgd->timer); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index a61a44bc6cf0..dd6f79d7bd2e 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -131,16 +131,18 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr) } EXPORT_SYMBOL(cfg80211_auth_timeout); -void cfg80211_assoc_timeout(struct net_device *dev, const u8 *addr) +void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - trace_cfg80211_send_assoc_timeout(dev, addr); + trace_cfg80211_send_assoc_timeout(dev, bss->bssid); - nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); + nl80211_send_assoc_timeout(rdev, dev, bss->bssid, GFP_KERNEL); cfg80211_sme_assoc_timeout(wdev); + + cfg80211_put_bss(wiphy, bss); } EXPORT_SYMBOL(cfg80211_assoc_timeout); -- cgit From f1940c5730f0f0555e42afbcf629be7f7fbbce8e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Jun 2013 13:21:15 +0200 Subject: cfg80211: hold BSS over association process This fixes the potential issue that the BSS struct that we use and later assign to wdev->current_bss is removed from the scan list while associating. Also warn when we don't have a BSS struct in connect_result unless it's from a driver that only has the connect() API. Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 4 ++++ net/wireless/sme.c | 15 ++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index dd6f79d7bd2e..bfac5e186f57 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -38,6 +38,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, * frame instead of reassoc. */ if (cfg80211_sme_rx_assoc_resp(wdev, status_code)) { + cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wiphy, bss); return; } @@ -142,6 +143,7 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) nl80211_send_assoc_timeout(rdev, dev, bss->bssid, GFP_KERNEL); cfg80211_sme_assoc_timeout(wdev); + cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wiphy, bss); } EXPORT_SYMBOL(cfg80211_assoc_timeout); @@ -309,6 +311,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, goto out; err = rdev_assoc(rdev, dev, req); + if (!err) + cfg80211_hold_bss(bss_from_pub(req->bss)); out: if (err) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index ae7e2cbf45cb..c0bf781d4fbe 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -615,19 +615,24 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, kfree(wdev->connect_keys); wdev->connect_keys = NULL; wdev->ssid_len = 0; - cfg80211_put_bss(wdev->wiphy, bss); + if (bss) { + cfg80211_unhold_bss(bss_from_pub(bss)); + cfg80211_put_bss(wdev->wiphy, bss); + } return; } - if (!bss) + if (!bss) { + WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - if (WARN_ON(!bss)) - return; + if (WARN_ON(!bss)) + return; + cfg80211_hold_bss(bss_from_pub(bss)); + } - cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); cfg80211_upload_connect_keys(wdev); -- cgit From 20fd4d1f04da07d09192ad8ad366a70d5125bfaf Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:32 -0700 Subject: gre: Simplify gre protocol registration locking. Use cmpxchg() for atomic protocol registration which saves code and data space. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/gre.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index b2e805af9b87..1e294d510ac3 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -26,46 +26,32 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; -static DEFINE_SPINLOCK(gre_proto_lock); int gre_add_protocol(const struct gre_protocol *proto, u8 version) { if (version >= GREPROTO_MAX) - goto err_out; - - spin_lock(&gre_proto_lock); - if (gre_proto[version]) - goto err_out_unlock; - - RCU_INIT_POINTER(gre_proto[version], proto); - spin_unlock(&gre_proto_lock); - return 0; + return -EINVAL; -err_out_unlock: - spin_unlock(&gre_proto_lock); -err_out: - return -1; + return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ? + 0 : -EBUSY; } EXPORT_SYMBOL_GPL(gre_add_protocol); int gre_del_protocol(const struct gre_protocol *proto, u8 version) { + int ret; + if (version >= GREPROTO_MAX) - goto err_out; - - spin_lock(&gre_proto_lock); - if (rcu_dereference_protected(gre_proto[version], - lockdep_is_held(&gre_proto_lock)) != proto) - goto err_out_unlock; - RCU_INIT_POINTER(gre_proto[version], NULL); - spin_unlock(&gre_proto_lock); + return -EINVAL; + + ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ? + 0 : -EBUSY; + + if (ret) + return ret; + synchronize_rcu(); return 0; - -err_out_unlock: - spin_unlock(&gre_proto_lock); -err_out: - return -1; } EXPORT_SYMBOL_GPL(gre_del_protocol); -- cgit From bda7bb46343647f68591366731295a0f3eea59ed Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:38 -0700 Subject: gre: Allow multiple protocol listener for gre protocol. Currently there is only one user is allowed to register for gre protocol. Following patch adds de-multiplexer. So that multiple modules can listen on gre protocol e.g. kernel gre devices and ovs. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/gre.c | 221 +++++++++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/ip_gre.c | 173 +++++++----------------------------------- 2 files changed, 243 insertions(+), 151 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 1e294d510ac3..8b9a373890ab 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -13,6 +13,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include +#include #include #include #include @@ -24,8 +26,12 @@ #include #include +#include +#include +#include static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; +static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX]; int gre_add_protocol(const struct gre_protocol *proto, u8 version) { @@ -55,6 +61,173 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + + if (!csum) + break; + /* Fall through. */ + + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; + } + + return csum; +} + +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err) +{ + unsigned int ip_hlen = ip_hdrlen(skb); + const struct gre_base_hdr *greh; + __be32 *options; + int hdr_len; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; + + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + hdr_len = ip_gre_calc_hlen(tpi->flags); + + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + tpi->proto = greh->protocol; + + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; + } + + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 0; + + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; + + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + hdr_len += 4; + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + } + } + return 0; +} + +static int gre_cisco_rcv(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + int i; + bool csum_err = false; + + if (parse_gre_header(skb, &tpi, &csum_err) < 0) + goto drop; + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + int ret; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + ret = proto->handler(skb, &tpi); + if (ret == PACKET_RCVD) { + rcu_read_unlock(); + return 0; + } + } + rcu_read_unlock(); + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +drop: + kfree_skb(skb); + return 0; +} + +static void gre_cisco_err(struct sk_buff *skb, u32 info) +{ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + * + * Moreover, Cisco "wise men" put GRE key to the third word + * in GRE header. It makes impossible maintaining even soft + * state for keyed + * GRE tunnels with enabled checksum. Tell them "thank you". + * + * Well, I wonder, rfc1812 was written by Cisco employee, + * what the hell these idiots break standards established + * by themselves??? + */ + + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct tnl_ptk_info tpi; + bool csum_err = false; + int i; + + if (parse_gre_header(skb, &tpi, &csum_err)) { + if (!csum_err) /* ignore csum errors. */ + return; + } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + skb->dev->ifindex, 0, IPPROTO_GRE, 0); + return; + } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, + IPPROTO_GRE, 0); + return; + } + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + + if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD) + goto out; + + } +out: + rcu_read_unlock(); +} + static int gre_rcv(struct sk_buff *skb) { const struct gre_protocol *proto; @@ -206,27 +379,68 @@ static const struct net_offload gre_offload = { }, }; +static const struct gre_protocol ipgre_protocol = { + .handler = gre_cisco_rcv, + .err_handler = gre_cisco_err, +}; + +int gre_cisco_register(struct gre_cisco_protocol *newp) +{ + struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) + &gre_cisco_proto_list[newp->priority]; + + return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY; +} +EXPORT_SYMBOL_GPL(gre_cisco_register); + +int gre_cisco_unregister(struct gre_cisco_protocol *del_proto) +{ + struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) + &gre_cisco_proto_list[del_proto->priority]; + int ret; + + ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL; + + if (ret) + return ret; + + synchronize_net(); + return 0; +} +EXPORT_SYMBOL_GPL(gre_cisco_unregister); + static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { pr_err("can't add protocol\n"); - return -EAGAIN; + goto err; + } + + if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) { + pr_info("%s: can't add ipgre handler\n", __func__); + goto err_gre; } if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { pr_err("can't add protocol offload\n"); - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); - return -EAGAIN; + goto err_gso; } return 0; +err_gso: + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); +err_gre: + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +err: + return -EAGAIN; } static void __exit gre_exit(void) { inet_del_offload(&gre_offload, IPPROTO_GRE); + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } @@ -236,4 +450,3 @@ module_exit(gre_exit); MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); MODULE_LICENSE("GPL"); - diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a982657d05e7..19863a81cea1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -121,103 +121,8 @@ static int ipgre_tunnel_init(struct net_device *dev); static int ipgre_net_id __read_mostly; static int gre_tap_net_id __read_mostly; -static __sum16 check_checksum(struct sk_buff *skb) -{ - __sum16 csum = 0; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - - if (!csum) - break; - /* Fall through. */ - - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - break; - } - - return csum; -} - -static int ip_gre_calc_hlen(__be16 o_flags) -{ - int addend = 4; - - if (o_flags&TUNNEL_CSUM) - addend += 4; - if (o_flags&TUNNEL_KEY) - addend += 4; - if (o_flags&TUNNEL_SEQ) - addend += 4; - return addend; -} - -static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, int *hdr_len) -{ - unsigned int ip_hlen = ip_hdrlen(skb); - const struct gre_base_hdr *greh; - __be32 *options; - - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) - return -EINVAL; - - greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) - return -EINVAL; - - tpi->flags = gre_flags_to_tnl_flags(greh->flags); - *hdr_len = ip_gre_calc_hlen(tpi->flags); - - if (!pskb_may_pull(skb, *hdr_len)) - return -EINVAL; - - greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - - tpi->proto = greh->protocol; - - options = (__be32 *)(greh + 1); - if (greh->flags & GRE_CSUM) { - if (check_checksum(skb)) { - *csum_err = true; - return -EINVAL; - } - options++; - } - - if (greh->flags & GRE_KEY) { - tpi->key = *options; - options++; - } else - tpi->key = 0; - - if (unlikely(greh->flags & GRE_SEQ)) { - tpi->seq = *options; - options++; - } else - tpi->seq = 0; - - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { - tpi->proto = htons(ETH_P_IP); - if ((*(u8 *)options & 0xF0) != 0x40) { - *hdr_len += 4; - if (!pskb_may_pull(skb, *hdr_len)) - return -EINVAL; - } - } - - return 0; -} - -static void ipgre_err(struct sk_buff *skb, u32 info) +static int ipgre_err(struct sk_buff *skb, u32 info, + const struct tnl_ptk_info *tpi) { /* All the routers (except for Linux) return only @@ -239,26 +144,18 @@ static void ipgre_err(struct sk_buff *skb, u32 info) const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; - struct tnl_ptk_info tpi; - int hdr_len; - bool csum_err = false; - - if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) { - if (!csum_err) /* ignore csum errors. */ - return; - } switch (type) { default: case ICMP_PARAMETERPROB: - return; + return PACKET_RCVD; case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: case ICMP_PORT_UNREACH: /* Impossible event. */ - return; + return PACKET_RCVD; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -269,79 +166,61 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) - return; + return PACKET_RCVD; break; case ICMP_REDIRECT: break; } - if (tpi.proto == htons(ETH_P_TEB)) + if (tpi->proto == htons(ETH_P_TEB)) itn = net_generic(net, gre_tap_net_id); else itn = net_generic(net, ipgre_net_id); iph = (const struct iphdr *)skb->data; - t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, - iph->daddr, iph->saddr, tpi.key); + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, + iph->daddr, iph->saddr, tpi->key); if (t == NULL) - return; + return PACKET_REJECT; - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_GRE, 0); - return; - } - if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_GRE, 0); - return; - } if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) - return; + return PACKET_RCVD; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - return; + return PACKET_RCVD; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; + return PACKET_RCVD; } -static int ipgre_rcv(struct sk_buff *skb) +static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) { struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn; const struct iphdr *iph; struct ip_tunnel *tunnel; - struct tnl_ptk_info tpi; - int hdr_len; - bool csum_err = false; - - if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0) - goto drop; - if (tpi.proto == htons(ETH_P_TEB)) + if (tpi->proto == htons(ETH_P_TEB)) itn = net_generic(net, gre_tap_net_id); else itn = net_generic(net, ipgre_net_id); iph = ip_hdr(skb); - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, - iph->saddr, iph->daddr, tpi.key); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, + iph->saddr, iph->daddr, tpi->key); if (tunnel) { - ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); - return 0; + ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error); + return PACKET_RCVD; } - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); -drop: - kfree_skb(skb); - return 0; + return PACKET_REJECT; } static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb) @@ -708,9 +587,10 @@ static int ipgre_tunnel_init(struct net_device *dev) return ip_tunnel_init(dev); } -static const struct gre_protocol ipgre_protocol = { - .handler = ipgre_rcv, - .err_handler = ipgre_err, +static struct gre_cisco_protocol ipgre_protocol = { + .handler = ipgre_rcv, + .err_handler = ipgre_err, + .priority = 0, }; static int __net_init ipgre_init_net(struct net *net) @@ -978,7 +858,7 @@ static int __init ipgre_init(void) if (err < 0) goto pnet_tap_faied; - err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); + err = gre_cisco_register(&ipgre_protocol); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); goto add_proto_failed; @@ -997,7 +877,7 @@ static int __init ipgre_init(void) tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); + gre_cisco_unregister(&ipgre_protocol); add_proto_failed: unregister_pernet_device(&ipgre_tap_net_ops); pnet_tap_faied: @@ -1009,8 +889,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) - pr_info("%s: can't remove protocol\n", __func__); + gre_cisco_unregister(&ipgre_protocol); unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); } -- cgit From 752f36da68e9136df8918461d651723a43627e04 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:45 -0700 Subject: gre: export gre_build_header() function. This is required for ovs gre module. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/gre.c | 32 ++++++++++++++++++++++++++++++++ net/ipv4/ip_gre.c | 40 +--------------------------------------- 2 files changed, 33 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 8b9a373890ab..1cbc46536d1f 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -61,6 +61,38 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, + int hdr_len) +{ + struct gre_base_hdr *greh; + + skb_push(skb, hdr_len); + + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; + + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; + } + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; + } + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); + } + } +} +EXPORT_SYMBOL_GPL(gre_build_header); + static __sum16 check_checksum(struct sk_buff *skb) { __sum16 csum = 0; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 19863a81cea1..362c7c4c13ca 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -248,40 +248,6 @@ error: return ERR_PTR(err); } -static struct sk_buff *gre_build_header(struct sk_buff *skb, - const struct tnl_ptk_info *tpi, - int hdr_len) -{ - struct gre_base_hdr *greh; - - skb_push(skb, hdr_len); - - greh = (struct gre_base_hdr *)skb->data; - greh->flags = tnl_flags_to_gre_flags(tpi->flags); - greh->protocol = tpi->proto; - - if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - if (tpi->flags&TUNNEL_SEQ) { - *ptr = tpi->seq; - ptr--; - } - if (tpi->flags&TUNNEL_KEY) { - *ptr = tpi->key; - ptr--; - } - if (tpi->flags&TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { - *(__sum16 *)ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, - skb->len, 0)); - } - } - - return skb; -} - static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, __be16 proto) @@ -302,11 +268,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, tpi.seq = htonl(tunnel->o_seqno); /* Push GRE header. */ - skb = gre_build_header(skb, &tpi, tunnel->hlen); - if (unlikely(!skb)) { - dev->stats.tx_dropped++; - return; - } + gre_build_header(skb, &tpi, tunnel->hlen); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } -- cgit From 45f2e9976cb6fc3f1cc533fd53fe74da5a9dbce4 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:51 -0700 Subject: gre: export gre_handle_offloads() function. This is required for OVS GRE offloading. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/gre.c | 29 +++++++++++++++++++++++++++++ net/ipv4/ip_gre.c | 34 ++-------------------------------- 2 files changed, 31 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 1cbc46536d1f..5ecc9c49b4dc 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -93,6 +93,35 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, } EXPORT_SYMBOL_GPL(gre_build_header); +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) +{ + int err; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; +error: + kfree_skb(skb); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(gre_handle_offloads); + static __sum16 check_checksum(struct sk_buff *skb) { __sum16 csum = 0; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 362c7c4c13ca..c326e869993a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -223,31 +223,6 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) return PACKET_REJECT; } -static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb) -{ - int err; - - if (skb_is_gso(skb)) { - err = skb_unclone(skb, GFP_ATOMIC); - if (unlikely(err)) - goto error; - skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; - return skb; - } else if (skb->ip_summed == CHECKSUM_PARTIAL && - tunnel->parms.o_flags&TUNNEL_CSUM) { - err = skb_checksum_help(skb); - if (unlikely(err)) - goto error; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->ip_summed = CHECKSUM_NONE; - - return skb; - -error: - kfree_skb(skb); - return ERR_PTR(err); -} - static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, __be16 proto) @@ -255,11 +230,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel *tunnel = netdev_priv(dev); struct tnl_ptk_info tpi; - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - tpi.flags = tunnel->parms.o_flags; tpi.proto = proto; tpi.key = tunnel->parms.o_key; @@ -279,7 +249,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tnl_params; - skb = handle_offloads(tunnel, skb); + skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); if (IS_ERR(skb)) goto out; @@ -318,7 +288,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); - skb = handle_offloads(tunnel, skb); + skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); if (IS_ERR(skb)) goto out; -- cgit From 0e6fbc5b6c6218987c93b8c7ca60cf786062899d Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:56 -0700 Subject: ip_tunnels: extend iptunnel_xmit() Refactor various ip tunnels xmit functions and extend iptunnel_xmit() so that there is more code sharing. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/Makefile | 2 +- net/ipv4/ip_tunnel.c | 38 +++++--------------- net/ipv4/ip_tunnel_core.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/sit.c | 39 ++++++--------------- 4 files changed, 108 insertions(+), 59 deletions(-) create mode 100644 net/ipv4/ip_tunnel_core.c (limited to 'net') diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 7fcf8101d85f..86ded0bac9c7 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ - inet_fragment.o ping.o + inet_fragment.o ping.o ip_tunnel_core.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e189db409b0e..a06a2ed49597 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -491,19 +491,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *inner_iph; - struct iphdr *iph; struct flowi4 fl4; u8 tos, ttl; __be16 df; struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int mtu; + int err; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ @@ -571,14 +569,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev->stats.tx_carrier_errors++; goto tx_error; } - tdev = rt->dst.dev; - - if (tdev == dev) { + if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; goto tx_error; } - df = tnl_params->frag_off; if (df) @@ -596,6 +591,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, if (!skb_is_gso(skb) && (inner_iph->frag_off&htons(IP_DF)) && mtu < ntohs(inner_iph->tot_len)) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); goto tx_error; @@ -646,8 +642,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) - + rt->dst.header_len; + max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + + rt->dst.header_len; if (max_headroom > dev->needed_headroom) { dev->needed_headroom = max_headroom; if (skb_cow_head(skb, dev->needed_headroom)) { @@ -657,27 +653,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* Push down and install the IP header. */ - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - - iph = ip_hdr(skb); - inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + err = iptunnel_xmit(dev_net(dev), rt, skb, + fl4.saddr, fl4.daddr, protocol, + ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = protocol; - iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - iph->ttl = ttl; - tunnel_ip_select_ident(skb, inner_iph, &rt->dst); - - iptunnel_xmit(skb, dev); return; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c new file mode 100644 index 000000000000..927687e83f18 --- /dev/null +++ b/net/ipv4/ip_tunnel_core.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int iptunnel_xmit(struct net *net, struct rtable *rt, + struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df) +{ + int pkt_len = skb->len; + struct iphdr *iph; + int err; + + nf_reset(skb); + secpath_reset(skb); + skb->rxhash = 0; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + /* Push down and install the IP header. */ + __skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = proto; + iph->tos = tos; + iph->daddr = dst; + iph->saddr = src; + iph->ttl = ttl; + tunnel_ip_select_ident(skb, + (const struct iphdr *)skb_inner_network_header(skb), + &rt->dst); + + err = ip_local_out(skb); + if (unlikely(net_xmit_eval(err))) + pkt_len = 0; + return pkt_len; +} +EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6b9c1f128eaf..76bb8de435b2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -723,13 +723,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; const struct in6_addr *addr6; int addr_type; + u8 ttl; + int err; if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; @@ -872,34 +873,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb = new_skb; iph6 = ipv6_hdr(skb); } - - skb->transport_header = skb->network_header; - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags = 0; - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ - - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr)>>2; - iph->frag_off = df; - iph->protocol = IPPROTO_IPV6; - iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - - if ((iph->ttl = tiph->ttl) == 0) - iph->ttl = iph6->hop_limit; - - skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(iph, skb_dst(skb), NULL); - iptunnel_xmit(skb, dev); + ttl = tiph->ttl; + if (ttl == 0) + ttl = iph6->hop_limit; + tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); + + err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr, + IPPROTO_IPV6, tos, ttl, df); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; tx_error_icmp: -- cgit From 3d7b46cd20e300bd6989fb1f43d46f1b9645816e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:02 -0700 Subject: ip_tunnel: push generic protocol handling to ip_tunnel module. Process skb tunnel header before sending packet to protocol handler. this allows code sharing between gre and ovs gre modules. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/gre.c | 3 ++- net/ipv4/ip_tunnel.c | 30 ++++++------------------------ net/ipv4/ip_tunnel_core.c | 34 ++++++++++++++++++++++++++++++++++ net/ipv4/ipip.c | 6 +++++- net/ipv6/sit.c | 7 ++++++- 5 files changed, 53 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 5ecc9c49b4dc..ba4803e609b5 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -201,7 +201,8 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return 0; + + return iptunnel_pull_header(skb, hdr_len, tpi->proto); } static int gre_cisco_rcv(struct sk_buff *skb) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a06a2ed49597..bd227e5ea9da 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -408,13 +408,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); int err; - secpath_reset(skb); - - skb->protocol = tpi->proto; - - skb->mac_header = skb->network_header; - __pskb_pull(skb, tunnel->hlen); - skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { /* Looped back packet, drop it! */ @@ -442,23 +435,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } - /* Warning: All skb pointers will be invalidated! */ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - iph = ip_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - skb->pkt_type = PACKET_HOST; - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -477,6 +453,12 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); + if (tunnel->dev->type == ARPHRD_ETHER) { + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } else { + skb->dev = tunnel->dev; + } gro_cells_receive(&tunnel->gro_cells, skb); return 0; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 927687e83f18..7167b08977df 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -86,3 +86,37 @@ int iptunnel_xmit(struct net *net, struct rtable *rt, return pkt_len; } EXPORT_SYMBOL_GPL(iptunnel_xmit); + +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) +{ + if (unlikely(!pskb_may_pull(skb, hdr_len))) + return -ENOMEM; + + skb_pull_rcsum(skb, hdr_len); + + if (inner_proto == htons(ETH_P_TEB)) { + struct ethhdr *eh = (struct ethhdr *)skb->data; + + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + return -ENOMEM; + + if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) + skb->protocol = eh->h_proto; + else + skb->protocol = htons(ETH_P_802_2); + + } else { + skb->protocol = inner_proto; + } + + nf_reset(skb); + secpath_reset(skb); + if (!skb->l4_rxhash) + skb->rxhash = 0; + skb_dst_drop(skb); + skb->vlan_tci = 0; + skb_set_queue_mapping(skb, 0); + skb->pkt_type = PACKET_HOST; + return 0; +} +EXPORT_SYMBOL_GPL(iptunnel_pull_header); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 9df7ecd393f2..e6905fbda2a2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -188,8 +188,12 @@ static int ipip_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); struct ip_tunnel *tunnel; - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; + + iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 76bb8de435b2..6cee844678e2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -640,9 +640,14 @@ static const struct tnl_ptk_info tpi = { static int ipip_rcv(struct sk_buff *skb) { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; struct ip_tunnel *tunnel; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; + + iph = ip_hdr(skb); + tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { -- cgit From 74f84a5726c7d08c27745305e67474b8645c541d Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:12 -0700 Subject: openvswitch: Copy individual actions. Rather than validating actions and then copying all actiaons in one block, following patch does same operation in single pass. This validate and copy action one by one. This is required for ovs tunneling patch. This patch does not change any functionality. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 271 ++++++++++++++++++++++++++++++++++++--------- net/openvswitch/flow.c | 10 +- net/openvswitch/flow.h | 2 +- 3 files changed, 225 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0f783d9fa00d..f14816b80b80 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -464,16 +464,89 @@ static int flush_flows(struct datapath *dp) return 0; } -static int validate_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth); +static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len) +{ + + struct sw_flow_actions *acts; + int new_acts_size; + int req_size = NLA_ALIGN(attr_len); + int next_offset = offsetof(struct sw_flow_actions, actions) + + (*sfa)->actions_len; + + if (req_size <= (ksize(*sfa) - next_offset)) + goto out; + + new_acts_size = ksize(*sfa) * 2; + + if (new_acts_size > MAX_ACTIONS_BUFSIZE) { + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + return ERR_PTR(-EMSGSIZE); + new_acts_size = MAX_ACTIONS_BUFSIZE; + } + + acts = ovs_flow_actions_alloc(new_acts_size); + if (IS_ERR(acts)) + return (void *)acts; + + memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); + acts->actions_len = (*sfa)->actions_len; + kfree(*sfa); + *sfa = acts; + +out: + (*sfa)->actions_len += req_size; + return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); +} + +static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) +{ + struct nlattr *a; + + a = reserve_sfa_size(sfa, nla_attr_size(len)); + if (IS_ERR(a)) + return PTR_ERR(a); + + a->nla_type = attrtype; + a->nla_len = nla_attr_size(len); + + if (data) + memcpy(nla_data(a), data, len); + memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); + + return 0; +} + +static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype) +{ + int used = (*sfa)->actions_len; + int err; + + err = add_action(sfa, attrtype, NULL, 0); + if (err) + return err; + + return used; +} -static int validate_sample(const struct nlattr *attr, - const struct sw_flow_key *key, int depth) +static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset) +{ + struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset); + + a->nla_len = sfa->actions_len - st_offset; +} + +static int validate_and_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa); + +static int validate_and_copy_sample(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; const struct nlattr *a; - int rem; + int rem, start, err, st_acts; memset(attrs, 0, sizeof(attrs)); nla_for_each_nested(a, attr, rem) { @@ -492,7 +565,26 @@ static int validate_sample(const struct nlattr *attr, actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) return -EINVAL; - return validate_actions(actions, key, depth + 1); + + /* validation done, copy sample action. */ + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); + if (start < 0) + return start; + err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32)); + if (err) + return err; + st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); + if (st_acts < 0) + return st_acts; + + err = validate_and_copy_actions(actions, key, depth + 1, sfa); + if (err) + return err; + + add_nested_action_end(*sfa, st_acts); + add_nested_action_end(*sfa, start); + + return 0; } static int validate_tp_port(const struct sw_flow_key *flow_key) @@ -606,8 +698,24 @@ static int validate_userspace(const struct nlattr *attr) return 0; } -static int validate_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth) +static int copy_action(const struct nlattr *from, + struct sw_flow_actions **sfa) +{ + int totlen = NLA_ALIGN(from->nla_len); + struct nlattr *to; + + to = reserve_sfa_size(sfa, from->nla_len); + if (IS_ERR(to)) + return PTR_ERR(to); + + memcpy(to, from, totlen); + return 0; +} + +static int validate_and_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, + struct sw_flow_actions **sfa) { const struct nlattr *a; int rem, err; @@ -627,12 +735,14 @@ static int validate_actions(const struct nlattr *attr, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); + bool skip_copy; if (type > OVS_ACTION_ATTR_MAX || (action_lens[type] != nla_len(a) && action_lens[type] != (u32)-1)) return -EINVAL; + skip_copy = false; switch (type) { case OVS_ACTION_ATTR_UNSPEC: return -EINVAL; @@ -667,14 +777,20 @@ static int validate_actions(const struct nlattr *attr, break; case OVS_ACTION_ATTR_SAMPLE: - err = validate_sample(a, key, depth); + err = validate_and_copy_sample(a, key, depth, sfa); if (err) return err; + skip_copy = true; break; default: return -EINVAL; } + if (!skip_copy) { + err = copy_action(a, sfa); + if (err) + return err; + } } if (rem > 0) @@ -742,18 +858,16 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; - - err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0); - if (err) - goto err_flow_free; - flow->hash = ovs_flow_hash(&flow->key, key_len); - - acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]); + acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); err = PTR_ERR(acts); if (IS_ERR(acts)) goto err_flow_free; + + err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts); rcu_assign_pointer(flow->sf_acts, acts); + if (err) + goto err_flow_free; OVS_CB(packet)->flow = flow; packet->priority = flow->key.phy.priority; @@ -843,6 +957,66 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP }; +static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb); +static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +{ + const struct nlattr *a; + struct nlattr *start; + int err = 0, rem; + + start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); + if (!start) + return -EMSGSIZE; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + struct nlattr *st_sample; + + switch (type) { + case OVS_SAMPLE_ATTR_PROBABILITY: + if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a))) + return -EMSGSIZE; + break; + case OVS_SAMPLE_ATTR_ACTIONS: + st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + if (!st_sample) + return -EMSGSIZE; + err = actions_to_attr(nla_data(a), nla_len(a), skb); + if (err) + return err; + nla_nest_end(skb, st_sample); + break; + } + } + + nla_nest_end(skb, start); + return err; +} + +static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) +{ + const struct nlattr *a; + int rem, err; + + nla_for_each_attr(a, attr, len, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_ACTION_ATTR_SAMPLE: + err = sample_action_to_attr(a, skb); + if (err) + return err; + break; + default: + if (nla_put(skb, type, nla_len(a), nla_data(a))) + return -EMSGSIZE; + break; + } + } + + return 0; +} + static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -860,6 +1034,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, { const int skb_orig_len = skb->len; const struct sw_flow_actions *sf_acts; + struct nlattr *start; struct ovs_flow_stats stats; struct ovs_header *ovs_header; struct nlattr *nla; @@ -913,10 +1088,19 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, * This can only fail for dump operations because the skb is always * properly sized for single flows. */ - err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len, - sf_acts->actions); - if (err < 0 && skb_orig_len) - goto error; + start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); + if (start) { + err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); + if (!err) + nla_nest_end(skb, start); + else { + if (skb_orig_len) + goto error; + + nla_nest_cancel(skb, start); + } + } else if (skb_orig_len) + goto nla_put_failure; return genlmsg_end(skb, ovs_header); @@ -961,6 +1145,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct datapath *dp; struct flow_table *table; + struct sw_flow_actions *acts = NULL; int error; int key_len; @@ -974,9 +1159,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { - error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); - if (error) + acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) goto error; + + error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts); + if (error) + goto err_kfree; } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { error = -EINVAL; goto error; @@ -991,8 +1181,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) table = ovsl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); if (!flow) { - struct sw_flow_actions *acts; - /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) @@ -1019,11 +1207,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->key = key; clear_stats(flow); - /* Obtain actions. */ - acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); - error = PTR_ERR(acts); - if (IS_ERR(acts)) - goto error_free_flow; rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ @@ -1036,7 +1219,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } else { /* We found a matching flow. */ struct sw_flow_actions *old_acts; - struct nlattr *acts_attrs; /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL @@ -1051,21 +1233,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); - acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; - if (acts_attrs && - (old_acts->actions_len != nla_len(acts_attrs) || - memcmp(old_acts->actions, nla_data(acts_attrs), - old_acts->actions_len))) { - struct sw_flow_actions *new_acts; - - new_acts = ovs_flow_actions_alloc(acts_attrs); - error = PTR_ERR(new_acts); - if (IS_ERR(new_acts)) - goto err_unlock_ovs; - - rcu_assign_pointer(flow->sf_acts, new_acts); - ovs_flow_deferred_free_acts(old_acts); - } + rcu_assign_pointer(flow->sf_acts, acts); + ovs_flow_deferred_free_acts(old_acts); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -1086,10 +1255,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); return 0; -error_free_flow: - ovs_flow_free(flow); err_unlock_ovs: ovs_unlock(); +err_kfree: + kfree(acts); error: return error; } @@ -1866,8 +2035,8 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_DEL); + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, + info->snd_seq, OVS_VPORT_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) goto exit_unlock; @@ -1896,8 +2065,8 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, + info->snd_seq, OVS_VPORT_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) goto exit_unlock; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 093c191d4fc2..940d4b803ff5 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -198,20 +198,18 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) spin_unlock(&flow->lock); } -struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) +struct sw_flow_actions *ovs_flow_actions_alloc(int size) { - int actions_len = nla_len(actions); struct sw_flow_actions *sfa; - if (actions_len > MAX_ACTIONS_BUFSIZE) + if (size > MAX_ACTIONS_BUFSIZE) return ERR_PTR(-EINVAL); - sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); + sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); - sfa->actions_len = actions_len; - nla_memcpy(sfa->actions, actions, actions_len); + sfa->actions_len = 0; return sfa; } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 2a83e2141f08..e370f6246ee9 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -130,7 +130,7 @@ struct sw_flow *ovs_flow_alloc(void); void ovs_flow_deferred_free(struct sw_flow *); void ovs_flow_free(struct sw_flow *flow); -struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *); +struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); void ovs_flow_deferred_free_acts(struct sw_flow_actions *); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, -- cgit From 7d5437c709ded4f152cb8b305d17972d6707f20c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:18 -0700 Subject: openvswitch: Add tunneling interface. Add ovs tunnel interface for set tunnel action for userspace. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 4 ++ net/openvswitch/datapath.c | 78 +++++++++++++++++++++- net/openvswitch/datapath.h | 3 + net/openvswitch/flow.c | 125 +++++++++++++++++++++++++++++++++++ net/openvswitch/flow.h | 19 ++++++ net/openvswitch/vport-internal_dev.c | 2 +- net/openvswitch/vport-netdev.c | 2 +- net/openvswitch/vport.c | 4 +- net/openvswitch/vport.h | 3 +- 9 files changed, 233 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 596d6373399d..22c5f399f1cf 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -436,6 +436,10 @@ static int execute_set_action(struct sk_buff *skb, skb->mark = nla_get_u32(nested_attr); break; + case OVS_KEY_ATTR_IPV4_TUNNEL: + OVS_CB(skb)->tun_key = nla_data(nested_attr); + break; + case OVS_KEY_ATTR_ETHERNET: err = set_eth_addr(skb, nla_data(nested_attr)); break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f14816b80b80..bbd310646bc8 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -362,6 +362,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, static size_t key_attr_size(void) { return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ + + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ @@ -600,8 +608,30 @@ static int validate_tp_port(const struct sw_flow_key *flow_key) return -EINVAL; } +static int validate_and_copy_set_tun(const struct nlattr *attr, + struct sw_flow_actions **sfa) +{ + struct ovs_key_ipv4_tunnel tun_key; + int err, start; + + err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key); + if (err) + return err; + + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); + if (start < 0) + return start; + + err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key)); + add_nested_action_end(*sfa, start); + + return err; +} + static int validate_set(const struct nlattr *a, - const struct sw_flow_key *flow_key) + const struct sw_flow_key *flow_key, + struct sw_flow_actions **sfa, + bool *set_tun) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -611,18 +641,27 @@ static int validate_set(const struct nlattr *a, return -EINVAL; if (key_type > OVS_KEY_ATTR_MAX || - nla_len(ovs_key) != ovs_key_lens[key_type]) + (ovs_key_lens[key_type] != nla_len(ovs_key) && + ovs_key_lens[key_type] != -1)) return -EINVAL; switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv6 *ipv6_key; + int err; case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_ETHERNET: break; + case OVS_KEY_ATTR_TUNNEL: + *set_tun = true; + err = validate_and_copy_set_tun(a, sfa); + if (err) + return err; + break; + case OVS_KEY_ATTR_IPV4: if (flow_key->eth.type != htons(ETH_P_IP)) return -EINVAL; @@ -771,7 +810,7 @@ static int validate_and_copy_actions(const struct nlattr *attr, break; case OVS_ACTION_ATTR_SET: - err = validate_set(a, key); + err = validate_set(a, key, sfa, &skip_copy); if (err) return err; break; @@ -993,6 +1032,33 @@ static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) return err; } +static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + struct nlattr *start; + int err; + + switch (key_type) { + case OVS_KEY_ATTR_IPV4_TUNNEL: + start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + if (!start) + return -EMSGSIZE; + + err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key)); + if (err) + return err; + nla_nest_end(skb, start); + break; + default: + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) + return -EMSGSIZE; + break; + } + + return 0; +} + static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; @@ -1002,6 +1068,12 @@ static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *s int type = nla_type(a); switch (type) { + case OVS_ACTION_ATTR_SET: + err = set_action_to_attr(a, skb); + if (err) + return err; + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 16b840695216..e88ebc2f1c54 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -88,9 +88,12 @@ struct datapath { /** * struct ovs_skb_cb - OVS data in skb CB * @flow: The flow associated with this packet. May be %NULL if no flow. + * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the + * packet is not being tunneled. */ struct ovs_skb_cb { struct sw_flow *flow; + struct ovs_key_ipv4_tunnel *tun_key; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 940d4b803ff5..976a8b766a6a 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -603,6 +604,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memset(key, 0, sizeof(*key)); key->phy.priority = skb->priority; + if (OVS_CB(skb)->tun_key) + memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key)); key->phy.in_port = in_port; key->phy.skb_mark = skb->mark; @@ -818,6 +821,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), + [OVS_KEY_ATTR_TUNNEL] = -1, }; static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, @@ -955,6 +959,105 @@ static int parse_flow_nlattrs(const struct nlattr *attr, return 0; } +int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, + struct ovs_key_ipv4_tunnel *tun_key) +{ + struct nlattr *a; + int rem; + bool ttl = false; + + memset(tun_key, 0, sizeof(*tun_key)); + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_TOS] = 1, + [OVS_TUNNEL_KEY_ATTR_TTL] = 1, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, + [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + }; + + if (type > OVS_TUNNEL_KEY_ATTR_MAX || + ovs_tunnel_key_lens[type] != nla_len(a)) + return -EINVAL; + + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: + tun_key->tun_id = nla_get_be64(a); + tun_key->tun_flags |= TUNNEL_KEY; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + tun_key->ipv4_src = nla_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + tun_key->ipv4_dst = nla_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + tun_key->ipv4_tos = nla_get_u8(a); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + tun_key->ipv4_ttl = nla_get_u8(a); + ttl = true; + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tun_key->tun_flags |= TUNNEL_CSUM; + break; + default: + return -EINVAL; + + } + } + if (rem > 0) + return -EINVAL; + + if (!tun_key->ipv4_dst) + return -EINVAL; + + if (!ttl) + return -EINVAL; + + return 0; +} + +int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key) +{ + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + if (tun_key->tun_flags & TUNNEL_KEY && + nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id)) + return -EMSGSIZE; + if (tun_key->ipv4_src && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src)) + return -EMSGSIZE; + if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst)) + return -EMSGSIZE; + if (tun_key->ipv4_tos && + nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos)) + return -EMSGSIZE; + if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_CSUM) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + + nla_nest_end(skb, nla); + return 0; +} + /** * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. * @swkey: receives the extracted flow key. @@ -997,6 +1100,14 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); } + if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { + err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key); + if (err) + return err; + + attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); + } + /* Data attributes. */ if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) return -EINVAL; @@ -1135,17 +1246,21 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, const struct nlattr *attr) { + struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; const struct nlattr *nla; int rem; flow->key.phy.in_port = DP_MAX_PORTS; flow->key.phy.priority = 0; flow->key.phy.skb_mark = 0; + memset(tun_key, 0, sizeof(flow->key.tun_key)); nla_for_each_nested(nla, attr, rem) { int type = nla_type(nla); if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { + int err; + if (nla_len(nla) != ovs_key_lens[type]) return -EINVAL; @@ -1154,6 +1269,12 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, flow->key.phy.priority = nla_get_u32(nla); break; + case OVS_KEY_ATTR_TUNNEL: + err = ovs_ipv4_tun_from_nlattr(nla, tun_key); + if (err) + return err; + break; + case OVS_KEY_ATTR_IN_PORT: if (nla_get_u32(nla) >= DP_MAX_PORTS) return -EINVAL; @@ -1180,6 +1301,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) goto nla_put_failure; + if (swkey->tun_key.ipv4_dst && + ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key)) + goto nla_put_failure; + if (swkey->phy.in_port != DP_MAX_PORTS && nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) goto nla_put_failure; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index e370f6246ee9..aec5e43f690b 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -40,7 +40,22 @@ struct sw_flow_actions { struct nlattr actions[]; }; +/* Used to memset ovs_key_ipv4_tunnel padding. */ +#define OVS_TUNNEL_KEY_SIZE \ + (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \ + FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl)) + +struct ovs_key_ipv4_tunnel { + __be64 tun_id; + __be32 ipv4_src; + __be32 ipv4_dst; + u16 tun_flags; + u8 ipv4_tos; + u8 ipv4_ttl; +}; + struct sw_flow_key { + struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ @@ -179,5 +194,9 @@ u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len); struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; +int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, + struct ovs_key_ipv4_tunnel *tun_key); +int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key); #endif /* flow.h */ diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index e284c7e1fec4..98d3edbbc235 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -67,7 +67,7 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { rcu_read_lock(); - ovs_vport_receive(internal_dev_priv(netdev)->vport, skb); + ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL); rcu_read_unlock(); return 0; } diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 40de815b4213..5982f3f62835 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -51,7 +51,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) skb_push(skb, ETH_HLEN); ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); - ovs_vport_receive(vport, skb); + ovs_vport_receive(vport, skb, NULL); return; error: diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 176d449351eb..413287a1877f 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -325,7 +325,8 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * Must be called with rcu_read_lock. The packet cannot be shared and * skb->data should point to the Ethernet header. */ -void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) +void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, + struct ovs_key_ipv4_tunnel *tun_key) { struct pcpu_tstats *stats; @@ -335,6 +336,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) stats->rx_bytes += skb->len; u64_stats_update_end(&stats->syncp); + OVS_CB(skb)->tun_key = tun_key; ovs_dp_process_received_packet(vport, skb); } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 293278c4c2df..2d961aedd71d 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -184,7 +184,8 @@ static inline struct vport *vport_from_priv(const void *priv) return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); } -void ovs_vport_receive(struct vport *, struct sk_buff *); +void ovs_vport_receive(struct vport *, struct sk_buff *, + struct ovs_key_ipv4_tunnel *); void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); /* List of statically compiled vport implementations. Don't forget to also -- cgit From ffe3f4321745e743dd179ec2b12180c01ba0d3aa Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:23 -0700 Subject: openvswitch: Expand action buffer size. MAX_ACTIONS_BUFSIZE limits action list size, set tunnel action needs extra space on action list, for now increase max actions list limit. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/flow.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index aec5e43f690b..bfe80b960759 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -159,7 +159,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, const struct nlattr *attr); -#define MAX_ACTIONS_BUFSIZE (16 * 1024) +#define MAX_ACTIONS_BUFSIZE (32 * 1024) #define TBL_MIN_BUCKETS 1024 struct flow_table { -- cgit From a3e82996a8874c4cfe8c7f1be4d552018d8cba7e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:28 -0700 Subject: openvswitch: Optimize flow key match for non tunnel flows. Following patch adds start offset for sw_flow-key, so that we can skip tunneling information in key for non-tunnel flows. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 7 ++----- net/openvswitch/flow.c | 49 ++++++++++++++++++++++++++++++++++------------ net/openvswitch/flow.h | 6 +++--- 3 files changed, 42 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index bbd310646bc8..f7e3a0d84c40 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -894,10 +894,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); + err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; - flow->hash = ovs_flow_hash(&flow->key, key_len); acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); err = PTR_ERR(acts); if (IS_ERR(acts)) @@ -1276,14 +1275,12 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) error = PTR_ERR(flow); goto err_unlock_ovs; } - flow->key = key; clear_stats(flow); rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - flow->hash = ovs_flow_hash(&key, key_len); - ovs_flow_tbl_insert(table, flow); + ovs_flow_tbl_insert(table, flow, &key, key_len); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 976a8b766a6a..5c519b121e1b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -353,6 +353,14 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la return NULL; } +static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct hlist_head *head; + head = find_bucket(table, flow->hash); + hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); + table->count++; +} + static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) { int old_ver; @@ -369,7 +377,7 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new head = flex_array_get(old->buckets, i); hlist_for_each_entry(flow, head, hash_node[old_ver]) - ovs_flow_tbl_insert(new, flow); + __flow_tbl_insert(new, flow); } old->keep_flows = true; } @@ -763,9 +771,18 @@ out: return error; } -u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len) +static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len) +{ + return jhash2((u32 *)((u8 *)key + key_start), + DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0); +} + +static int flow_key_start(struct sw_flow_key *key) { - return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), 0); + if (key->tun_key.ipv4_dst) + return 0; + else + return offsetof(struct sw_flow_key, phy); } struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, @@ -773,28 +790,31 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, { struct sw_flow *flow; struct hlist_head *head; + u8 *_key; + int key_start; u32 hash; - hash = ovs_flow_hash(key, key_len); + key_start = flow_key_start(key); + hash = ovs_flow_hash(key, key_start, key_len); + _key = (u8 *) key + key_start; head = find_bucket(table, hash); hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { if (flow->hash == hash && - !memcmp(&flow->key, key, key_len)) { + !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) { return flow; } } return NULL; } -void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) +void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_key *key, int key_len) { - struct hlist_head *head; - - head = find_bucket(table, flow->hash); - hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); - table->count++; + flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len); + memcpy(&flow->key, key, sizeof(flow->key)); + __flow_tbl_insert(table, flow); } void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) @@ -1235,6 +1255,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, /** * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. * @flow: Receives extracted in_port, priority, tun_key and skb_mark. + * @key_len: Length of key in @flow. Used for calculating flow hash. * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. * @@ -1243,7 +1264,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. */ -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const struct nlattr *attr) { struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; @@ -1289,6 +1310,10 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, } if (rem) return -EINVAL; + + flow->hash = ovs_flow_hash(&flow->key, + flow_key_start(&flow->key), key_len); + return 0; } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index bfe80b960759..999842f247a0 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -156,7 +156,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const struct nlattr *attr); #define MAX_ACTIONS_BUFSIZE (32 * 1024) @@ -188,9 +188,9 @@ void ovs_flow_tbl_deferred_destroy(struct flow_table *table); struct flow_table *ovs_flow_tbl_alloc(int new_size); struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); -void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow); +void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_key *key, int key_len); void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); -u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len); struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; -- cgit From aa310701e787087dbfbccf1409982a96e16c57a6 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:33 -0700 Subject: openvswitch: Add gre tunnel support. Add gre vport implementation. Most of gre protocol processing is pushed to gre module. It make use of gre demultiplexer therefore it can co-exist with linux device based gre tunnels. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/Kconfig | 2 + net/openvswitch/Makefile | 3 +- net/openvswitch/datapath.h | 1 + net/openvswitch/flow.h | 18 ++- net/openvswitch/vport-gre.c | 274 ++++++++++++++++++++++++++++++++++++++++++++ net/openvswitch/vport.c | 19 +++ net/openvswitch/vport.h | 7 ++ 7 files changed, 322 insertions(+), 2 deletions(-) create mode 100644 net/openvswitch/vport-gre.c (limited to 'net') diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index d9ea33c361be..9fbc04a31ed6 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -19,6 +19,8 @@ config OPENVSWITCH which is able to accept configuration from a variety of sources and translate it into packet processing rules. + Open vSwitch GRE support depends on CONFIG_NET_IPGRE_DEMUX. + See http://openvswitch.org for more information and userspace utilities. diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 15e7384745c1..01bddb2991e3 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -10,5 +10,6 @@ openvswitch-y := \ dp_notify.o \ flow.o \ vport.o \ + vport-gre.o \ vport-internal_dev.o \ - vport-netdev.o \ + vport-netdev.o diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index e88ebc2f1c54..a91486484916 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -122,6 +122,7 @@ struct dp_upcall_info { struct ovs_net { struct list_head dps; struct work_struct dp_notify_work; + struct vport_net vport_net; }; extern int ovs_net_id; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 999842f247a0..66ef7220293e 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -49,11 +49,27 @@ struct ovs_key_ipv4_tunnel { __be64 tun_id; __be32 ipv4_src; __be32 ipv4_dst; - u16 tun_flags; + __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; }; +static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, + const struct iphdr *iph, __be64 tun_id, + __be16 tun_flags) +{ + tun_key->tun_id = tun_id; + tun_key->ipv4_src = iph->saddr; + tun_key->ipv4_dst = iph->daddr; + tun_key->ipv4_tos = iph->tos; + tun_key->ipv4_ttl = iph->ttl; + tun_key->tun_flags = tun_flags; + + /* clear struct padding. */ + memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0, + sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE); +} + struct sw_flow_key { struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct { diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c new file mode 100644 index 000000000000..3a8d1900aa78 --- /dev/null +++ b/net/openvswitch/vport-gre.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifdef CONFIG_NET_IPGRE_DEMUX +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "datapath.h" +#include "vport.h" + +/* Returns the least-significant 32 bits of a __be64. */ +static __be32 be64_get_low32(__be64 x) +{ +#ifdef __BIG_ENDIAN + return (__force __be32)x; +#else + return (__force __be32)((__force u64)x >> 32); +#endif +} + +static __be16 filter_tnl_flags(__be16 flags) +{ + return flags & (TUNNEL_CSUM | TUNNEL_KEY); +} + +static struct sk_buff *__build_header(struct sk_buff *skb, + int tunnel_hlen) +{ + const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; + struct tnl_ptk_info tpi; + + skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); + if (IS_ERR(skb)) + return NULL; + + tpi.flags = filter_tnl_flags(tun_key->tun_flags); + tpi.proto = htons(ETH_P_TEB); + tpi.key = be64_get_low32(tun_key->tun_id); + tpi.seq = 0; + gre_build_header(skb, &tpi, tunnel_hlen); + + return skb; +} + +static __be64 key_to_tunnel_id(__be32 key, __be32 seq) +{ +#ifdef __BIG_ENDIAN + return (__force __be64)((__force u64)seq << 32 | (__force u32)key); +#else + return (__force __be64)((__force u64)key << 32 | (__force u32)seq); +#endif +} + +/* Called with rcu_read_lock and BH disabled. */ +static int gre_rcv(struct sk_buff *skb, + const struct tnl_ptk_info *tpi) +{ + struct ovs_key_ipv4_tunnel tun_key; + struct ovs_net *ovs_net; + struct vport *vport; + __be64 key; + + ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); + vport = rcu_dereference(ovs_net->vport_net.gre_vport); + if (unlikely(!vport)) + return PACKET_REJECT; + + key = key_to_tunnel_id(tpi->key, tpi->seq); + ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, + filter_tnl_flags(tpi->flags)); + + ovs_vport_receive(vport, skb, &tun_key); + return PACKET_RCVD; +} + +static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct flowi4 fl; + struct rtable *rt; + int min_headroom; + int tunnel_hlen; + __be16 df; + int err; + + if (unlikely(!OVS_CB(skb)->tun_key)) { + err = -EINVAL; + goto error; + } + + /* Route lookup */ + memset(&fl, 0, sizeof(fl)); + fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst; + fl.saddr = OVS_CB(skb)->tun_key->ipv4_src; + fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos); + fl.flowi4_mark = skb->mark; + fl.flowi4_proto = IPPROTO_GRE; + + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags); + + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + tunnel_hlen + sizeof(struct iphdr) + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (unlikely(err)) + goto err_free_rt; + } + + if (vlan_tx_tag_present(skb)) { + if (unlikely(!__vlan_put_tag(skb, + skb->vlan_proto, + vlan_tx_tag_get(skb)))) { + err = -ENOMEM; + goto err_free_rt; + } + skb->vlan_tci = 0; + } + + /* Push Tunnel header. */ + skb = __build_header(skb, tunnel_hlen); + if (unlikely(!skb)) { + err = 0; + goto err_free_rt; + } + + df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? + htons(IP_DF) : 0; + + skb->local_df = 1; + + return iptunnel_xmit(net, rt, skb, fl.saddr, + OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, + OVS_CB(skb)->tun_key->ipv4_tos, + OVS_CB(skb)->tun_key->ipv4_ttl, df); +err_free_rt: + ip_rt_put(rt); +error: + return err; +} + +static struct gre_cisco_protocol gre_protocol = { + .handler = gre_rcv, + .priority = 1, +}; + +static int gre_ports; +static int gre_init(void) +{ + int err; + + gre_ports++; + if (gre_ports > 1) + return 0; + + err = gre_cisco_register(&gre_protocol); + if (err) + pr_warn("cannot register gre protocol handler\n"); + + return err; +} + +static void gre_exit(void) +{ + gre_ports--; + if (gre_ports > 0) + return; + + gre_cisco_unregister(&gre_protocol); +} + +static const char *gre_get_name(const struct vport *vport) +{ + return vport_priv(vport); +} + +static struct vport *gre_create(const struct vport_parms *parms) +{ + struct net *net = ovs_dp_get_net(parms->dp); + struct ovs_net *ovs_net; + struct vport *vport; + int err; + + err = gre_init(); + if (err) + return ERR_PTR(err); + + ovs_net = net_generic(net, ovs_net_id); + if (ovsl_dereference(ovs_net->vport_net.gre_vport)) { + vport = ERR_PTR(-EEXIST); + goto error; + } + + vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms); + if (IS_ERR(vport)) + goto error; + + strncpy(vport_priv(vport), parms->name, IFNAMSIZ); + rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport); + return vport; + +error: + gre_exit(); + return vport; +} + +static void gre_tnl_destroy(struct vport *vport) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct ovs_net *ovs_net; + + ovs_net = net_generic(net, ovs_net_id); + + rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); + ovs_vport_deferred_free(vport); + gre_exit(); +} + +const struct vport_ops ovs_gre_vport_ops = { + .type = OVS_VPORT_TYPE_GRE, + .create = gre_create, + .destroy = gre_tnl_destroy, + .get_name = gre_get_name, + .send = gre_tnl_send, +}; +#endif diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 413287a1877f..f52dfb9cb5a7 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -38,6 +38,10 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, + +#ifdef CONFIG_NET_IPGRE_DEMUX + &ovs_gre_vport_ops, +#endif }; /* Protected by RCU read lock for reading, ovs_mutex for writing. */ @@ -404,3 +408,18 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) spin_unlock(&vport->stats_lock); } + +static void free_vport_rcu(struct rcu_head *rcu) +{ + struct vport *vport = container_of(rcu, struct vport, rcu); + + ovs_vport_free(vport); +} + +void ovs_vport_deferred_free(struct vport *vport) +{ + if (!vport) + return; + + call_rcu(&vport->rcu, free_vport_rcu); +} diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 2d961aedd71d..376045c42f8b 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -34,6 +34,11 @@ struct vport_parms; /* The following definitions are for users of the vport subsytem: */ +/* The following definitions are for users of the vport subsytem: */ +struct vport_net { + struct vport __rcu *gre_vport; +}; + int ovs_vport_init(void); void ovs_vport_exit(void); @@ -152,6 +157,7 @@ enum vport_err_type { struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, const struct vport_parms *); void ovs_vport_free(struct vport *); +void ovs_vport_deferred_free(struct vport *vport); #define VPORT_ALIGN 8 @@ -192,6 +198,7 @@ void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); * add yours to the list at the top of vport.c. */ extern const struct vport_ops ovs_netdev_vport_ops; extern const struct vport_ops ovs_internal_vport_ops; +extern const struct vport_ops ovs_gre_vport_ops; static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len) -- cgit From cf89d6b2803ab99ac596f95d585c3057d2be645c Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 20 Jun 2013 10:01:32 +0800 Subject: neigh: no need to call lookup_neigh_parms in neigh_parms_alloc neigh_table.parms always exist and is initialized,kmemdup can use it to create new neigh_parms, actually lookup_neigh_parms here will return neigh_table.parms too. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/core/neighbour.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index decaa4b9db2f..53eab513955a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1429,15 +1429,11 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p, *ref; + struct neigh_parms *p; struct net *net = dev_net(dev); const struct net_device_ops *ops = dev->netdev_ops; - ref = lookup_neigh_parms(tbl, net, 0); - if (!ref) - return NULL; - - p = kmemdup(ref, sizeof(*p), GFP_KERNEL); + p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; atomic_set(&p->refcnt, 1); -- cgit From 170d6f99541600ec7512f1d2b0b0c349009098d2 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 20 Jun 2013 10:01:33 +0800 Subject: neigh: only allow init_net to change the default neigh_parms Though we don't export the /proc/sys/net/ipv[4,6]/neigh/default/ directory to the un-init_net, but we can still use cmd such as "ip ntable change name arp_cache locktime 129" to change the locktime of default neigh_parms. This patch disallows the un-init_net to find out the neigh_table.parms. So the un-init_net will failed to influence the init_net. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 53eab513955a..86f9b165bbba 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1419,7 +1419,7 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, for (p = &tbl->parms; p; p = p->next) { if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || - (!p->dev && !ifindex)) + (!p->dev && !ifindex && net_eq(net, &init_net))) return p; } -- cgit From dc25c676f54addb10e598daa9da9b8dd4fd487ab Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 20 Jun 2013 10:01:34 +0800 Subject: neigh: disallow un-init_net to change thresh of neigh thresh and interval are global resources, only init net can change them. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 86f9b165bbba..2569ab2cafbe 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2049,6 +2049,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) } } + err = -ENOENT; + if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || + tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) && + !net_eq(net, &init_net)) + goto errout_tbl_lock; + if (tb[NDTA_THRESH1]) tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); -- cgit From c2ff682a6f5c5ae2cb23b32bb4fd7a6fb059d4fc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Jun 2013 12:03:13 +0200 Subject: sit: fix an oops when IFLA_IPTUN_PROTO is not set The use of this attribute has been added in 32b8a8e59c9c (sit: add IPv4 over IPv4 support). It is optional, by default proto is IPPROTO_IPV6. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6cee844678e2..f639866b3dcf 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1296,7 +1296,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) { u8 proto; - if (!data) + if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); -- cgit From 9ef71e0c820987c899e454e2e7ef94bc2d4c8d04 Mon Sep 17 00:00:00 2001 From: Weiping Pan Date: Tue, 18 Jun 2013 21:00:31 +0800 Subject: tcp:typo unset should be unsent Signed-off-by: Weiping Pan Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3dd46eab3b05..e2c1333ee318 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -185,7 +185,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) u32 tcp_default_init_rwnd(u32 mss) { /* Initial receive window should be twice of TCP_INIT_CWND to - * enable proper sending of new unset data during fast recovery + * enable proper sending of new unsent data during fast recovery * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a * limit when mss is larger than 1460. */ -- cgit From 2c0740e4e122239bcf6127fd2063733c5fb20c93 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 17 Jun 2013 22:26:52 -0400 Subject: sctp: Convert __list_for_each use to list_for_each Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/sctp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 57b568c38ef6..1de49c802d83 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -178,7 +178,7 @@ static void sctp_get_local_addr_list(struct net *net) rcu_read_lock(); for_each_netdev_rcu(net, dev) { - __list_for_each(pos, &sctp_address_families) { + list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&net->sctp.local_addr_list, dev); } -- cgit From bcefe17cffd06efdda3e7ad679ea743236e6271a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 15 Jun 2013 09:39:18 +0800 Subject: tcp: introduce a per-route knob for quick ack In previous discussions, I tried to find some reasonable heuristics for delayed ACK, however this seems not possible, according to Eric: "ACKS might also be delayed because of bidirectional traffic, and is more controlled by the application response time. TCP stack can not easily estimate it." "ACK can be incredibly useful to recover from losses in a short time. The vast majority of TCP sessions are small lived, and we send one ACK per received segment anyway at beginning or retransmits to let the sender smoothly increase its cwnd, so an auto-tuning facility wont help them that much." and according to David: "ACKs are the only information we have to detect loss. And, for the same reasons that TCP VEGAS is fundamentally broken, we cannot measure the pipe or some other receiver-side-visible piece of information to determine when it's "safe" to stretch ACK. And even if it's "safe", we should not do it so that losses are accurately detected and we don't spuriously retransmit. The only way to know when the bandwidth increases is to "test" it, by sending more and more packets until drops happen. That's why all successful congestion control algorithms must operate on explicited tested pieces of information. Similarly, it's not really possible to universally know if it's safe to stretch ACK or not." It still makes sense to enable or disable quick ack mode like what TCP_QUICK_ACK does. Similar to TCP_QUICK_ACK option, but for people who can't modify the source code and still wants to control TCP delayed ACK behavior. As David suggested, this should belong to per-path scope, since different pathes may want different behaviors. Cc: Eric Dumazet Cc: Rick Jones Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf CC: David Laight Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 ++++- net/ipv4/tcp_output.c | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 46271cdcf088..28af45abe062 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3717,6 +3717,7 @@ void tcp_reset(struct sock *sk) static void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + const struct dst_entry *dst; inet_csk_schedule_ack(sk); @@ -3728,7 +3729,9 @@ static void tcp_fin(struct sock *sk) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); - inet_csk(sk)->icsk_ack.pingpong = 1; + dst = __sk_dst_get(sk); + if (!dst || !dst_metric(dst, RTAX_QUICKACK)) + inet_csk(sk)->icsk_ack.pingpong = 1; break; case TCP_CLOSE_WAIT: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2c1333ee318..3d609490f118 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -160,6 +160,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; + const struct dst_entry *dst = __sk_dst_get(sk); if (sysctl_tcp_slow_start_after_idle && (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)) @@ -170,8 +171,9 @@ static void tcp_event_data_sent(struct tcp_sock *tp, /* If it is a reply for ato after last received * packet, enter pingpong mode. */ - if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - icsk->icsk_ack.pingpong = 1; + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato && + (!dst || !dst_metric(dst, RTAX_QUICKACK))) + icsk->icsk_ack.pingpong = 1; } /* Account for an ACK we sent. */ -- cgit From c9364636dcb01a6fc37ca2c6a51c5aa0c663013c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 15 Jun 2013 03:30:10 -0700 Subject: htb: refactor struct htb_sched fields for performance htb_sched structures are big, and source of false sharing on SMP. Every time a packet is queued or dequeue, many cache lines must be touched because structures are not lay out properly. By carefully splitting htb_sched in two parts, and define sub structures to increase data locality, we can improve performance dramatically on SMP. New htb_prio structure can also be used in htb_class to increase data locality. I got 26 % performance increase on a 24 threads machine, with 200 concurrent netperf in TCP_RR mode, using a HTB hierarchy of 4 classes. Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 181 +++++++++++++++++++++++++++------------------------- 1 file changed, 95 insertions(+), 86 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 7954e73d118a..c2124ea29f45 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -76,6 +76,20 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; +struct htb_prio { + union { + struct rb_root row; + struct rb_root feed; + }; + struct rb_node *ptr; + /* When class changes from state 1->2 and disconnects from + * parent's feed then we lost ptr value and start from the + * first child again. Here we store classid of the + * last valid ptr (used when ptr is NULL). + */ + u32 last_ptr_id; +}; + /* interior & leaf nodes; props specific to leaves are marked L: * To reduce false sharing, place mostly read fields at beginning, * and mostly written ones at the end. @@ -112,19 +126,12 @@ struct htb_class { union { struct htb_class_leaf { - struct Qdisc *q; - int deficit[TC_HTB_MAXDEPTH]; struct list_head drop_list; + int deficit[TC_HTB_MAXDEPTH]; + struct Qdisc *q; } leaf; struct htb_class_inner { - struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ - struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ - /* When class changes from state 1->2 and disconnects from - * parent's feed then we lost ptr value and start from the - * first child again. Here we store classid of the - * last valid ptr (used when ptr is NULL). - */ - u32 last_ptr_id[TC_HTB_NUMPRIO]; + struct htb_prio clprio[TC_HTB_NUMPRIO]; } inner; } un; s64 pq_key; @@ -135,40 +142,39 @@ struct htb_class { struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ }; +struct htb_level { + struct rb_root wait_pq; + struct htb_prio hprio[TC_HTB_NUMPRIO]; +}; + struct htb_sched { struct Qdisc_class_hash clhash; - struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ - - /* self list - roots of self generating tree */ - struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - int row_mask[TC_HTB_MAXDEPTH]; - struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + int defcls; /* class where unclassified flows go to */ + int rate2quantum; /* quant = rate / rate2quantum */ - /* self wait list - roots of wait PQs per row */ - struct rb_root wait_pq[TC_HTB_MAXDEPTH]; + /* filters for qdisc itself */ + struct tcf_proto *filter_list; - /* time of nearest event per level (row) */ - s64 near_ev_cache[TC_HTB_MAXDEPTH]; +#define HTB_WARN_TOOMANYEVENTS 0x1 + unsigned int warned; /* only one warning */ + int direct_qlen; + struct work_struct work; - int defcls; /* class where unclassified flows go to */ + /* non shaped skbs; let them go directly thru */ + struct sk_buff_head direct_queue; + long direct_pkts; - /* filters for qdisc itself */ - struct tcf_proto *filter_list; + struct qdisc_watchdog watchdog; - int rate2quantum; /* quant = rate / rate2quantum */ - s64 now; /* cached dequeue time */ - struct qdisc_watchdog watchdog; + s64 now; /* cached dequeue time */ + struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ - /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; - int direct_qlen; /* max qlen of above */ + /* time of nearest event per level (row) */ + s64 near_ev_cache[TC_HTB_MAXDEPTH]; - long direct_pkts; + int row_mask[TC_HTB_MAXDEPTH]; -#define HTB_WARN_TOOMANYEVENTS 0x1 - unsigned int warned; /* only one warning */ - struct work_struct work; + struct htb_level hlevel[TC_HTB_MAXDEPTH]; }; /* find class in global hash table using given handle */ @@ -284,7 +290,7 @@ static void htb_add_to_id_tree(struct rb_root *root, static void htb_add_to_wait_tree(struct htb_sched *q, struct htb_class *cl, s64 delay) { - struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; + struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL; cl->pq_key = q->now + delay; if (cl->pq_key == q->now) @@ -304,7 +310,7 @@ static void htb_add_to_wait_tree(struct htb_sched *q, p = &parent->rb_left; } rb_link_node(&cl->pq_node, parent, p); - rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]); + rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq); } /** @@ -331,7 +337,7 @@ static inline void htb_add_class_to_row(struct htb_sched *q, while (mask) { int prio = ffz(~mask); mask &= ~(1 << prio); - htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio); + htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio); } } @@ -357,16 +363,18 @@ static inline void htb_remove_class_from_row(struct htb_sched *q, struct htb_class *cl, int mask) { int m = 0; + struct htb_level *hlevel = &q->hlevel[cl->level]; while (mask) { int prio = ffz(~mask); + struct htb_prio *hprio = &hlevel->hprio[prio]; mask &= ~(1 << prio); - if (q->ptr[cl->level][prio] == cl->node + prio) - htb_next_rb_node(q->ptr[cl->level] + prio); + if (hprio->ptr == cl->node + prio) + htb_next_rb_node(&hprio->ptr); - htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio); - if (!q->row[cl->level][prio].rb_node) + htb_safe_rb_erase(cl->node + prio, &hprio->row); + if (!hprio->row.rb_node) m |= 1 << prio; } q->row_mask[cl->level] &= ~m; @@ -390,13 +398,13 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) int prio = ffz(~m); m &= ~(1 << prio); - if (p->un.inner.feed[prio].rb_node) + if (p->un.inner.clprio[prio].feed.rb_node) /* parent already has its feed in use so that * reset bit in mask as parent is already ok */ mask &= ~(1 << prio); - htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); + htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio); } p->prio_activity |= mask; cl = p; @@ -426,18 +434,19 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) int prio = ffz(~m); m &= ~(1 << prio); - if (p->un.inner.ptr[prio] == cl->node + prio) { + if (p->un.inner.clprio[prio].ptr == cl->node + prio) { /* we are removing child which is pointed to from * parent feed - forget the pointer but remember * classid */ - p->un.inner.last_ptr_id[prio] = cl->common.classid; - p->un.inner.ptr[prio] = NULL; + p->un.inner.clprio[prio].last_ptr_id = cl->common.classid; + p->un.inner.clprio[prio].ptr = NULL; } - htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio); + htb_safe_rb_erase(cl->node + prio, + &p->un.inner.clprio[prio].feed); - if (!p->un.inner.feed[prio].rb_node) + if (!p->un.inner.clprio[prio].feed.rb_node) mask |= 1 << prio; } @@ -652,7 +661,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, htb_change_class_mode(q, cl, &diff); if (old_mode != cl->cmode) { if (old_mode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq); if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } @@ -672,7 +681,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * next pending event (0 for no event in pq, q->now for too many events). * Note: Applied are events whose have cl->pq_key <= q->now. */ -static s64 htb_do_events(struct htb_sched *q, int level, +static s64 htb_do_events(struct htb_sched *q, const int level, unsigned long start) { /* don't run for longer than 2 jiffies; 2 is used instead of @@ -680,10 +689,12 @@ static s64 htb_do_events(struct htb_sched *q, int level, * too soon */ unsigned long stop_at = start + 2; + struct rb_root *wait_pq = &q->hlevel[level].wait_pq; + while (time_before(jiffies, stop_at)) { struct htb_class *cl; s64 diff; - struct rb_node *p = rb_first(&q->wait_pq[level]); + struct rb_node *p = rb_first(wait_pq); if (!p) return 0; @@ -692,7 +703,7 @@ static s64 htb_do_events(struct htb_sched *q, int level, if (cl->pq_key > q->now) return cl->pq_key; - htb_safe_rb_erase(p, q->wait_pq + level); + htb_safe_rb_erase(p, wait_pq); diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); htb_change_class_mode(q, cl, &diff); if (cl->cmode != HTB_CAN_SEND) @@ -736,8 +747,7 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, * * Find leaf where current feed pointers points to. */ -static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, - struct rb_node **pptr, u32 * pid) +static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio) { int i; struct { @@ -746,10 +756,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - BUG_ON(!tree->rb_node); - sp->root = tree->rb_node; - sp->pptr = pptr; - sp->pid = pid; + BUG_ON(!hprio->row.rb_node); + sp->root = hprio->row.rb_node; + sp->pptr = &hprio->ptr; + sp->pid = &hprio->last_ptr_id; for (i = 0; i < 65535; i++) { if (!*sp->pptr && *sp->pid) { @@ -776,12 +786,15 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, } } else { struct htb_class *cl; + struct htb_prio *clp; + cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); if (!cl->level) return cl; - (++sp)->root = cl->un.inner.feed[prio].rb_node; - sp->pptr = cl->un.inner.ptr + prio; - sp->pid = cl->un.inner.last_ptr_id + prio; + clp = &cl->un.inner.clprio[prio]; + (++sp)->root = clp->feed.rb_node; + sp->pptr = &clp->ptr; + sp->pid = &clp->last_ptr_id; } } WARN_ON(1); @@ -791,15 +804,16 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, /* dequeues packet at given priority and level; call only if * you are sure that there is active class at prio/level */ -static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, - int level) +static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio, + const int level) { struct sk_buff *skb = NULL; struct htb_class *cl, *start; + struct htb_level *hlevel = &q->hlevel[level]; + struct htb_prio *hprio = &hlevel->hprio[prio]; + /* look initial class up in the row */ - start = cl = htb_lookup_leaf(q->row[level] + prio, prio, - q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + start = cl = htb_lookup_leaf(hprio, prio); do { next: @@ -819,9 +833,7 @@ next: if ((q->row_mask[level] & (1 << prio)) == 0) return NULL; - next = htb_lookup_leaf(q->row[level] + prio, - prio, q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + next = htb_lookup_leaf(hprio, prio); if (cl == start) /* fix start if we just deleted it */ start = next; @@ -834,11 +846,9 @@ next: break; qdisc_warn_nonwc("htb", cl->un.leaf.q); - htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> - ptr[0]) + prio); - cl = htb_lookup_leaf(q->row[level] + prio, prio, - q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr: + &q->hlevel[0].hprio[prio].ptr); + cl = htb_lookup_leaf(hprio, prio); } while (cl != start); @@ -847,8 +857,8 @@ next: cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); if (cl->un.leaf.deficit[level] < 0) { cl->un.leaf.deficit[level] += cl->quantum; - htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> - ptr[0]) + prio); + htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr : + &q->hlevel[0].hprio[prio].ptr); } /* this used to be after charge_class but this constelation * gives us slightly better performance @@ -888,15 +898,14 @@ ok: for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ int m; - s64 event; + s64 event = q->near_ev_cache[level]; - if (q->now >= q->near_ev_cache[level]) { + if (q->now >= event) { event = htb_do_events(q, level, start_at); if (!event) event = q->now + NSEC_PER_SEC; q->near_ev_cache[level] = event; - } else - event = q->near_ev_cache[level]; + } if (next_event > event) next_event = event; @@ -976,10 +985,8 @@ static void htb_reset(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); __skb_queue_purge(&q->direct_queue); sch->q.qlen = 0; - memset(q->row, 0, sizeof(q->row)); + memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); - memset(q->wait_pq, 0, sizeof(q->wait_pq)); - memset(q->ptr, 0, sizeof(q->ptr)); for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); } @@ -1200,7 +1207,8 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity); if (parent->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level); + htb_safe_rb_erase(&parent->pq_node, + &q->hlevel[parent->level].wait_pq); parent->level = 0; memset(&parent->un.inner, 0, sizeof(parent->un.inner)); @@ -1289,7 +1297,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + htb_safe_rb_erase(&cl->pq_node, + &q->hlevel[cl->level].wait_pq); if (last_child) htb_parent_to_leaf(q, cl, new_q); @@ -1411,7 +1420,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { - htb_safe_rb_erase(&parent->pq_node, q->wait_pq); + htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq); parent->cmode = HTB_CAN_SEND; } parent->level = (parent->parent ? parent->parent->level -- cgit From af92e5425e4a7cfbc9b85dc268acfaadb551cc56 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Sat, 15 Jun 2013 23:04:56 +0300 Subject: inet: frag , remove an empty ifdef. This patch removes an empty ifdef from inet_frag_intern() in net/ipv4/inet_fragment.c. commit b67bfe0d42cac56c512dd5da4b1b347a23f4b70a (hlist: drop the node parameter from iterators) removed hlist from net/ipv4/inet_fragment.c, but did not remove the enclosing ifdef command, which is now empty. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 7e06641e36ae..4b864430a8c4 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -247,8 +247,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, { struct inet_frag_bucket *hb; struct inet_frag_queue *qp; -#ifdef CONFIG_SMP -#endif unsigned int hash; read_lock(&f->lock); /* Protects against hash rebuild */ -- cgit From 130ffbc2638ddc290fcbabe1b9ce6a5d333a6a97 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 12 Jun 2013 17:54:51 +0200 Subject: netfilter: check return code from nla_parse_tested These are the only calls under net/ that do not check nla_parse_nested() for its error code, but simply continue execution. If parsing of netlink attributes fails, we should return with an error instead of continuing. In nearly all of these calls we have a policy attached, that is being type verified during nla_parse_nested(), which we would miss checking for otherwise. Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 30 +++++++++++++++++++++++------- net/netfilter/nfnetlink_cthelper.c | 16 ++++++++++++---- net/netfilter/nfnetlink_cttimeout.c | 6 ++++-- 3 files changed, 39 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6d0f8a17c5b7..f83a52298efe 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -828,7 +828,9 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) struct nf_conntrack_l3proto *l3proto; int ret = 0; - nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); + ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); + if (ret < 0) + return ret; rcu_read_lock(); l3proto = __nf_ct_l3proto_find(tuple->src.l3num); @@ -895,7 +897,9 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], memset(tuple, 0, sizeof(*tuple)); - nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); + err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); + if (err < 0) + return err; if (!tb[CTA_TUPLE_IP]) return -EINVAL; @@ -946,9 +950,12 @@ static inline int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, struct nlattr **helpinfo) { + int err; struct nlattr *tb[CTA_HELP_MAX+1]; - nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); + err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); + if (err < 0) + return err; if (!tb[CTA_HELP_NAME]) return -EINVAL; @@ -1431,7 +1438,9 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[] struct nf_conntrack_l4proto *l4proto; int err = 0; - nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); + err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); + if (err < 0) + return err; rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); @@ -1452,9 +1461,12 @@ static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = { static inline int change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) { + int err; struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; - nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); + err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); + if (err < 0) + return err; if (!cda[CTA_NAT_SEQ_CORRECTION_POS]) return -EINVAL; @@ -2115,7 +2127,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) struct nlattr *cda[CTA_MAX+1]; int ret; - nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + if (ret < 0) + return ret; spin_lock_bh(&nf_conntrack_lock); ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); @@ -2710,7 +2724,9 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_tuple nat_tuple = {}; int err; - nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy); + err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy); + if (err < 0) + return err; if (!tb[CTA_EXPECT_NAT_DIR] || !tb[CTA_EXPECT_NAT_TUPLE]) return -EINVAL; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a191b6db657e..9e287cb56a04 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -67,9 +67,12 @@ static int nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, const struct nlattr *attr) { + int err; struct nlattr *tb[NFCTH_TUPLE_MAX+1]; - nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + if (err < 0) + return err; if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; @@ -121,9 +124,12 @@ static int nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, const struct nlattr *attr) { + int err; struct nlattr *tb[NFCTH_POLICY_MAX+1]; - nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + if (err < 0) + return err; if (!tb[NFCTH_POLICY_NAME] || !tb[NFCTH_POLICY_EXPECT_MAX] || @@ -153,8 +159,10 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; - nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set); + ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + if (ret < 0) + return ret; if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 65074dfb9383..50580494148d 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -59,8 +59,10 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) { struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1]; - nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, - attr, l4proto->ctnl_timeout.nla_policy); + ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, + attr, l4proto->ctnl_timeout.nla_policy); + if (ret < 0) + return ret; ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, &timeout->data); -- cgit From 6547a221871f139cc56328a38105d47c14874cbe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Jun 2013 17:31:28 +0200 Subject: netfilter: nf_conntrack: avoid large timeout for mid-stream pickup When loose tracking is enabled (default), non-syn packets cause creation of new conntracks in established state with default timeout for established state (5 days). This causes the table to fill up with UNREPLIED when the 'new ack' packet happened to be the last-ack of a previous, already timed-out connection. Consider: A 192.168.x.52792 > 10.184.y.80: F, 426:426(0) ack 9237 win 255 B 10.184.y.80 > 192.168.x.52792: ., ack 427 win 123 <61 second pause> C 10.184.y.80 > 192.168.x.52792: F, 9237:9237(0) ack 427 win 123 D 192.168.x.52792 > 10.184.y.80: ., ack 9238 win 255 B moves conntrack to CLOSE_WAIT and will kill it after 60 second timeout, C is ignored (FIN set), but last packet (D) causes new ct with 5-days timeout. Use UNACK timeout (5 minutes) instead to get rid of these entries sooner when in ESTABLISHED state without having seen traffic in both directions. Signed-off-by: Florian Westphal Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4d4d8f1d01fc..7dcc376eea5f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1043,6 +1043,12 @@ static int tcp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } + /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection + * pickup with loose=1. Avoid large ESTABLISHED timeout. + */ + if (new_state == TCP_CONNTRACK_ESTABLISHED && + timeout > timeouts[TCP_CONNTRACK_UNACK]) + timeout = timeouts[TCP_CONNTRACK_UNACK]; } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) -- cgit From 681f130f39e10087475383e6771b9366e26bab0c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jun 2013 05:52:22 -0700 Subject: netfilter: xt_socket: add XT_SOCKET_NOWILDCARD flag xt_socket module can be a nice replacement to conntrack module in some cases (SYN filtering for example) But it lacks the ability to match the 3rd packet of TCP handshake (ACK coming from the client). Add a XT_SOCKET_NOWILDCARD flag to disable the wildcard mechanism. The wildcard is the legacy socket match behavior, that ignores LISTEN sockets bound to INADDR_ANY (or ipv6 equivalent) iptables -I INPUT -p tcp --syn -j SYN_CHAIN iptables -I INPUT -m socket --nowildcard -j ACCEPT Signed-off-by: Eric Dumazet Cc: Patrick McHardy Cc: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 70 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 02704245710e..f8b71911037a 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -163,8 +163,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, bool wildcard; bool transparent = true; - /* Ignore sockets listening on INADDR_ANY */ - wildcard = (sk->sk_state != TCP_TIME_WAIT && + /* Ignore sockets listening on INADDR_ANY, + * unless XT_SOCKET_NOWILDCARD is set + */ + wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && + sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->inet_rcv_saddr == 0); /* Ignore non-transparent sockets, @@ -197,7 +200,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) } static bool -socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } @@ -259,7 +262,7 @@ extract_icmp6_fields(const struct sk_buff *skb, } static bool -socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -302,8 +305,11 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) bool wildcard; bool transparent = true; - /* Ignore sockets listening on INADDR_ANY */ - wildcard = (sk->sk_state != TCP_TIME_WAIT && + /* Ignore sockets listening on INADDR_ANY + * unless XT_SOCKET_NOWILDCARD is set + */ + wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && + sk->sk_state != TCP_TIME_WAIT && ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); /* Ignore non-transparent sockets, @@ -331,6 +337,28 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) } #endif +static int socket_mt_v1_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V1) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); + return -EINVAL; + } + return 0; +} + +static int socket_mt_v2_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V2) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); + return -EINVAL; + } + return 0; +} + static struct xt_match socket_mt_reg[] __read_mostly = { { .name = "socket", @@ -345,7 +373,8 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV4, - .match = socket_mt4_v1, + .match = socket_mt4_v1_v2, + .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), @@ -356,7 +385,32 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV6, - .match = socket_mt6_v1, + .match = socket_mt6_v1_v2, + .checkentry = socket_mt_v1_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#endif + { + .name = "socket", + .revision = 2, + .family = NFPROTO_IPV4, + .match = socket_mt4_v1_v2, + .checkentry = socket_mt_v2_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#ifdef XT_SOCKET_HAVE_IPV6 + { + .name = "socket", + .revision = 2, + .family = NFPROTO_IPV6, + .match = socket_mt6_v1_v2, + .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), -- cgit From c5623556fc61804713b1569b4f748e36956bc6e8 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:33 +0300 Subject: Bluetooth: Handle LE L2CAP signalling in its own function The LE L2CAP signalling channel follows its own rules and will continue to evolve independently from the BR/EDR signalling channel. Therefore, it makes sense to have a clear split from BR/EDR by having a dedicated function for handling LE signalling commands. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 53 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4be6a264b475..ab9961118181 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5292,6 +5292,51 @@ static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, } } +static inline void l2cap_le_sig_channel(struct l2cap_conn *conn, + struct sk_buff *skb) +{ + u8 *data = skb->data; + int len = skb->len; + struct l2cap_cmd_hdr cmd; + int err; + + l2cap_raw_recv(conn, skb); + + while (len >= L2CAP_CMD_HDR_SIZE) { + u16 cmd_len; + memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); + data += L2CAP_CMD_HDR_SIZE; + len -= L2CAP_CMD_HDR_SIZE; + + cmd_len = le16_to_cpu(cmd.len); + + BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, + cmd.ident); + + if (cmd_len > len || !cmd.ident) { + BT_DBG("corrupted command"); + break; + } + + err = l2cap_le_sig_cmd(conn, &cmd, data); + if (err) { + struct l2cap_cmd_rej_unk rej; + + BT_ERR("Wrong link type (%d)", err); + + /* FIXME: Map err to a valid reason */ + rej.reason = __constant_cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); + l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, + sizeof(rej), &rej); + } + + data += cmd_len; + len -= cmd_len; + } + + kfree_skb(skb); +} + static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { @@ -5318,11 +5363,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, break; } - if (conn->hcon->type == LE_LINK) - err = l2cap_le_sig_cmd(conn, &cmd, data); - else - err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); - + err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); if (err) { struct l2cap_cmd_rej_unk rej; @@ -6395,6 +6436,8 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) switch (cid) { case L2CAP_CID_LE_SIGNALING: + l2cap_le_sig_channel(conn, skb); + break; case L2CAP_CID_SIGNALING: l2cap_sig_channel(conn, skb); break; -- cgit From 073d1cf35fe45d89f5a553c21eea18b504dd6937 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:35 +0300 Subject: Bluetooth: Rename L2CAP_CID_LE_DATA to L2CAP_CID_ATT In future Core Specification versions the ATT CID will be just one of many possible CIDs that can be used for data transfer. Therefore, it makes sense to rename the define for the ATT CID to something less ambigous. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 14 +++++++------- net/bluetooth/l2cap_sock.c | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ab9961118181..c87f8f94083d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -504,8 +504,8 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) if (conn->hcon->type == LE_LINK) { /* LE connection */ chan->omtu = L2CAP_DEFAULT_MTU; - chan->scid = L2CAP_CID_LE_DATA; - chan->dcid = L2CAP_CID_LE_DATA; + chan->scid = L2CAP_CID_ATT; + chan->dcid = L2CAP_CID_ATT; } else { /* Alloc CID for connection-oriented socket */ chan->scid = l2cap_alloc_cid(conn); @@ -1344,7 +1344,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) BT_DBG(""); /* Check if we have socket listening on cid */ - pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_LE_DATA, + pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_ATT, conn->src, conn->dst); if (!pchan) return; @@ -1792,7 +1792,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, auth_type = l2cap_get_auth_type(chan); - if (chan->dcid == L2CAP_CID_LE_DATA) + if (chan->dcid == L2CAP_CID_ATT) hcon = hci_connect(hdev, LE_LINK, dst, dst_type, chan->sec_level, auth_type); else @@ -6397,7 +6397,7 @@ static void l2cap_att_channel(struct l2cap_conn *conn, { struct l2cap_chan *chan; - chan = l2cap_global_chan_by_scid(0, L2CAP_CID_LE_DATA, + chan = l2cap_global_chan_by_scid(0, L2CAP_CID_ATT, conn->src, conn->dst); if (!chan) goto drop; @@ -6448,7 +6448,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) l2cap_conless_channel(conn, psm, skb); break; - case L2CAP_CID_LE_DATA: + case L2CAP_CID_ATT: l2cap_att_channel(conn, skb); break; @@ -6574,7 +6574,7 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) continue; } - if (chan->scid == L2CAP_CID_LE_DATA) { + if (chan->scid == L2CAP_CID_ATT) { if (!status && encrypt) { chan->sec_level = hcon->sec_level; l2cap_chan_ready(chan); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 36fed40c162c..0098af80b213 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -466,7 +466,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu) { switch (chan->scid) { - case L2CAP_CID_LE_DATA: + case L2CAP_CID_ATT: if (mtu < L2CAP_LE_MIN_MTU) return false; break; @@ -630,7 +630,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, conn = chan->conn; /*change security for LE channels */ - if (chan->scid == L2CAP_CID_LE_DATA) { + if (chan->scid == L2CAP_CID_ATT) { if (!conn->hcon->out) { err = -EINVAL; break; -- cgit From f224ca5fc207a9957164e6f42ec6766da0f55d54 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:36 +0300 Subject: Bluetooth: Fix LE vs BR/EDR selection when connecting The choice between LE and BR/EDR should be made on the destination address type instead of the destination CID. This is particularly important when in the future more than one CID will be allowed for LE. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c87f8f94083d..29398293d501 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1792,7 +1792,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, auth_type = l2cap_get_auth_type(chan); - if (chan->dcid == L2CAP_CID_ATT) + if (bdaddr_type_is_le(dst_type)) hcon = hci_connect(hdev, LE_LINK, dst, dst_type, chan->sec_level, auth_type); else -- cgit From 141d57065afd11977c4d346f64b25350445bf689 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:37 +0300 Subject: Bluetooth: Fix EBUSY condition test in l2cap_chan_connect The current test in l2cap_chan_connect is intended to protect against multiple conflicting connect attempts. However, it assumes that there will ever only be a single CID that is connected to, which is not true. We do need to check for conflicts with connect attempts to the same destination CID but this check is not in anyway specific to LE but can be applied to BR/EDR as well. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 29398293d501..640423b4f411 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1811,16 +1811,10 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, goto done; } - if (hcon->type == LE_LINK) { - err = 0; - - if (!list_empty(&conn->chan_l)) { - err = -EBUSY; - hci_conn_drop(hcon); - } - - if (err) - goto done; + if (cid && __l2cap_get_chan_by_dcid(conn, cid)) { + hci_conn_drop(hcon); + err = -EBUSY; + goto done; } /* Update source addr of the socket */ -- cgit From 9f22398ce4baf816535415e65949d03f55a7973a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:38 +0300 Subject: Bluetooth: Fix hardcoding ATT CID in __l2cap_chan_add() Since in the future more than the ATT CID may be permissible we should not be hardcoding it for all LE connections in __l2cap_chan_add(). Instead, the source ATT CID should only be set if the destination is also ATT, and in other cases we should just use the existing dynamic CID allocation function. Assigning scid based on dcid means that whenever __l2cap_chan_add() is called that chan->dcid is properly initialized. l2cap_le_conn_ready() wasn't initializing is properly so this is also taken care of in this patch. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 640423b4f411..4803610187ed 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -504,8 +504,10 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) if (conn->hcon->type == LE_LINK) { /* LE connection */ chan->omtu = L2CAP_DEFAULT_MTU; - chan->scid = L2CAP_CID_ATT; - chan->dcid = L2CAP_CID_ATT; + if (chan->dcid == L2CAP_CID_ATT) + chan->scid = L2CAP_CID_ATT; + else + chan->scid = l2cap_alloc_cid(conn); } else { /* Alloc CID for connection-oriented socket */ chan->scid = l2cap_alloc_cid(conn); @@ -1357,6 +1359,8 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) if (!chan) goto clean; + chan->dcid = L2CAP_CID_ATT; + sk = chan->sk; hci_conn_hold(conn->hcon); -- cgit From d8729922b474eab65ca738028a2e69fb12e2eaa6 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:39 +0300 Subject: Bluetooth: Add clarifying comment to l2cap_conn_ready() There is an extra call to smp_conn_security() for outgoing LE connections from l2cap_conn_ready() but the reason for this call is far from clear. After a bit of commit history research and using git blame I found out that this extra call is for socket-less pairing processes added by commit 160dc6ac1. This patch adds a clarifying comment to the code for this. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4803610187ed..417d17723ee9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1387,6 +1387,9 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) if (!hcon->out && hcon->type == LE_LINK) l2cap_le_conn_ready(conn); + /* For outgoing pairing which doesn't necessarily have an + * associated socket (e.g. mgmt_pair_device). + */ if (hcon->out && hcon->type == LE_LINK) smp_conn_security(hcon, hcon->pending_sec_level); -- cgit From 97f57c0b14ad2ef0628fc6db48cd6c08e0e52c50 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:40 +0300 Subject: Bluetooth: Fix duplicate call to l2cap_chan_ready() In l2cap_le_conn_ready() after doing l2cap_chann_add() the LE channel is part of the list which is subsequently iterated in l2cap_conn_ready() in this loop each channel will get l2cap_chan_ready() called which would result in trying to set the channel two times into BT_CONNECTED state. Instead it makes sense to just add the channel but not call chan_ready in l2cap_le_conn_ready, which is what this patch does. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 417d17723ee9..843463ecd7bc 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1371,8 +1371,6 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) l2cap_chan_add(conn, chan); - l2cap_chan_ready(chan); - clean: release_sock(parent); } -- cgit From 60bac184c9c7df2299aca4dc45c4b5b486f49a89 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:41 +0300 Subject: Bluetooth: Remove useless sk variable in l2cap_le_conn_ready The sk variable is of quite little use since it's only used to simplify access in the two bt_sk() calls. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 843463ecd7bc..1557c3c774f1 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1340,7 +1340,7 @@ static struct l2cap_chan *l2cap_global_chan_by_scid(int state, u16 cid, static void l2cap_le_conn_ready(struct l2cap_conn *conn) { - struct sock *parent, *sk; + struct sock *parent; struct l2cap_chan *chan, *pchan; BT_DBG(""); @@ -1361,13 +1361,11 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) chan->dcid = L2CAP_CID_ATT; - sk = chan->sk; - hci_conn_hold(conn->hcon); conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; - bacpy(&bt_sk(sk)->src, conn->src); - bacpy(&bt_sk(sk)->dst, conn->dst); + bacpy(&bt_sk(chan->sk)->src, conn->src); + bacpy(&bt_sk(chan->sk)->dst, conn->dst); l2cap_chan_add(conn, chan); -- cgit From af1c01349ecc2b8ab2c329e4dbd46e9018469bd1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:42 +0300 Subject: Bluetooth: Remove unnecessary L2CAP channel state check In l2cap_att_channel() we're only interested in the BT_CONNECTED state so this state can directly be passed to l2cap_global_chan_by_scid(). This way there's no need to do any additional state check later. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 1557c3c774f1..55c6836796f7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6394,16 +6394,13 @@ static void l2cap_att_channel(struct l2cap_conn *conn, { struct l2cap_chan *chan; - chan = l2cap_global_chan_by_scid(0, L2CAP_CID_ATT, + chan = l2cap_global_chan_by_scid(BT_CONNECTED, L2CAP_CID_ATT, conn->src, conn->dst); if (!chan) goto drop; BT_DBG("chan %p, len %d", chan, skb->len); - if (chan->state != BT_BOUND && chan->state != BT_CONNECTED) - goto drop; - if (chan->imtu < skb->len) goto drop; -- cgit From 5ee9891dd8a63df1bf2ccd437872ad30a5850449 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:43 +0300 Subject: Bluetooth: Simplify hci_conn_hold/drop logic for L2CAP The L2CAP code has been incrementing the hci_conn reference for each l2cap_chan instance in the l2cap_conn list. Likewise, the reference is dropped each time an l2cap_chan is removed from the list. The reference counting policy with respect to removal has been clear and explicit in the l2cap_chan_del function, however for addition the function calling 2cap_chan_add has always had to do a separate hci_conn_hold call. What made the counting even more hard to follow is that the hci_connect() procedure increments the reference and the L2CAP layer making this call took advantage of it to use it as its own reference. This patch aims to clarify things by having the call to hci_conn_hold inside __l2cap_chan_add, thereby removing the need to do it in the functions calling __l2cap_chan_add. The reference count for hci_connect is still kept as it's necessary for users such as mgmt_pair_device, however for the L2CAP layer it means that an extra call to hci_conn_drop must be performed once l2cap_chan_add has been done. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 55c6836796f7..d7b501ba2716 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -545,6 +545,8 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) l2cap_chan_hold(chan); + hci_conn_hold(conn->hcon); + list_add(&chan->list, &conn->chan_l); } @@ -1361,7 +1363,6 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) chan->dcid = L2CAP_CID_ATT; - hci_conn_hold(conn->hcon); conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; bacpy(&bt_sk(chan->sk)->src, conn->src); @@ -1827,6 +1828,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, l2cap_chan_add(conn, chan); l2cap_chan_lock(chan); + /* l2cap_chan_add takes its own ref so we can drop this one */ + hci_conn_drop(hcon); + l2cap_state_change(chan, BT_CONNECT); __set_chan_timer(chan, sk->sk_sndtimeo); @@ -3748,8 +3752,6 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, sk = chan->sk; - hci_conn_hold(conn->hcon); - bacpy(&bt_sk(sk)->src, conn->src); bacpy(&bt_sk(sk)->dst, conn->dst); chan->psm = psm; -- cgit From 0cc59a72c723979cf8973aff4df874a5f7a697c7 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:44 +0300 Subject: Bluetooth: Remove useless hci_conn disc_timeout setting There's no need to reset disc_timeout in l2cap_le_conn_ready since HCI_DISCONN_TIMEOUT is the default when the hci_conn is created and there should be no way for it to get changed between creation and l2cap_le_conn_ready being called. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d7b501ba2716..8ae6a21cf643 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1363,8 +1363,6 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) chan->dcid = L2CAP_CID_ATT; - conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; - bacpy(&bt_sk(chan->sk)->src, conn->src); bacpy(&bt_sk(chan->sk)->dst, conn->dst); -- cgit From 44f3b0fbaa9bfa7a88577ee8c446d0a78cb1d73a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:45 +0300 Subject: Bluetooth: Fix multiple LE socket handling The LE ATT server socket needs to be superseded by any ATT client sockets. Previously this was done by looking at the hcon->out variable (indicating whether the connection is outgoing or incoming) which is a too crude way of determining whether the server socket needs to be picked or not (an outgoing connection doesn't necessarily mean that an ATT client socket has triggered it). This patch extends the ATT server socket lookup function (l2cap_le_conn_ready) to be used for all LE connections (regardless of the hcon->out value) and adds an internal check into the function for the existence of any ATT client sockets (in which case the server socket should be skipped). For this to work reliably all lookups must be done while the l2cap_conn->chan_lock is held, meaning also that the call to l2cap_chan_add needs to be changed to its lockless __l2cap_chan_add counterpart. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 8ae6a21cf643..9af3a76844f7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1353,6 +1353,10 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) if (!pchan) return; + /* Client ATT sockets should override the server one */ + if (__l2cap_get_chan_by_dcid(conn, L2CAP_CID_ATT)) + return; + parent = pchan->sk; lock_sock(parent); @@ -1366,7 +1370,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) bacpy(&bt_sk(chan->sk)->src, conn->src); bacpy(&bt_sk(chan->sk)->dst, conn->dst); - l2cap_chan_add(conn, chan); + __l2cap_chan_add(conn, chan); clean: release_sock(parent); @@ -1379,9 +1383,6 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) BT_DBG("conn %p", conn); - if (!hcon->out && hcon->type == LE_LINK) - l2cap_le_conn_ready(conn); - /* For outgoing pairing which doesn't necessarily have an * associated socket (e.g. mgmt_pair_device). */ @@ -1390,6 +1391,9 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) mutex_lock(&conn->chan_lock); + if (hcon->type == LE_LINK) + l2cap_le_conn_ready(conn); + list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); -- cgit From 1f9b9a5dc5bb8ee360db9d32b2090aac497ae82a Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:27 -0300 Subject: Bluetooth: Make inquiry_cache_flush non-static In order to use HCI request framework in start_discovery, we'll need to call inquiry_cache_flush in mgmt.c. Therefore, this patch adds the hci_ prefix to inquiry_cache_flush and makes it non-static. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ace5e55fe5a3..43c63877c5b7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -751,7 +751,7 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) hdev->discovery.state = state; } -static void inquiry_cache_flush(struct hci_dev *hdev) +void hci_inquiry_cache_flush(struct hci_dev *hdev) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *p, *n; @@ -964,7 +964,7 @@ int hci_inquiry(void __user *arg) hci_dev_lock(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) { - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); do_inquiry = 1; } hci_dev_unlock(hdev); @@ -1230,7 +1230,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work_sync(&hdev->le_scan_disable); hci_dev_lock(hdev); - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); @@ -1331,7 +1331,7 @@ int hci_dev_reset(__u16 dev) skb_queue_purge(&hdev->cmd_q); hci_dev_lock(hdev); - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); @@ -3562,7 +3562,7 @@ int hci_do_inquiry(struct hci_dev *hdev, u8 length) if (test_bit(HCI_INQUIRY, &hdev->flags)) return -EINPROGRESS; - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); memset(&cp, 0, sizeof(cp)); memcpy(&cp.lap, lap, sizeof(cp.lap)); -- cgit From 7c3077207c705d0aa200ce22d49a0376d194dfca Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:28 -0300 Subject: Bluetooth: Update start_discovery to use HCI request This patch modifies the start_discovery function so it uses the HCI request framework. We build the HCI request according to the discovery type (add inquiry or LE scan HCI commands) and run the HCI request. We also register the start_discovery_complete callback which handles mgmt command complete events for this command. This way, we move all start_ discovery mgmt handling code spread in hci_event.c to a single place in mgmt.c. This patch also merges the LE-only and interleaved discovery type cases since these cases are pretty much the same now. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 85 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f8ecbc70293d..434df715448e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2650,11 +2650,51 @@ int mgmt_interleaved_discovery(struct hci_dev *hdev) return err; } +static void start_discovery_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status %d", status); + + if (status) { + hci_dev_lock(hdev); + mgmt_start_discovery_failed(hdev, status); + hci_dev_unlock(hdev); + return; + } + + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_FINDING); + hci_dev_unlock(hdev); + + switch (hdev->discovery.type) { + case DISCOV_TYPE_LE: + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, + LE_SCAN_TIMEOUT_LE_ONLY); + break; + + case DISCOV_TYPE_INTERLEAVED: + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, + LE_SCAN_TIMEOUT_BREDR_LE); + break; + + case DISCOV_TYPE_BREDR: + break; + + default: + BT_ERR("Invalid discovery type %d", hdev->discovery.type); + } +} + static int start_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_start_discovery *cp = data; struct pending_cmd *cmd; + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + struct hci_cp_inquiry inq_cp; + struct hci_request req; + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; int err; BT_DBG("%s", hdev->name); @@ -2687,6 +2727,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.type = cp->type; + hci_req_init(&req, hdev); + switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: if (!lmp_bredr_capable(hdev)) { @@ -2696,10 +2738,23 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); + if (test_bit(HCI_INQUIRY, &hdev->flags)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_BUSY); + mgmt_pending_remove(cmd); + goto failed; + } + + hci_inquiry_cache_flush(hdev); + + memset(&inq_cp, 0, sizeof(inq_cp)); + memcpy(&inq_cp.lap, lap, sizeof(inq_cp.lap)); + inq_cp.length = INQUIRY_LEN_BREDR; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(inq_cp), &inq_cp); break; case DISCOV_TYPE_LE: + case DISCOV_TYPE_INTERLEAVED: if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, MGMT_STATUS_NOT_SUPPORTED); @@ -2707,20 +2762,40 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, - LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); - break; - - case DISCOV_TYPE_INTERLEAVED: - if (!lmp_host_le_capable(hdev) || !lmp_bredr_capable(hdev)) { + if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED && + !lmp_bredr_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, MGMT_STATUS_NOT_SUPPORTED); mgmt_pending_remove(cmd); goto failed; } - err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, - LE_SCAN_WIN, LE_SCAN_TIMEOUT_BREDR_LE); + if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_REJECTED); + mgmt_pending_remove(cmd); + goto failed; + } + + if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_BUSY); + mgmt_pending_remove(cmd); + goto failed; + } + + memset(¶m_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_ACTIVE; + param_cp.interval = cpu_to_le16(LE_SCAN_INT); + param_cp.window = cpu_to_le16(LE_SCAN_WIN); + hci_req_add(&req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + ¶m_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); break; default: @@ -2730,6 +2805,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } + err = hci_req_run(&req, start_discovery_complete); if (err < 0) mgmt_pending_remove(cmd); else -- cgit From fef5234a791507a2fe1ccfc85f080523fe762320 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:29 -0300 Subject: Bluetooth: Remove start discovery handling from hci_event.c Since all mgmt start discovery command complete events are now handled in start_discovery_complete callback in mgmt.c, we can remove this handling from hci_event.c. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b93cd2eb5d58..0e71e6c47391 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -943,12 +943,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (status) { - hci_dev_lock(hdev); - mgmt_start_discovery_failed(hdev, status); - hci_dev_unlock(hdev); - return; - } } static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, @@ -965,18 +959,10 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, switch (cp->enable) { case LE_SCAN_ENABLE: - if (status) { - hci_dev_lock(hdev); - mgmt_start_discovery_failed(hdev, status); - hci_dev_unlock(hdev); + if (status) return; - } set_bit(HCI_LE_SCAN, &hdev->dev_flags); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_FINDING); - hci_dev_unlock(hdev); break; case LE_SCAN_DISABLE: @@ -1077,18 +1063,10 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) if (status) { hci_conn_check_pending(hdev); - hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) - mgmt_start_discovery_failed(hdev, status); - hci_dev_unlock(hdev); return; } set_bit(HCI_INQUIRY, &hdev->flags); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_FINDING); - hci_dev_unlock(hdev); } static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) -- cgit From 41dc2bd6d13bfccc34d05586be2eb65876a5990a Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:30 -0300 Subject: Bluetooth: Make mgmt_start_discovery_failed static mgmt_start_discovery_failed is now only used in mgmt.c so we can make it a local function. This patch also moves the mgmt_start_ discovery_failed definition up in mgmt.c to avoid forward declaration. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 434df715448e..a9bd271a736e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2650,6 +2650,27 @@ int mgmt_interleaved_discovery(struct hci_dev *hdev) return err; } +static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + u8 type; + int err; + + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + + cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev); + if (!cmd) + return -ENOENT; + + type = hdev->discovery.type; + + err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), + &type, sizeof(type)); + mgmt_pending_remove(cmd); + + return err; +} + static void start_discovery_complete(struct hci_dev *hdev, u8 status) { BT_DBG("status %d", status); @@ -4190,27 +4211,6 @@ int mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, sizeof(*ev) + eir_len, NULL); } -int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) -{ - struct pending_cmd *cmd; - u8 type; - int err; - - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - - cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev); - if (!cmd) - return -ENOENT; - - type = hdev->discovery.type; - - err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), - &type, sizeof(type)); - mgmt_pending_remove(cmd); - - return err; -} - int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status) { struct pending_cmd *cmd; -- cgit From 0d8cc935e01c0fd1312a10881f4c0f1c4b4d05ab Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:31 -0300 Subject: Bluetooth: Move discovery macros to hci_core.h Some of discovery macros will be used in hci_core so we need to define them in common place such as hci_core.h. Thus, this patch moves discovery macros to hci_core.h and also adds the DISCOV_ prefix to them. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a9bd271a736e..6b31e93af761 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -102,18 +102,6 @@ static const u16 mgmt_events[] = { MGMT_EV_PASSKEY_NOTIFY, }; -/* - * These LE scan and inquiry parameters were chosen according to LE General - * Discovery Procedure specification. - */ -#define LE_SCAN_WIN 0x12 -#define LE_SCAN_INT 0x12 -#define LE_SCAN_TIMEOUT_LE_ONLY msecs_to_jiffies(10240) -#define LE_SCAN_TIMEOUT_BREDR_LE msecs_to_jiffies(5120) - -#define INQUIRY_LEN_BREDR 0x08 /* TGAP(100) */ -#define INQUIRY_LEN_BREDR_LE 0x04 /* TGAP(100)/2 */ - #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) #define hdev_is_powered(hdev) (test_bit(HCI_UP, &hdev->flags) && \ @@ -2641,7 +2629,7 @@ int mgmt_interleaved_discovery(struct hci_dev *hdev) hci_dev_lock(hdev); - err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR_LE); + err = hci_do_inquiry(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN); if (err < 0) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); @@ -2689,12 +2677,12 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) switch (hdev->discovery.type) { case DISCOV_TYPE_LE: queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - LE_SCAN_TIMEOUT_LE_ONLY); + DISCOV_LE_TIMEOUT); break; case DISCOV_TYPE_INTERLEAVED: queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - LE_SCAN_TIMEOUT_BREDR_LE); + DISCOV_INTERLEAVED_TIMEOUT); break; case DISCOV_TYPE_BREDR: @@ -2770,7 +2758,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, memset(&inq_cp, 0, sizeof(inq_cp)); memcpy(&inq_cp.lap, lap, sizeof(inq_cp.lap)); - inq_cp.length = INQUIRY_LEN_BREDR; + inq_cp.length = DISCOV_BREDR_INQUIRY_LEN; hci_req_add(&req, HCI_OP_INQUIRY, sizeof(inq_cp), &inq_cp); break; @@ -2807,8 +2795,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, memset(¶m_cp, 0, sizeof(param_cp)); param_cp.type = LE_SCAN_ACTIVE; - param_cp.interval = cpu_to_le16(LE_SCAN_INT); - param_cp.window = cpu_to_le16(LE_SCAN_WIN); + param_cp.interval = cpu_to_le16(DISCOV_LE_SCAN_INT); + param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); hci_req_add(&req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), ¶m_cp); -- cgit From 4c87eaab01df271c81f6a68e3c28dbd44d348004 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:32 -0300 Subject: Bluetooth: Use HCI request in interleaved discovery In order to have a better HCI error handling in interleaved discovery functionality, we should use the HCI request framework. This patch updates le_scan_disable_work function so it uses the HCI request framework instead of the hci_send_cmd helper. A complete callback is registered (le_scan_disable_work_complete function) so we are able to trigger the inquiry procedure (if we are running the interleaved discovery) or to stop the discovery procedure (if we are running LE-only discovery). This patch also removes the extra logic in hci_cc_le_set_scan_enable to trigger the inquiry procedure and the mgmt_interleaved_discovery function since they become useless. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++- net/bluetooth/hci_event.c | 10 -------- net/bluetooth/mgmt.c | 17 ------------- 3 files changed, 64 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 43c63877c5b7..9270d7ee489d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2067,17 +2067,80 @@ int hci_cancel_le_scan(struct hci_dev *hdev) return 0; } +static void inquiry_complete(struct hci_dev *hdev, u8 status) +{ + if (status) { + BT_ERR("Failed to start inquiry: status %d", status); + + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + return; + } +} + +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +{ + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; + struct hci_request req; + struct hci_cp_inquiry cp; + int err; + + if (status) { + BT_ERR("Failed to disable LE scanning: status %d", status); + return; + } + + switch (hdev->discovery.type) { + case DISCOV_TYPE_LE: + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + break; + + case DISCOV_TYPE_INTERLEAVED: + hci_req_init(&req, hdev); + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + hci_dev_lock(hdev); + + hci_inquiry_cache_flush(hdev); + + err = hci_req_run(&req, inquiry_complete); + if (err) { + BT_ERR("Inquiry request failed: err %d", err); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + } + + hci_dev_unlock(hdev); + break; + } +} + static void le_scan_disable_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan_disable.work); struct hci_cp_le_set_scan_enable cp; + struct hci_request req; + int err; BT_DBG("%s", hdev->name); + hci_req_init(&req, hdev); + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_DISABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + err = hci_req_run(&req, le_scan_disable_work_complete); + if (err) + BT_ERR("Disable LE scanning request failed: err %d", err); } static void le_scan_work(struct work_struct *work) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0e71e6c47391..faaf1f31345d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -974,16 +974,6 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, } clear_bit(HCI_LE_SCAN, &hdev->dev_flags); - - if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED && - hdev->discovery.state == DISCOVERY_FINDING) { - mgmt_interleaved_discovery(hdev); - } else { - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - } - break; default: diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6b31e93af761..743100f3ab9c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2621,23 +2621,6 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, return err; } -int mgmt_interleaved_discovery(struct hci_dev *hdev) -{ - int err; - - BT_DBG("%s", hdev->name); - - hci_dev_lock(hdev); - - err = hci_do_inquiry(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN); - if (err < 0) - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - - hci_dev_unlock(hdev); - - return err; -} - static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) { struct pending_cmd *cmd; -- cgit From 0e05bba6f6f8c2dca7a13fe0566742277e92df07 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:33 -0300 Subject: Bluetooth: Update stop_discovery to use HCI request This patch modifies the stop_discovery function so it uses the HCI request framework. The HCI request is built according to the current discovery state (inquiry, LE scanning or name resolving) and a complete callback is register to handle the command complete event for the stop discovery command. This way, we move all stop_discovery mgmt handling code spread in hci_event.c to a single place in mgmt.c. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 47 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 743100f3ab9c..c33bc4f4d006 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2808,6 +2808,23 @@ failed: return err; } +static void stop_discovery_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status %d", status); + + hci_dev_lock(hdev); + + if (status) { + mgmt_stop_discovery_failed(hdev, status); + goto unlock; + } + + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + +unlock: + hci_dev_unlock(hdev); +} + static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -2815,6 +2832,8 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, struct pending_cmd *cmd; struct hci_cp_remote_name_req_cancel cp; struct inquiry_entry *e; + struct hci_request req; + struct hci_cp_le_set_scan_enable enable_cp; int err; BT_DBG("%s", hdev->name); @@ -2841,12 +2860,20 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } + hci_req_init(&req, hdev); + switch (hdev->discovery.state) { case DISCOVERY_FINDING: - if (test_bit(HCI_INQUIRY, &hdev->flags)) - err = hci_cancel_inquiry(hdev); - else - err = hci_cancel_le_scan(hdev); + if (test_bit(HCI_INQUIRY, &hdev->flags)) { + hci_req_add(&req, HCI_OP_INQUIRY_CANCEL, 0, NULL); + } else { + cancel_delayed_work(&hdev->le_scan_disable); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_DISABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, + sizeof(enable_cp), &enable_cp); + } break; @@ -2864,16 +2891,22 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, } bacpy(&cp.bdaddr, &e->data.bdaddr); - err = hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ_CANCEL, - sizeof(cp), &cp); + hci_req_add(&req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), + &cp); break; default: BT_DBG("unknown discovery state %u", hdev->discovery.state); - err = -EFAULT; + + mgmt_pending_remove(cmd); + err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, + MGMT_STATUS_FAILED, &mgmt_cp->type, + sizeof(mgmt_cp->type)); + goto unlock; } + err = hci_req_run(&req, stop_discovery_complete); if (err < 0) mgmt_pending_remove(cmd); else -- cgit From 82f4785ca7b8d04ca6d0aaa37f1185c779744bc4 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:34 -0300 Subject: Bluetooth: Remove stop discovery handling from hci_event.c Since all mgmt stop discovery command complete events are now handled in stop_discovery_complete callback in mgmt.c, we can remove this handling from hci_event.c. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index faaf1f31345d..27f66dc88c9b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -40,21 +40,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (status) { - hci_dev_lock(hdev); - mgmt_stop_discovery_failed(hdev, status); - hci_dev_unlock(hdev); + if (status) return; - } clear_bit(HCI_INQUIRY, &hdev->flags); smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ wake_up_bit(&hdev->flags, HCI_INQUIRY); - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - hci_conn_check_pending(hdev); } @@ -966,12 +958,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, break; case LE_SCAN_DISABLE: - if (status) { - hci_dev_lock(hdev); - mgmt_stop_discovery_failed(hdev, status); - hci_dev_unlock(hdev); + if (status) return; - } clear_bit(HCI_LE_SCAN, &hdev->dev_flags); break; -- cgit From 1183fdcad42495073045a2d9755e0a6ac2fa874e Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:35 -0300 Subject: Bluetooth: Make mgmt_stop_discovery_failed static mgmt_stop_discovery_failed is now only used in mgmt.c so we can make it a local function. This patch also moves the mgmt_stop_ discovery_failed definition up in mgmt.c to avoid forward declaration. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c33bc4f4d006..69d17205745b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2808,6 +2808,22 @@ failed: return err; } +static int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + int err; + + cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev); + if (!cmd) + return -ENOENT; + + err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), + &hdev->discovery.type, sizeof(hdev->discovery.type)); + mgmt_pending_remove(cmd); + + return err; +} + static void stop_discovery_complete(struct hci_dev *hdev, u8 status) { BT_DBG("status %d", status); @@ -4215,22 +4231,6 @@ int mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, sizeof(*ev) + eir_len, NULL); } -int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status) -{ - struct pending_cmd *cmd; - int err; - - cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev); - if (!cmd) - return -ENOENT; - - err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), - &hdev->discovery.type, sizeof(hdev->discovery.type)); - mgmt_pending_remove(cmd); - - return err; -} - int mgmt_discovering(struct hci_dev *hdev, u8 discovering) { struct mgmt_ev_discovering ev; -- cgit From 3fd319b830247a3fe5f489e622ab404b618e0906 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:36 -0300 Subject: Bluetooth: Refactor hci_cc_le_set_scan_enable This patch does a trivial refactoring in hci_cc_le_set_scan_enable. Since start and stop discovery command failures are now handled in mgmt layer, the status check became empty. So, we can move it to outside the switch statement. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 27f66dc88c9b..76ff1af05693 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -949,18 +949,15 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, if (!cp) return; + if (status) + return; + switch (cp->enable) { case LE_SCAN_ENABLE: - if (status) - return; - set_bit(HCI_LE_SCAN, &hdev->dev_flags); break; case LE_SCAN_DISABLE: - if (status) - return; - clear_bit(HCI_LE_SCAN, &hdev->dev_flags); break; -- cgit From 917eedc56c65ba96a3bab4c346d948e73dd872f1 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:37 -0300 Subject: Bluetooth: Remove LE scan helpers This patch removes the LE scan helpers hci_le_scan and hci_cancel_ le_scan and all code related to it. We now use the HCI request framework in device discovery functionality and these helpers are no longer needed. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 113 ----------------------------------------------- 1 file changed, 113 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9270d7ee489d..100539fcbfe5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1201,8 +1201,6 @@ static int hci_dev_do_close(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); - cancel_work_sync(&hdev->le_scan); - cancel_delayed_work(&hdev->power_off); hci_req_cancel(hdev, ENODEV); @@ -1991,82 +1989,6 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) return mgmt_device_unblocked(hdev, bdaddr, type); } -static void le_scan_param_req(struct hci_request *req, unsigned long opt) -{ - struct le_scan_params *param = (struct le_scan_params *) opt; - struct hci_cp_le_set_scan_param cp; - - memset(&cp, 0, sizeof(cp)); - cp.type = param->type; - cp.interval = cpu_to_le16(param->interval); - cp.window = cpu_to_le16(param->window); - - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); -} - -static void le_scan_enable_req(struct hci_request *req, unsigned long opt) -{ - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_ENABLE; - cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); -} - -static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, - u16 window, int timeout) -{ - long timeo = msecs_to_jiffies(3000); - struct le_scan_params param; - int err; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return -EINPROGRESS; - - param.type = type; - param.interval = interval; - param.window = window; - - hci_req_lock(hdev); - - err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) ¶m, - timeo); - if (!err) - err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo); - - hci_req_unlock(hdev); - - if (err < 0) - return err; - - queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - timeout); - - return 0; -} - -int hci_cancel_le_scan(struct hci_dev *hdev) -{ - BT_DBG("%s", hdev->name); - - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return -EALREADY; - - if (cancel_delayed_work(&hdev->le_scan_disable)) { - struct hci_cp_le_set_scan_enable cp; - - /* Send HCI command to disable LE Scan */ - memset(&cp, 0, sizeof(cp)); - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - } - - return 0; -} - static void inquiry_complete(struct hci_dev *hdev, u8 status) { if (status) { @@ -2143,40 +2065,6 @@ static void le_scan_disable_work(struct work_struct *work) BT_ERR("Disable LE scanning request failed: err %d", err); } -static void le_scan_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan); - struct le_scan_params *param = &hdev->le_scan_params; - - BT_DBG("%s", hdev->name); - - hci_do_le_scan(hdev, param->type, param->interval, param->window, - param->timeout); -} - -int hci_le_scan(struct hci_dev *hdev, u8 type, u16 interval, u16 window, - int timeout) -{ - struct le_scan_params *param = &hdev->le_scan_params; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) - return -ENOTSUPP; - - if (work_busy(&hdev->le_scan)) - return -EINPROGRESS; - - param->type = type; - param->interval = interval; - param->window = window; - param->timeout = timeout; - - queue_work(system_long_wq, &hdev->le_scan); - - return 0; -} - /* Alloc HCI device */ struct hci_dev *hci_alloc_dev(void) { @@ -2211,7 +2099,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); - INIT_WORK(&hdev->le_scan, le_scan_work); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); -- cgit From b0434345f2a7330be5277b63606cff26a7965982 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:38 -0300 Subject: Bluetooth: Remove inquiry helpers This patch removes hci_do_inquiry and hci_cancel_inquiry helpers. We now use the HCI request framework in device discovery functionality and these helpers are no longer needed. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 100539fcbfe5..e2e9d409d0f6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3501,36 +3501,6 @@ static void hci_cmd_work(struct work_struct *work) } } -int hci_do_inquiry(struct hci_dev *hdev, u8 length) -{ - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - struct hci_cp_inquiry cp; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_INQUIRY, &hdev->flags)) - return -EINPROGRESS; - - hci_inquiry_cache_flush(hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = length; - - return hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); -} - -int hci_cancel_inquiry(struct hci_dev *hdev) -{ - BT_DBG("%s", hdev->name); - - if (!test_bit(HCI_INQUIRY, &hdev->flags)) - return -EALREADY; - - return hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL); -} - u8 bdaddr_to_le(u8 bdaddr_type) { switch (bdaddr_type) { -- cgit From 8892d8beb37cb4ea531a5076946d5cc809b04c25 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:39 -0300 Subject: Bluetooth: Remove empty event handler This patch removes the hci_cc_le_set_scan_param event handler. This handler became empty because failures of this event are now handled by start_discovery_complete function in mgmt.c. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 76ff1af05693..db58e72316b9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -929,14 +929,6 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); } -static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - -} - static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) { @@ -2251,10 +2243,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_user_passkey_neg_reply(hdev, skb); break; - case HCI_OP_LE_SET_SCAN_PARAM: - hci_cc_le_set_scan_param(hdev, skb); - break; - case HCI_OP_LE_SET_ADV_ENABLE: hci_cc_le_set_adv_enable(hdev, skb); break; -- cgit From 12602d0cc005354a519b3eba443d7912ab71313a Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:40 -0300 Subject: Bluetooth: Mgmt Device Found Event We only want to send Mgmt Device Found Events if we are running the Device Discovery procedure (started by the MGMT Start Discovery Command). Inquiry or LE scanning triggered by HCI raw interface (e.g. hcitool) or kernel internals should not send Mgmt Device Found Events. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 69d17205745b..7ae737fcf5e7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4180,6 +4180,9 @@ int mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, struct mgmt_ev_device_found *ev = (void *) buf; size_t ev_size; + if (!hci_discovery_active(hdev)) + return -EPERM; + /* Leave 5 bytes for a potential CoD field */ if (sizeof(*ev) + eir_len + 5 > sizeof(buf)) return -EINVAL; -- cgit From 034cbea0931433cf88a1f79a385402604f08bd67 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 14 May 2013 11:44:16 +0300 Subject: Bluetooth: Use HCI_MGMT instead of HCI_LINK_KEYS flag Use HCI_MGMT flag instead of HCI_LINK_KEYS flag. There is a problem with HCI_LINK_KEYS flag since it is set only when link keys are loaded. Otherwise kernel assumes that old interface is used. Signed-off-by: Andrei Emeltchenko Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index db58e72316b9..0437200d92f4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2611,7 +2611,7 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s", hdev->name); - if (!test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) + if (!test_bit(HCI_MGMT, &hdev->dev_flags)) return; hci_dev_lock(hdev); @@ -2687,7 +2687,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_drop(conn); } - if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) + if (test_bit(HCI_MGMT, &hdev->dev_flags)) hci_add_link_key(hdev, conn, 1, &ev->bdaddr, ev->link_key, ev->key_type, pin_len); -- cgit From 0a804654af62dfea4899c66561d74d72273b2921 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 14 May 2013 11:44:17 +0300 Subject: Bluetooth: Remove unneeded flag Remove HCI_LINK_KEYS flag since using HCI_MGMT is enough for test that user space expects the kernel managing link keys. Signed-off-by: Andrei Emeltchenko Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7ae737fcf5e7..fedc5399d465 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1736,8 +1736,6 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, hci_link_keys_clear(hdev); - set_bit(HCI_LINK_KEYS, &hdev->dev_flags); - if (cp->debug_keys) set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags); else -- cgit From 673e1dd7ed7701cac8c5c247d152fd3d2da2a4f1 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 13 May 2013 10:07:11 +0800 Subject: Bluetooth: hidp: using strlcpy instead of strncpy, also beautify code. For NULL terminated string, need always let it ended by zero. Since have already called memcpy() to initialize 'ci', so need not redundant initialization. Better use ''if(session->hid) {} else if(session->input) {}"" instead of ''if(session->hid) {}; if(session->input) {};'' Signed-off-by: Chen Gang Reviewed-by: David Herrmann Acked-by: Jiri Kosina Signed-off-by: Gustavo Padovan --- net/bluetooth/hidp/core.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 940f5acb6694..f13a8da441a8 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -76,25 +76,19 @@ static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo ci->flags = session->flags; ci->state = BT_CONNECTED; - ci->vendor = 0x0000; - ci->product = 0x0000; - ci->version = 0x0000; - if (session->input) { ci->vendor = session->input->id.vendor; ci->product = session->input->id.product; ci->version = session->input->id.version; if (session->input->name) - strncpy(ci->name, session->input->name, 128); + strlcpy(ci->name, session->input->name, 128); else - strncpy(ci->name, "HID Boot Device", 128); - } - - if (session->hid) { + strlcpy(ci->name, "HID Boot Device", 128); + } else if (session->hid) { ci->vendor = session->hid->vendor; ci->product = session->hid->product; ci->version = session->hid->version; - strncpy(ci->name, session->hid->name, 128); + strlcpy(ci->name, session->hid->name, 128); } } -- cgit From b8f4e068004859eefac7b1ced59ddb67ca6d2d80 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Thu, 13 Jun 2013 12:34:31 +0100 Subject: Bluetooth: Improve comments on the HCI_Delete_Store_Link_Key issue Some Bluetooth controllers doesn't support this command so we first need to check for its support before sending it. This patch adds a lengthful commentary about this. Signed-off-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e2e9d409d0f6..061523eb52a1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -597,7 +597,15 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) struct hci_dev *hdev = req->hdev; u8 p; - /* Only send HCI_Delete_Stored_Link_Key if it is supported */ + /* Some Broadcom based Bluetooth controllers do not support the + * Delete Stored Link Key command. They are clearly indicating its + * absence in the bit mask of supported commands. + * + * Check the supported commands and only if the the command is marked + * as supported send it. If not supported assume that the controller + * does not have actual support for stored link keys which makes this + * command redundant anyway. + */ if (hdev->commands[6] & 0x80) { struct hci_cp_delete_stored_link_key cp; -- cgit From b33698e267ff17a97ecf3ee621a925bb356e9120 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 20 Jun 2013 16:30:00 +0800 Subject: ipv6: remove a useless pr_info() in addrconf_gre_config() This is debug info, should at least be pr_debug(), but given that this code is in upstream for two years, there is no need to keep this debugging printk any more, so just remove it. Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 80449121afa2..90788a1c6bbc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2760,8 +2760,6 @@ static void addrconf_gre_config(struct net_device *dev) struct inet6_dev *idev; struct in6_addr addr; - pr_info("%s(%s)\n", __func__, dev->name); - ASSERT_RTNL(); if ((idev = ipv6_find_idev(dev)) == NULL) { -- cgit From b3a6dfe8178c5159e54117078134fef806a913ca Mon Sep 17 00:00:00 2001 From: Asias He Date: Thu, 20 Jun 2013 17:20:30 +0800 Subject: VSOCK: Introduce vsock_auto_bind helper This peace of code is called three times, let's have a helper for it. Signed-off-by: Asias He Acked-by: Andy King Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 49 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3f77f42a3b58..b0b362ad051d 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -165,6 +165,18 @@ static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; static DEFINE_SPINLOCK(vsock_table_lock); +/* Autobind this socket to the local address if necessary. */ +static int vsock_auto_bind(struct vsock_sock *vsk) +{ + struct sock *sk = sk_vsock(vsk); + struct sockaddr_vm local_addr; + + if (vsock_addr_bound(&vsk->local_addr)) + return 0; + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + return __vsock_bind(sk, &local_addr); +} + static void vsock_init_tables(void) { int i; @@ -956,15 +968,10 @@ static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, lock_sock(sk); - if (!vsock_addr_bound(&vsk->local_addr)) { - struct sockaddr_vm local_addr; - - vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); - err = __vsock_bind(sk, &local_addr); - if (err != 0) - goto out; + err = vsock_auto_bind(vsk); + if (err) + goto out; - } /* If the provided message contains an address, use that. Otherwise * fall back on the socket's remote handle (if it has been connected). @@ -1038,15 +1045,9 @@ static int vsock_dgram_connect(struct socket *sock, lock_sock(sk); - if (!vsock_addr_bound(&vsk->local_addr)) { - struct sockaddr_vm local_addr; - - vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); - err = __vsock_bind(sk, &local_addr); - if (err != 0) - goto out; - - } + err = vsock_auto_bind(vsk); + if (err) + goto out; if (!transport->dgram_allow(remote_addr->svm_cid, remote_addr->svm_port)) { @@ -1163,17 +1164,9 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); - /* Autobind this socket to the local address if necessary. */ - if (!vsock_addr_bound(&vsk->local_addr)) { - struct sockaddr_vm local_addr; - - vsock_addr_init(&local_addr, VMADDR_CID_ANY, - VMADDR_PORT_ANY); - err = __vsock_bind(sk, &local_addr); - if (err != 0) - goto out; - - } + err = vsock_auto_bind(vsk); + if (err) + goto out; sk->sk_state = SS_CONNECTING; -- cgit From dce1a2877778fee172ab74411fcabd77bceb8e12 Mon Sep 17 00:00:00 2001 From: Asias He Date: Thu, 20 Jun 2013 17:20:31 +0800 Subject: VSOCK: Return VMCI_ERROR_NO_MEM when fails to allocate skb vmci_transport_recv_dgram_cb always return VMCI_SUCESS even if we fail to allocate skb, return VMCI_ERROR_NO_MEM instead. Signed-off-by: Asias He Acked-by: Andy King Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index daff75200e25..99b511ddb4cb 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -625,13 +625,14 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) /* Attach the packet to the socket's receive queue as an sk_buff. */ skb = alloc_skb(size, GFP_ATOMIC); - if (skb) { - /* sk_receive_skb() will do a sock_put(), so hold here. */ - sock_hold(sk); - skb_put(skb, size); - memcpy(skb->data, dg, size); - sk_receive_skb(sk, skb, 0); - } + if (!skb) + return VMCI_ERROR_NO_MEM; + + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); return VMCI_SUCCESS; } -- cgit From 0fc932467688e1c81fc109a93f323cef4993dc24 Mon Sep 17 00:00:00 2001 From: Asias He Date: Thu, 20 Jun 2013 17:20:32 +0800 Subject: VSOCK: Remove unnecessary label Signed-off-by: Asias He Acked-by: Andy King Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 99b511ddb4cb..ffc11df02af2 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -940,10 +940,9 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) * reset to prevent that. */ vmci_transport_send_reset(sk, pkt); - goto out; + break; } -out: release_sock(sk); kfree(recv_pkt_info); /* Release reference obtained in the stream callback when we fetched -- cgit From a49dd9dcb50195b35a5e59eb65b8e56584874630 Mon Sep 17 00:00:00 2001 From: Asias He Date: Thu, 20 Jun 2013 17:20:33 +0800 Subject: VSOCK: Fix VSOCK_HASH and VSOCK_CONN_HASH If we mod with VSOCK_HASH_SIZE -1, we get 0, 1, .... 249. Actually, we have vsock_bind_table[0 ... 250] and vsock_connected_table[0 .. 250]. In this case the last entry will never be used. We should mod with VSOCK_HASH_SIZE instead. Signed-off-by: Asias He Acked-by: Andy King Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b0b362ad051d..593071dabd1c 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -144,18 +144,18 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function - * mods with VSOCK_HASH_SIZE - 1 to ensure this. + * mods with VSOCK_HASH_SIZE to ensure this. */ #define VSOCK_HASH_SIZE 251 #define MAX_PORT_RETRIES 24 -#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) +#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) #define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) /* XXX This can probably be implemented in a better way. */ #define VSOCK_CONN_HASH(src, dst) \ - (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) + (((src)->svm_cid ^ (dst)->svm_port) % VSOCK_HASH_SIZE) #define vsock_connected_sockets(src, dst) \ (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) #define vsock_connected_sockets_vsk(vsk) \ -- cgit From 60877a32bce00041528576e6b8df5abe9251fa73 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jun 2013 01:15:51 -0700 Subject: net: allow large number of tx queues netif_alloc_netdev_queues() uses kcalloc() to allocate memory for the "struct netdev_queue *_tx" array. For large number of tx queues, kcalloc() might fail, so this patch does a fallback to vzalloc(). As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT to kzalloc() flags to do this fallback only when really needed. Signed-off-by: Eric Dumazet Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fa007dba6beb..722f633926e0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -130,6 +130,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -5253,17 +5254,28 @@ static void netdev_init_one_queue(struct net_device *dev, #endif } +static void netif_free_tx_queues(struct net_device *dev) +{ + if (is_vmalloc_addr(dev->_tx)) + vfree(dev->_tx); + else + kfree(dev->_tx); +} + static int netif_alloc_netdev_queues(struct net_device *dev) { unsigned int count = dev->num_tx_queues; struct netdev_queue *tx; + size_t sz = count * sizeof(*tx); - BUG_ON(count < 1); - - tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); - if (!tx) - return -ENOMEM; + BUG_ON(count < 1 || count > 0xffff); + tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!tx) { + tx = vzalloc(sz); + if (!tx) + return -ENOMEM; + } dev->_tx = tx; netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); @@ -5811,7 +5823,7 @@ free_all: free_pcpu: free_percpu(dev->pcpu_refcnt); - kfree(dev->_tx); + netif_free_tx_queues(dev); #ifdef CONFIG_RPS kfree(dev->_rx); #endif @@ -5836,7 +5848,7 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); - kfree(dev->_tx); + netif_free_tx_queues(dev); #ifdef CONFIG_RPS kfree(dev->_rx); #endif -- cgit From 7c77602f57da3f526fa7cf7bb02c49d3397c0729 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 21 Jun 2013 15:37:25 +0800 Subject: bridge: fix a typo in comments Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 31952a103949..81befac015e1 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1012,7 +1012,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, #endif /* - * Add port to rotuer_list + * Add port to router_list * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ -- cgit From 479b1a5825f68f9b63d26a13ca25ffbb7d2617ad Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 20 Jun 2013 15:08:14 -0700 Subject: openvswitch: Use correct config guard. This bug was introduced by commit aa310701e787087 (openvswitch: Add gre tunnel support.) Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/vport-gre.c | 2 +- net/openvswitch/vport.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 3a8d1900aa78..943e5c431354 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -16,7 +16,7 @@ * 02110-1301, USA */ -#ifdef CONFIG_NET_IPGRE_DEMUX +#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index f52dfb9cb5a7..ba81294219ac 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -39,7 +39,7 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, -#ifdef CONFIG_NET_IPGRE_DEMUX +#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) &ovs_gre_vport_ops, #endif }; -- cgit From aeb193ea6cef28e33589de05ef932424f8e19bde Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sun, 23 Jun 2013 23:33:48 -0700 Subject: net: Unmap fragment page once iterator is done Callers of skb_seq_read() are currently forced to call skb_abort_seq_read() even when consuming all the data because the last call to skb_seq_read (the one that returns 0 to indicate the end) fails to unmap the last fragment page. With this patch callers will be allowed to traverse the SKB data by calling skb_prepare_seq_read() once and repeatedly calling skb_seq_read() as originally intended (and documented in the original commit 677e90eda), that is, only call skb_abort_seq_read() if the sequential read is actually aborted. Signed-off-by: Wedson Almeida Filho Signed-off-by: David S. Miller --- net/batman-adv/main.c | 1 - net/core/skbuff.c | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 51aafd669cbb..08125f3f6064 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -473,7 +473,6 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) crc = crc32c(crc, data, len); consumed += len; } - skb_abort_seq_read(&st); return htonl(crc); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index edf37578e21e..9f73eca29fbe 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2541,8 +2541,13 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, unsigned int block_limit, abs_offset = consumed + st->lower_offset; skb_frag_t *frag; - if (unlikely(abs_offset >= st->upper_offset)) + if (unlikely(abs_offset >= st->upper_offset)) { + if (st->frag_data) { + kunmap_atomic(st->frag_data); + st->frag_data = NULL; + } return 0; + } next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; -- cgit From 6f390908e58113b9199424749c32a05181ff69d9 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 19 Jun 2013 14:06:25 -0700 Subject: wireless: Make sure __cfg80211_connect_result always puts bss Otherwise, we can leak a bss reference. Signed-off-by: Ben Greear Signed-off-by: Johannes Berg --- net/wireless/sme.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index c0bf781d4fbe..32dac8cdd2e3 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -557,6 +557,7 @@ static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); * SME event handling */ +/* This method must consume bss one way or another */ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, @@ -572,8 +573,10 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, ASSERT_WDEV_LOCK(wdev); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && - wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) { + cfg80211_put_bss(wdev->wiphy, bss); return; + } nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, -- cgit From 0e3a39b5620bc84f25ffb0592b05b0350e8b0520 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 19 Jun 2013 14:06:27 -0700 Subject: wireless: add comments about bss refcounting Should help the next person that tries to understand the bss refcounting logic. Signed-off-by: Ben Greear Signed-off-by: Johannes Berg --- net/wireless/scan.c | 4 ++++ net/wireless/sme.c | 3 +++ 2 files changed, 7 insertions(+) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index dd01b58fa78c..ae8c186b50d6 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -523,6 +523,7 @@ static int cmp_bss(struct cfg80211_bss *a, } } +/* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, @@ -678,6 +679,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, return true; } +/* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *tmp) @@ -866,6 +868,7 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, return channel; } +/* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss* cfg80211_inform_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, @@ -923,6 +926,7 @@ cfg80211_inform_bss(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_inform_bss); +/* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss * cfg80211_inform_bss_frame(struct wiphy *wiphy, struct ieee80211_channel *channel, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 32dac8cdd2e3..1d3cfb1a3f28 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -239,6 +239,7 @@ void cfg80211_conn_work(struct work_struct *work) rtnl_unlock(); } +/* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -699,6 +700,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, } EXPORT_SYMBOL(cfg80211_connect_result); +/* Consumes bss object one way or another */ void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, @@ -775,6 +777,7 @@ void cfg80211_roamed(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_roamed); +/* Consumes bss object one way or another */ void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, -- cgit From f9bef3df52fe61067e4c1c6cfb2037cb6b259a6a Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 19 Jun 2013 14:06:26 -0700 Subject: wireless: check for dangling wdev->current_bss pointer If it *is* still set when the netdev is being deleted, then we are about to leak a pointer. Warn and clean up in that case. Signed-off-by: Ben Greear Signed-off-by: Johannes Berg --- net/wireless/core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 4224e7554a76..672459b9483b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -934,6 +934,12 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * freed. */ cfg80211_process_wdev_events(wdev); + + if (WARN_ON(wdev->current_bss)) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); + wdev->current_bss = NULL; + } break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) -- cgit From a33d402610d2d3a422136defe8237f4ddfb69fd9 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Sun, 23 Jun 2013 12:51:21 +0200 Subject: cfg80211: fix compilation warning for cfg80211_leave_all() The following compilation issue popped up moving from v3.10-rc1 to v3.10-rc6 after merging wireless-testing. net/wireless/sysfs.c:86:13: error: 'cfg80211_leave_all' defined but not used [-Werror=unused-function] The function is only called when CONFIG_PM is enabled. Moving the function under CONFIG_PM as well. Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- net/wireless/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 360a42c6f694..a23253e06358 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -83,6 +83,7 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +#ifdef CONFIG_PM static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) { struct wireless_dev *wdev; @@ -91,7 +92,6 @@ static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) cfg80211_leave(rdev, wdev); } -#ifdef CONFIG_PM static int wiphy_suspend(struct device *dev, pm_message_t state) { struct cfg80211_registered_device *rdev = dev_to_rdev(dev); -- cgit From ac49e1a8969eeb819c4fc2eced9ee9ef9f35a4a9 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 20 Jun 2013 23:50:58 -0700 Subject: mac80211: allow self-protected frame tx without sta Useful for userspace mesh to authenticate and peer without a station entry, since both steps may fail anyway. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 93120de776f0..8184d121ff09 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2827,7 +2827,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, !rcu_access_pointer(sdata->bss->beacon)) need_offchan = true; if (!ieee80211_is_action(mgmt->frame_control) || - mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) + mgmt->u.action.category == WLAN_CATEGORY_PUBLIC || + mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED) break; rcu_read_lock(); sta = sta_info_get(sdata, mgmt->da); -- cgit From 6c7c4cbfd5f59c04a40af67ad72d14e19215ef36 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 20 Jun 2013 23:50:59 -0700 Subject: mac80211: initialize power mode for mesh STAs Previously the default mesh STA nonpeer power mode was UNKNOWN (0) make the default mesh STA power mode ACTIVE, to prevent unnecessary frame buffering while peering is not yet complete. Fixes a panic in ath9k_htc when adding stations from userspace, and mcast buffered frames are later released. Thanks to Bob Copeland for his help debugging this. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index aaf68d297221..aeb967a0aeed 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -347,6 +347,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (ieee80211_vif_is_mesh(&sdata->vif) && !sdata->u.mesh.user_mpm) init_timer(&sta->plink_timer); + sta->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; #endif memcpy(sta->sta.addr, addr, ETH_ALEN); -- cgit From bcbde0d449eda7afa8f63280b165c8300dbd00e2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jun 2013 19:38:07 +0200 Subject: net: netlink: virtual tap device management Similarly to the networking receive path with ptype_all taps, we add the possibility to register netdevices that are for ARPHRD_NETLINK to the netlink subsystem, so that those can be used for netlink analyzers resp. debuggers. We do not offer a direct callback function as out-of-tree modules could do crap with it. Instead, a netdevice must be registered properly and only receives a clone, managed by the netlink layer. Symbols are exported as GPL-only. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 275d901d7e46..6967fbcca6c5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -101,6 +102,9 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); static ATOMIC_NOTIFIER_HEAD(netlink_chain); +static DEFINE_SPINLOCK(netlink_tap_lock); +static struct list_head netlink_tap_all __read_mostly; + static inline u32 netlink_group_mask(u32 group) { return group ? 1 << (group - 1) : 0; @@ -111,6 +115,100 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } +int netlink_add_tap(struct netlink_tap *nt) +{ + if (unlikely(nt->dev->type != ARPHRD_NETLINK)) + return -EINVAL; + + spin_lock(&netlink_tap_lock); + list_add_rcu(&nt->list, &netlink_tap_all); + spin_unlock(&netlink_tap_lock); + + if (nt->module) + __module_get(nt->module); + + return 0; +} +EXPORT_SYMBOL_GPL(netlink_add_tap); + +int __netlink_remove_tap(struct netlink_tap *nt) +{ + bool found = false; + struct netlink_tap *tmp; + + spin_lock(&netlink_tap_lock); + + list_for_each_entry(tmp, &netlink_tap_all, list) { + if (nt == tmp) { + list_del_rcu(&nt->list); + found = true; + goto out; + } + } + + pr_warn("__netlink_remove_tap: %p not found\n", nt); +out: + spin_unlock(&netlink_tap_lock); + + if (found && nt->module) + module_put(nt->module); + + return found ? 0 : -ENODEV; +} +EXPORT_SYMBOL_GPL(__netlink_remove_tap); + +int netlink_remove_tap(struct netlink_tap *nt) +{ + int ret; + + ret = __netlink_remove_tap(nt); + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL_GPL(netlink_remove_tap); + +static int __netlink_deliver_tap_skb(struct sk_buff *skb, + struct net_device *dev) +{ + struct sk_buff *nskb; + int ret = -ENOMEM; + + dev_hold(dev); + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { + nskb->dev = dev; + ret = dev_queue_xmit(nskb); + if (unlikely(ret > 0)) + ret = net_xmit_errno(ret); + } + + dev_put(dev); + return ret; +} + +static void __netlink_deliver_tap(struct sk_buff *skb) +{ + int ret; + struct netlink_tap *tmp; + + list_for_each_entry_rcu(tmp, &netlink_tap_all, list) { + ret = __netlink_deliver_tap_skb(skb, tmp->dev); + if (unlikely(ret)) + break; + } +} + +static void netlink_deliver_tap(struct sk_buff *skb) +{ + rcu_read_lock(); + + if (unlikely(!list_empty(&netlink_tap_all))) + __netlink_deliver_tap(skb); + + rcu_read_unlock(); +} + static void netlink_overrun(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -1518,6 +1616,8 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; + netlink_deliver_tap(skb); + #ifdef CONFIG_NETLINK_MMAP if (netlink_skb_is_mmaped(skb)) netlink_queue_mmaped_skb(sk, skb); @@ -1578,6 +1678,11 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { + /* We could do a netlink_deliver_tap(skb) here as well + * but since this is intended for the kernel only, we + * should rather let it stay under the hood. + */ + ret = skb->len; netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; @@ -2975,6 +3080,8 @@ static int __init netlink_proto_init(void) nl_table[i].compare = netlink_compare; } + INIT_LIST_HEAD(&netlink_tap_all); + netlink_add_usersock_entry(); sock_register(&netlink_family_ops); -- cgit From 6da334ee0c101fc5ecf62f2b1e11b1524be7b159 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Jun 2013 01:30:11 -0700 Subject: ipv6: add include file to suppress sparse warnings commit f88c91ddba95 ("ipv6: statically link register_inet6addr_notifier()" added following sparse warnings : net/ipv6/addrconf_core.c:83:5: warning: symbol 'register_inet6addr_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:89:5: warning: symbol 'unregister_inet6addr_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:95:5: warning: symbol 'inet6addr_notifier_call_chain' was not declared. Should it be static? Signed-off-by: Eric Dumazet Cc: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/addrconf_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 72104562c864..d2f87427244b 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -5,6 +5,7 @@ #include #include +#include #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) -- cgit From f693dff7107063f0ce08502052b78c4d4feb0e87 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 25 Jun 2013 16:01:55 +0300 Subject: rtnetlink: allow using zero MAC address in rtnl_fdb_{add,del} This is required for multiple default destinations management in VXLAN Signed-off-by: Mike Rapoport Signed-off-by: Stephen Hemminger --- net/core/rtnetlink.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9007533867f0..3de740834d1f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2109,10 +2109,6 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) } addr = nla_data(tb[NDA_LLADDR]); - if (is_zero_ether_addr(addr)) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); - return -EINVAL; - } err = -EOPNOTSUPP; @@ -2210,10 +2206,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) } addr = nla_data(tb[NDA_LLADDR]); - if (is_zero_ether_addr(addr)) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); - return -EINVAL; - } err = -EOPNOTSUPP; -- cgit From b7b1bfce0bb68bd8f6e62a28295922785cc63781 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 23 Jun 2013 18:39:01 +0200 Subject: ipv6: split duplicate address detection and router solicitation timer This patch splits the timers for duplicate address detection and router solicitations apart. The router solicitations timer goes into inet6_dev and the dad timer stays in inet6_ifaddr. The reason behind this patch is to reduce the number of unneeded router solicitations send out by the host if additional link-local addresses are created. Currently we send out RS for every link-local address on an interface. If the RS timer fires we pick a source address with ipv6_get_lladdr. This change could hurt people adding additional link-local addresses and specifying these addresses in the radvd clients section because we no longer guarantee that we use every ll address as source address in router solicitations. Cc: Flavio Leitner Cc: Hideaki YOSHIFUJI Cc: David Stevens Signed-off-by: Hannes Frederic Sowa Reviewed-by: Flavio Leitner Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 164 +++++++++++++++++++++++++++++----------------------- 1 file changed, 91 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 90788a1c6bbc..c06bc76280b2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -253,37 +253,32 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev) return !qdisc_tx_is_noop(dev); } -static void addrconf_del_timer(struct inet6_ifaddr *ifp) +static void addrconf_del_rs_timer(struct inet6_dev *idev) { - if (del_timer(&ifp->timer)) + if (del_timer(&idev->rs_timer)) + __in6_dev_put(idev); +} + +static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp) +{ + if (del_timer(&ifp->dad_timer)) __in6_ifa_put(ifp); } -enum addrconf_timer_t { - AC_NONE, - AC_DAD, - AC_RS, -}; +static void addrconf_mod_rs_timer(struct inet6_dev *idev, + unsigned long when) +{ + if (!timer_pending(&idev->rs_timer)) + in6_dev_hold(idev); + mod_timer(&idev->rs_timer, jiffies + when); +} -static void addrconf_mod_timer(struct inet6_ifaddr *ifp, - enum addrconf_timer_t what, - unsigned long when) +static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, + unsigned long when) { - if (!del_timer(&ifp->timer)) + if (!timer_pending(&ifp->dad_timer)) in6_ifa_hold(ifp); - - switch (what) { - case AC_DAD: - ifp->timer.function = addrconf_dad_timer; - break; - case AC_RS: - ifp->timer.function = addrconf_rs_timer; - break; - default: - break; - } - ifp->timer.expires = jiffies + when; - add_timer(&ifp->timer); + mod_timer(&ifp->dad_timer, jiffies + when); } static int snmp6_alloc_dev(struct inet6_dev *idev) @@ -326,6 +321,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) WARN_ON(!list_empty(&idev->addr_list)); WARN_ON(idev->mc_list != NULL); + WARN_ON(timer_pending(&idev->rs_timer)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL"); @@ -357,7 +353,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; INIT_LIST_HEAD(&ndev->addr_list); - + setup_timer(&ndev->rs_timer, addrconf_rs_timer, + (unsigned long)ndev); memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; @@ -776,7 +773,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); - if (del_timer(&ifp->timer)) + if (del_timer(&ifp->dad_timer)) pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { @@ -869,9 +866,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); - init_timer(&ifa->timer); + setup_timer(&ifa->dad_timer, addrconf_dad_timer, + (unsigned long)ifa); INIT_HLIST_NODE(&ifa->addr_lst); - ifa->timer.data = (unsigned long) ifa; ifa->scope = scope; ifa->prefix_len = pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; @@ -994,7 +991,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } write_unlock_bh(&idev->lock); - addrconf_del_timer(ifp); + addrconf_del_dad_timer(ifp); ipv6_ifa_notify(RTM_DELADDR, ifp); @@ -1447,6 +1444,23 @@ try_nextdev: } EXPORT_SYMBOL(ipv6_dev_get_saddr); +static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, + unsigned char banned_flags) +{ + struct inet6_ifaddr *ifp; + int err = -EADDRNOTAVAIL; + + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { + *addr = ifp->addr; + err = 0; + break; + } + } + return err; +} + int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags) { @@ -1456,17 +1470,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - struct inet6_ifaddr *ifp; - read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) { - if (ifp->scope == IFA_LINK && - !(ifp->flags & banned_flags)) { - *addr = ifp->addr; - err = 0; - break; - } - } + err = __ipv6_get_lladdr(idev, addr, banned_flags); read_unlock_bh(&idev->lock); } rcu_read_unlock(); @@ -1580,7 +1585,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); - addrconf_del_timer(ifp); + addrconf_del_dad_timer(ifp); ifp->flags |= IFA_F_TENTATIVE; if (dad_failed) ifp->flags |= IFA_F_DADFAILED; @@ -3036,7 +3041,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); - addrconf_del_timer(ifa); + addrconf_del_dad_timer(ifa); goto restart; } } @@ -3045,6 +3050,8 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); + addrconf_del_rs_timer(idev); + /* Step 2: clear flags for stateless addrconf */ if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); @@ -3074,7 +3081,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, struct inet6_ifaddr, if_list); - addrconf_del_timer(ifa); + addrconf_del_dad_timer(ifa); list_del(&ifa->if_list); @@ -3116,10 +3123,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) static void addrconf_rs_timer(unsigned long data) { - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; - struct inet6_dev *idev = ifp->idev; + struct inet6_dev *idev = (struct inet6_dev *)data; + struct in6_addr lladdr; - read_lock(&idev->lock); + write_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) goto out; @@ -3130,18 +3137,19 @@ static void addrconf_rs_timer(unsigned long data) if (idev->if_flags & IF_RA_RCVD) goto out; - spin_lock(&ifp->lock); - if (ifp->probes++ < idev->cnf.rtr_solicits) { - /* The wait after the last probe can be shorter */ - addrconf_mod_timer(ifp, AC_RS, - (ifp->probes == idev->cnf.rtr_solicits) ? - idev->cnf.rtr_solicit_delay : - idev->cnf.rtr_solicit_interval); - spin_unlock(&ifp->lock); + if (idev->rs_probes++ < idev->cnf.rtr_solicits) { + if (!__ipv6_get_lladdr(idev, &lladdr, IFA_F_TENTATIVE)) + ndisc_send_rs(idev->dev, &lladdr, + &in6addr_linklocal_allrouters); + else + goto out; - ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); + /* The wait after the last probe can be shorter */ + addrconf_mod_rs_timer(idev, (idev->rs_probes == + idev->cnf.rtr_solicits) ? + idev->cnf.rtr_solicit_delay : + idev->cnf.rtr_solicit_interval); } else { - spin_unlock(&ifp->lock); /* * Note: we do not support deprecated "all on-link" * assumption any longer. @@ -3150,8 +3158,8 @@ static void addrconf_rs_timer(unsigned long data) } out: - read_unlock(&idev->lock); - in6_ifa_put(ifp); + write_unlock(&idev->lock); + in6_dev_put(idev); } /* @@ -3167,8 +3175,8 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) else rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); - ifp->probes = idev->cnf.dad_transmits; - addrconf_mod_timer(ifp, AC_DAD, rand_num); + ifp->dad_probes = idev->cnf.dad_transmits; + addrconf_mod_dad_timer(ifp, rand_num); } static void addrconf_dad_start(struct inet6_ifaddr *ifp) @@ -3229,40 +3237,40 @@ static void addrconf_dad_timer(unsigned long data) struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; - if (!ifp->probes && addrconf_dad_end(ifp)) + if (!ifp->dad_probes && addrconf_dad_end(ifp)) goto out; - read_lock(&idev->lock); + write_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { - read_unlock(&idev->lock); + write_unlock(&idev->lock); goto out; } spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) { spin_unlock(&ifp->lock); - read_unlock(&idev->lock); + write_unlock(&idev->lock); goto out; } - if (ifp->probes == 0) { + if (ifp->dad_probes == 0) { /* * DAD was successful */ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - read_unlock(&idev->lock); + write_unlock(&idev->lock); addrconf_dad_completed(ifp); goto out; } - ifp->probes--; - addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); + ifp->dad_probes--; + addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time); spin_unlock(&ifp->lock); - read_unlock(&idev->lock); + write_unlock(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); @@ -3274,6 +3282,9 @@ out: static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { struct net_device *dev = ifp->idev->dev; + struct in6_addr lladdr; + + addrconf_del_dad_timer(ifp); /* * Configure the address for reception. Now it is valid. @@ -3294,13 +3305,20 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) * [...] as part of DAD [...] there is no need * to delay again before sending the first RS */ - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); + if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) + ndisc_send_rs(dev, &lladdr, + &in6addr_linklocal_allrouters); + else + return; - spin_lock_bh(&ifp->lock); - ifp->probes = 1; + write_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); - spin_unlock_bh(&ifp->lock); + addrconf_mod_rs_timer(ifp->idev, + ifp->idev->cnf.rtr_solicit_interval); + spin_unlock(&ifp->lock); + write_unlock_bh(&ifp->idev->lock); } } -- cgit From 876fd05ddbae03166e7037fca957b55bb3be6594 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 24 Jun 2013 00:22:20 +0200 Subject: ipv6: don't disable interface if last ipv6 address is removed The reason behind this change is that as soon as we delete the last ipv6 address of an interface we also lose the /proc/sys/net/ipv6/conf/ directory. This seems to be a usability problem for me. I don't see any reason why we should shutdown ipv6 on that interface in such cases. Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c06bc76280b2..e799a8838ed0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2507,12 +2507,6 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p read_unlock_bh(&idev->lock); ipv6_del_addr(ifp); - - /* If the last address is deleted administratively, - disable IPv6 on this interface. - */ - if (list_empty(&idev->addr_list)) - addrconf_ifdown(idev->dev, 1); return 0; } } -- cgit From 2b9651d72d3fc1a9053ae1a323f8407e1f63b436 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 24 Jun 2013 22:03:28 +0200 Subject: ipv6: remove old token ipv6 address as soon as possible If the tokenized ip address is re-set on an interface we depend on the arrival of a new router advertisment to call addrconf_verify to clean up the old address (which valid_lft is now set to 0). Old addresses can linger around for a longer time if e.g. the source of router advertisments vanishes. So, call addrconf_verify immediately after setting the new tokenized address to get rid of the old tokenized addresses. Cc: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e799a8838ed0..afaf3cdadf58 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4375,6 +4375,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); + addrconf_verify(0); return 0; } -- cgit From 52db882f3fc2903014e638ee91e690085fe37fdb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 25 Jun 2013 18:17:27 +0200 Subject: net: sctp: migrate cookie life from timeval to ktime Currently, SCTP code defines its own timeval functions (since timeval is rarely used inside the kernel by others), namely tv_lt() and TIMEVAL_ADD() macros, that operate on SCTP cookie expiration. We might as well remove all those, and operate directly on ktime structures for a couple of reasons: ktime is available on all archs; complexity of ktime calculations depending on the arch is less than (reduces to a simple arithmetic operations on archs with BITS_PER_LONG == 64 or CONFIG_KTIME_SCALAR) or equal to timeval functions (other archs); code becomes more readable; macros can be thrown out. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 8 +------- net/sctp/sm_make_chunk.c | 19 ++++++++----------- net/sctp/socket.c | 14 +++----------- 3 files changed, 12 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index bf6e6bd553c0..9a383a8774e8 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -102,13 +102,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); asoc->state = SCTP_STATE_CLOSED; - - /* Set these values from the socket values, a conversion between - * millsecons to seconds/microseconds must also be done. - */ - asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000; - asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000) - * 1000; + asoc->cookie_life = ms_to_ktime(sp->assocparams.sasoc_cookie_life); asoc->frag_point = 0; asoc->user_frag = sp->user_frag; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fc8548743ed5..dd71f1f9ba10 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1630,8 +1630,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, cookie->c.adaptation_ind = asoc->peer.adaptation_ind; /* Set an expiration time for the cookie. */ - do_gettimeofday(&cookie->c.expiration); - TIMEVAL_ADD(asoc->cookie_life, cookie->c.expiration); + cookie->c.expiration = ktime_add(asoc->cookie_life, + ktime_get()); /* Copy the peer's init packet. */ memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr, @@ -1680,7 +1680,7 @@ struct sctp_association *sctp_unpack_cookie( unsigned int len; sctp_scope_t scope; struct sk_buff *skb = chunk->skb; - struct timeval tv; + ktime_t kt; struct hash_desc desc; /* Header size is static data prior to the actual cookie, including @@ -1757,11 +1757,11 @@ no_hmac: * down the new association establishment instead of every packet. */ if (sock_flag(ep->base.sk, SOCK_TIMESTAMP)) - skb_get_timestamp(skb, &tv); + kt = skb_get_ktime(skb); else - do_gettimeofday(&tv); + kt = ktime_get(); - if (!asoc && tv_lt(bear_cookie->expiration, tv)) { + if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) { /* * Section 3.3.10.3 Stale Cookie Error (3) * @@ -1773,9 +1773,7 @@ no_hmac: len = ntohs(chunk->chunk_hdr->length); *errp = sctp_make_op_error_space(asoc, chunk, len); if (*errp) { - suseconds_t usecs = (tv.tv_sec - - bear_cookie->expiration.tv_sec) * 1000000L + - tv.tv_usec - bear_cookie->expiration.tv_usec; + suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration)); __be32 n = htonl(usecs); sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, @@ -2514,8 +2512,7 @@ do_addr_param: /* Suggested Cookie Life span increment's unit is msec, * (1/1000sec). */ - asoc->cookie_life.tv_sec += stale / 1000; - asoc->cookie_life.tv_usec += (stale % 1000) * 1000; + asoc->cookie_life = ktime_add_ms(asoc->cookie_life, stale); break; case SCTP_PARAM_HOST_NAME_ADDRESS: diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 32db19ba4a21..4c47e5578d71 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2910,13 +2910,8 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsig asoc->max_retrans = assocparams.sasoc_asocmaxrxt; } - if (assocparams.sasoc_cookie_life != 0) { - asoc->cookie_life.tv_sec = - assocparams.sasoc_cookie_life / 1000; - asoc->cookie_life.tv_usec = - (assocparams.sasoc_cookie_life % 1000) - * 1000; - } + if (assocparams.sasoc_cookie_life != 0) + asoc->cookie_life = ms_to_ktime(assocparams.sasoc_cookie_life); } else { /* Set the values to the endpoint */ struct sctp_sock *sp = sctp_sk(sk); @@ -5074,10 +5069,7 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, assocparams.sasoc_asocmaxrxt = asoc->max_retrans; assocparams.sasoc_peer_rwnd = asoc->peer.rwnd; assocparams.sasoc_local_rwnd = asoc->a_rwnd; - assocparams.sasoc_cookie_life = (asoc->cookie_life.tv_sec - * 1000) + - (asoc->cookie_life.tv_usec - / 1000); + assocparams.sasoc_cookie_life = ktime_to_ms(asoc->cookie_life); list_for_each(pos, &asoc->peer.transport_addr_list) { cnt ++; -- cgit From b527fe693304d244b6103dc9f8a87150e71c29f7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 25 Jun 2013 18:17:28 +0200 Subject: net: sctp: minor: sctp_seq_dump_local_addrs add missing newline A trailing newline has been forgotten to add into the WARN(). Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 0c83162a6bf8..62526c477050 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -138,7 +138,7 @@ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_commo peer = asoc->peer.primary_path; if (unlikely(peer == NULL)) { - WARN(1, "Association %p with NULL primary path!", asoc); + WARN(1, "Association %p with NULL primary path!\n", asoc); return; } -- cgit From 0a2fbac197441ebeafbbef09d4bbc0b5e73716d7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 25 Jun 2013 18:17:29 +0200 Subject: net: sctp: decouple cleaning some socket data from endpoint Rather instead of having the endpoint clean the garbage from the socket, use a sk_destruct handler sctp_destruct_sock(), that does the job for that when there are no more references on the socket. At least do this for our crypto transform through crypto_free_hash() that is allocated when in listening state. Also, perform sctp_put_port() only when sk is valid. At a later point in time we can still determine if there's an option of placing this into sk_prot->unhash() or sctp_endpoint_free() without any races. For now, leave it in sctp_endpoint_destroy() though. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/endpointola.c | 19 ++++++++++--------- net/sctp/socket.c | 16 +++++++++++++++- 2 files changed, 25 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index a8b26741c0af..b26999d508ba 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -247,10 +247,9 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) /* Final destructor for endpoint. */ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { - SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); + struct sock *sk; - /* Free up the HMAC transform. */ - crypto_free_hash(sctp_sk(ep->base.sk)->hmac); + SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); /* Free the digest buffer */ kfree(ep->digest); @@ -271,13 +270,15 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) memset(ep->secret_key, 0, sizeof(ep->secret_key)); - /* Remove and free the port */ - if (sctp_sk(ep->base.sk)->bind_hash) - sctp_put_port(ep->base.sk); - /* Give up our hold on the sock. */ - if (ep->base.sk) - sock_put(ep->base.sk); + sk = ep->base.sk; + if (sk != NULL) { + /* Remove and free the port */ + if (sctp_sk(sk)->bind_hash) + sctp_put_port(sk); + + sock_put(sk); + } kfree(ep); SCTP_DBG_OBJCNT_DEC(ep); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4c47e5578d71..ba9359c20c8a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -93,6 +93,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); static void sctp_wait_for_close(struct sock *sk, long timeo); +static void sctp_destruct_sock(struct sock *sk); static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, union sctp_addr *addr, int len); static int sctp_bindx_add(struct sock *, struct sockaddr *, int); @@ -3966,6 +3967,8 @@ static int sctp_init_sock(struct sock *sk) sp->hmac = NULL; + sk->sk_destruct = sctp_destruct_sock; + SCTP_DBG_OBJCNT_INC(sock); local_bh_disable(); @@ -4008,6 +4011,17 @@ static void sctp_destroy_sock(struct sock *sk) local_bh_enable(); } +/* Triggered when there are no references on the socket anymore */ +static void sctp_destruct_sock(struct sock *sk) +{ + struct sctp_sock *sp = sctp_sk(sk); + + /* Free up the HMAC transform. */ + crypto_free_hash(sp->hmac); + + inet_sock_destruct(sk); +} + /* API 4.1.7 shutdown() - TCP Style Syntax * int shutdown(int socket, int how); * @@ -6848,7 +6862,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_reuse = sk->sk_reuse; newsk->sk_shutdown = sk->sk_shutdown; - newsk->sk_destruct = inet_sock_destruct; + newsk->sk_destruct = sctp_destruct_sock; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; -- cgit From 62208f12451f723cd9e9f1d6d22866a61545e488 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 25 Jun 2013 18:17:30 +0200 Subject: net: sctp: simplify sctp_get_port No need to have an extra ret variable when we directly can return the value of sctp_get_port_local(). Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ba9359c20c8a..66fcdcfe1b74 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6036,7 +6036,6 @@ fail: */ static int sctp_get_port(struct sock *sk, unsigned short snum) { - long ret; union sctp_addr addr; struct sctp_af *af = sctp_sk(sk)->pf->af; @@ -6045,9 +6044,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) addr.v4.sin_port = htons(snum); /* Note: sk->sk_num gets filled in if ephemeral port request. */ - ret = sctp_get_port_local(sk, &addr); - - return ret ? 1 : 0; + return !!sctp_get_port_local(sk, &addr); } /* -- cgit From 2d48d67fa8cd129ea85ea02d91b4a793286866f8 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 24 Jun 2013 10:28:03 +0300 Subject: net: poll/select low latency socket support select/poll busy-poll support. Split sysctl value into two separate ones, one for read and one for poll. updated Documentation/sysctl/net.txt Add a new poll flag POLL_LL. When this flag is set, sock_poll will call sk_poll_ll if possible. sock_poll sets this flag in its return value to indicate to select/poll when a socket that can busy poll is found. When poll/select have nothing to report, call the low-level sock_poll again until we are out of time or we find something. Once the system call finds something, it stops setting POLL_LL, so it can return the result to the user ASAP. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- net/core/sysctl_net_core.c | 8 ++++++++ net/socket.c | 14 +++++++++++++- 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 1e744b12fda3..b6c619f4d47b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2307,7 +2307,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) #ifdef CONFIG_NET_LL_RX_POLL sk->sk_napi_id = 0; - sk->sk_ll_usec = sysctl_net_ll_poll; + sk->sk_ll_usec = sysctl_net_ll_read; #endif /* diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 62702c2053de..afc677eadd93 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -306,6 +306,14 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "low_latency_read", + .data = &sysctl_net_ll_read, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +# #endif #endif /* CONFIG_NET */ { diff --git a/net/socket.c b/net/socket.c index 3eec3f76b49c..4da14cbd49b6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -107,6 +107,7 @@ #include #ifdef CONFIG_NET_LL_RX_POLL +unsigned int sysctl_net_ll_read __read_mostly; unsigned int sysctl_net_ll_poll __read_mostly; #endif @@ -1147,13 +1148,24 @@ EXPORT_SYMBOL(sock_create_lite); /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) { + unsigned int ll_flag = 0; struct socket *sock; /* * We can't return errors to poll, so it's either yes or no. */ sock = file->private_data; - return sock->ops->poll(file, sock, wait); + + if (sk_valid_ll(sock->sk)) { + /* this socket can poll_ll so tell the system call */ + ll_flag = POLL_LL; + + /* once, only if requested by syscall */ + if (wait && (wait->_key & POLL_LL)) + sk_poll_ll(sock->sk, 1); + } + + return ll_flag | sock->ops->poll(file, sock, wait); } static int sock_mmap(struct file *file, struct vm_area_struct *vma) -- cgit From 537f7f8494be4219eb0ef47121ea16a6f9f0f49e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 25 Jun 2013 09:34:36 -0700 Subject: bridge: check for zero ether address in fdb add The check for all-zero ether address was removed from rtnetlink core, since Vxlan uses all-zero ether address to signify default address. Need to add check back in for bridge. Signed-off-by: Stephen Hemminger --- net/bridge/br_fdb.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ebfa4443c69b..60aca9109a50 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -707,6 +707,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } } + if (is_zero_ether_addr(addr)) { + pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n"); + return -EINVAL; + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", -- cgit From bba54de5bdd107d3841b560f1a9cb0ed06e79533 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 16 Jun 2013 09:09:36 +0300 Subject: ipvs: provide iph to schedulers Before now the schedulers needed access only to IP addresses and it was easy to get them from skb by using ip_vs_fill_iph_addr_only. New changes for the SH scheduler will need the protocol and ports which is difficult to get from skb for the IPv6 case. As we have all the data in the iph structure, to avoid the same slow lookups provide the iph to schedulers. Signed-off-by: Julian Anastasov Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 4 ++-- net/netfilter/ipvs/ip_vs_dh.c | 10 ++++------ net/netfilter/ipvs/ip_vs_lblc.c | 12 +++++------- net/netfilter/ipvs/ip_vs_lblcr.c | 12 +++++------- net/netfilter/ipvs/ip_vs_lc.c | 3 ++- net/netfilter/ipvs/ip_vs_nq.c | 3 ++- net/netfilter/ipvs/ip_vs_rr.c | 3 ++- net/netfilter/ipvs/ip_vs_sed.c | 3 ++- net/netfilter/ipvs/ip_vs_sh.c | 10 ++++------ net/netfilter/ipvs/ip_vs_wlc.c | 3 ++- net/netfilter/ipvs/ip_vs_wrr.c | 3 ++- 11 files changed, 32 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 05565d2b3a61..e9b0330f220d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -305,7 +305,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * return *ignored=0 i.e. ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb); + dest = sched->schedule(svc, skb, iph); if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -452,7 +452,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb); + dest = sched->schedule(svc, skb, iph); if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index ccab120df45e..c3b84546ea9e 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -214,18 +214,16 @@ static inline int is_overloaded(struct ip_vs_dest *dest) * Destination hashing scheduling */ static struct ip_vs_dest * -ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_dh_state *s; - struct ip_vs_iphdr iph; - - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); s = (struct ip_vs_dh_state *) svc->sched_data; - dest = ip_vs_dh_get(svc->af, s, &iph.daddr); + dest = ip_vs_dh_get(svc->af, s, &iph->daddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -235,7 +233,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 44595b8ae37f..1383b0eadc0e 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -487,19 +487,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) * Locality-Based (weighted) Least-Connection scheduling */ static struct ip_vs_dest * -ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_lblc_table *tbl = svc->sched_data; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest = NULL; struct ip_vs_lblc_entry *en; - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); - IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); + en = ip_vs_lblc_get(svc->af, tbl, &iph->daddr); if (en) { /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -529,12 +527,12 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* If we fail to create a cache entry, we'll just use the valid dest */ spin_lock_bh(&svc->sched_lock); if (!tbl->dead) - ip_vs_lblc_new(tbl, &iph.daddr, dest); + ip_vs_lblc_new(tbl, &iph->daddr, dest); spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 876937db0bf4..3cd85b2fc67c 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -655,19 +655,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) * Locality-Based (weighted) Least-Connection scheduling */ static struct ip_vs_dest * -ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_lblcr_table *tbl = svc->sched_data; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_lblcr_entry *en; - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); - IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); + en = ip_vs_lblcr_get(svc->af, tbl, &iph->daddr); if (en) { en->lastuse = jiffies; @@ -718,12 +716,12 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* If we fail to create a cache entry, we'll just use the valid dest */ spin_lock_bh(&svc->sched_lock); if (!tbl->dead) - ip_vs_lblcr_new(tbl, &iph.daddr, dest); + ip_vs_lblcr_new(tbl, &iph->daddr, dest); spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 5128e338a749..2bdcb1cf2127 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -26,7 +26,8 @@ * Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 646cfd4baa73..d8d9860934fe 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -55,7 +55,8 @@ ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index c35986c793d9..176b87c35e34 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -55,7 +55,8 @@ static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) * Round-Robin Scheduling */ static struct ip_vs_dest * -ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct list_head *p; struct ip_vs_dest *dest, *last; diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index f3205925359a..a5284cc3d882 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -59,7 +59,8 @@ ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; unsigned int loh, doh; diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index a65edfe4b16c..e0d5d1653566 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -227,18 +227,16 @@ static inline int is_overloaded(struct ip_vs_dest *dest) * Source Hashing scheduling */ static struct ip_vs_dest * -ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; - struct ip_vs_iphdr iph; - - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); s = (struct ip_vs_sh_state *) svc->sched_data; - dest = ip_vs_sh_get(svc->af, s, &iph.saddr); + dest = ip_vs_sh_get(svc->af, s, &iph->saddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -248,7 +246,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.saddr), + IP_VS_DBG_ADDR(svc->af, &iph->saddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index c60a81c4ce9a..6dc1fa128840 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -31,7 +31,8 @@ * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; unsigned int loh, doh; diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 0e68555bceb9..0546cd572d6b 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -162,7 +162,8 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, * Weighted Round-Robin Scheduling */ static struct ip_vs_dest * -ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *last, *stop = NULL; struct ip_vs_wrr_mark *mark = svc->sched_data; -- cgit From c6c96c188336b2b95d5f14facd101f1e4165a9d3 Mon Sep 17 00:00:00 2001 From: Alexander Frolkin Date: Thu, 13 Jun 2013 08:56:15 +0100 Subject: ipvs: sloppy TCP and SCTP This adds support for sloppy TCP and SCTP modes to IPVS. When enabled (sysctls net.ipv4.vs.sloppy_tcp and net.ipv4.vs.sloppy_sctp), allows IPVS to create connection state on any packet, not just a TCP SYN (or SCTP INIT). This allows connections to fail over from one IPVS director to another mid-flight. Signed-off-by: Alexander Frolkin Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 14 ++++++++++++++ net/netfilter/ipvs/ip_vs_proto_sctp.c | 18 ++++++++++-------- net/netfilter/ipvs/ip_vs_proto_tcp.c | 14 ++++++++------ 3 files changed, 32 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 47e510819f54..da035fc01eb2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1738,6 +1738,18 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sloppy_tcp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sloppy_sctp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "expire_quiescent_template", .maxlen = sizeof(int), @@ -3723,6 +3735,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_sync_sock_size; tbl[idx++].data = &ipvs->sysctl_cache_bypass; tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; + tbl[idx++].data = &ipvs->sysctl_sloppy_tcp; + tbl[idx++].data = &ipvs->sysctl_sloppy_sctp; tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 86464881cd20..df29d6417043 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -15,6 +15,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct net *net; struct ip_vs_service *svc; + struct netns_ipvs *ipvs; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; @@ -27,13 +28,14 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (sch == NULL) return 0; net = skb_net(skb); + ipvs = net_ipvs(net); rcu_read_lock(); - if ((sch->type == SCTP_CID_INIT) && + if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, &iph->daddr, sh->dest))) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -232,21 +234,21 @@ static struct ipvs_sctp_nextstate * STATE : IP_VS_SCTP_S_NONE */ /*next state *//*event */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 50a15944c6c1..e3a697234a98 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -39,6 +39,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; + struct netns_ipvs *ipvs; th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); if (th == NULL) { @@ -46,14 +47,15 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 0; } net = skb_net(skb); + ipvs = net_ipvs(net); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ rcu_read_lock(); - if (th->syn && + if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst && (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, &iph->daddr, th->dest))) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -401,7 +403,7 @@ static struct tcp_states_t tcp_states [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }}, /* OUTPUT */ @@ -415,7 +417,7 @@ static struct tcp_states_t tcp_states [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; @@ -424,7 +426,7 @@ static struct tcp_states_t tcp_states_dos [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }}, /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }}, -/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, +/*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, /* OUTPUT */ @@ -438,7 +440,7 @@ static struct tcp_states_t tcp_states_dos [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }}, /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; -- cgit From 61e7c420b4b2a797ac209106ba743ab6ebe984d8 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 18 Jun 2013 10:08:07 +0300 Subject: ipvs: replace the SCTP state machine Convert the SCTP state table, so that it is more readable. Change the states to be according to the diagram in RFC 2960 and add more states suitable for middle box. Still, such change in states adds incompatibility if systems in sync setup include this change and others do not include it. With this change we also have proper transitions in INPUT-ONLY mode (DR/TUN) where we see packets only from client. Now we should not switch to 10-second CLOSED state at a time when we should stay in ESTABLISHED state. The short names for states are because we have 16-char space in ipvsadm and 11-char limit for the connection list format. It is a sequence of the TCP implementation where the longest state name is ESTABLISHED. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 854 ++++++---------------------------- net/netfilter/ipvs/ip_vs_sync.c | 7 +- 2 files changed, 157 insertions(+), 704 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index df29d6417043..3c0da8728036 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -185,710 +185,159 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) return 1; } -struct ipvs_sctp_nextstate { - int next_state; -}; enum ipvs_sctp_event_t { - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_SER, - IP_VS_SCTP_EVE_INIT_CLI, - IP_VS_SCTP_EVE_INIT_SER, - IP_VS_SCTP_EVE_INIT_ACK_CLI, - IP_VS_SCTP_EVE_INIT_ACK_SER, - IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, - IP_VS_SCTP_EVE_COOKIE_ECHO_SER, - IP_VS_SCTP_EVE_COOKIE_ACK_CLI, - IP_VS_SCTP_EVE_COOKIE_ACK_SER, - IP_VS_SCTP_EVE_ABORT_CLI, - IP_VS_SCTP_EVE__ABORT_SER, - IP_VS_SCTP_EVE_SHUT_CLI, - IP_VS_SCTP_EVE_SHUT_SER, - IP_VS_SCTP_EVE_SHUT_ACK_CLI, - IP_VS_SCTP_EVE_SHUT_ACK_SER, - IP_VS_SCTP_EVE_SHUT_COM_CLI, - IP_VS_SCTP_EVE_SHUT_COM_SER, - IP_VS_SCTP_EVE_LAST + IP_VS_SCTP_DATA = 0, /* DATA, SACK, HEARTBEATs */ + IP_VS_SCTP_INIT, + IP_VS_SCTP_INIT_ACK, + IP_VS_SCTP_COOKIE_ECHO, + IP_VS_SCTP_COOKIE_ACK, + IP_VS_SCTP_SHUTDOWN, + IP_VS_SCTP_SHUTDOWN_ACK, + IP_VS_SCTP_SHUTDOWN_COMPLETE, + IP_VS_SCTP_ERROR, + IP_VS_SCTP_ABORT, + IP_VS_SCTP_EVENT_LAST }; -static enum ipvs_sctp_event_t sctp_events[256] = { - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_INIT_CLI, - IP_VS_SCTP_EVE_INIT_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_ABORT_CLI, - IP_VS_SCTP_EVE_SHUT_CLI, - IP_VS_SCTP_EVE_SHUT_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, - IP_VS_SCTP_EVE_COOKIE_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_SHUT_COM_CLI, +/* RFC 2960, 3.2 Chunk Field Descriptions */ +static __u8 sctp_events[] = { + [SCTP_CID_DATA] = IP_VS_SCTP_DATA, + [SCTP_CID_INIT] = IP_VS_SCTP_INIT, + [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK, + [SCTP_CID_SACK] = IP_VS_SCTP_DATA, + [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA, + [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA, + [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT, + [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN, + [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK, + [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR, + [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO, + [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK, + [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA, + [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA, + [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE, }; -static struct ipvs_sctp_nextstate - sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = { - /* - * STATE : IP_VS_SCTP_S_NONE - */ - /*next state *//*event */ - {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, - }, - /* - * STATE : IP_VS_SCTP_S_INIT_CLI - * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT) - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_SER - * Server sent INIT and waiting for INIT ACK from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_ACK_CLI - * Client sent INIT ACK and waiting for ECHO from the server - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK has been resent by the client, let us stay is in - * the same state - */ - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK sent by the server, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * ECHO by client, it should not happen, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO by server, this is what we are expecting, move to ECHO_SER - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, it should not happen, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * Unexpected COOKIE ACK from server, staty in the same state - */ - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_ACK_SER - * Server sent INIT ACK and waiting for ECHO from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * Unexpected INIT_ACK by the client, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK resent by the server, let us move to same state - */ - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client send the ECHO, this is what we are expecting, - * move to ECHO_CLI - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO received from the server, Not sure what to do, - * let us close it - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, let us stay in the same state - */ - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, hmm... this should not happen, lets close - * the connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ECHO_CLI - * Cient sent ECHO and waiting COOKEI ACK from the Server - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK has been by the client, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client resent the ECHO, let us stay in the same state - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO received from the server, Not sure what to do, - * let us close it - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, this shoud not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, this is what we are awaiting,lets move to - * ESTABLISHED. - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ECHO_SER - * Server sent ECHO and waiting COOKEI ACK from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK has been by the server, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent the ECHO, not sure what to do, let's close the - * connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO resent by the server, stay in the same state - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, this is what we are expecting, let's move - * to ESTABLISHED. - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, this should not happen, lets close the - * connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ESTABLISHED - * Association established - */ - {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_SHUT_CLI - * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN resent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this is what we are expecting, let's move - * to SHUDOWN_ACK_SER - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_SHUT_SER - * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN resent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN resent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this is what we are expecting, let's - * move to SHUT_ACK_CLI - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - - /* - * State : IP_VS_SCTP_S_SHUT_ACK_CLI - * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN sent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN sent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client resent SHUDTDOWN_ACK, let's stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - /* - * SHUTDOWN COMPLETE from server this is what we are expecting. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - - /* - * State : IP_VS_SCTP_S_SHUT_ACK_SER - * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ +/* SCTP States: + * See RFC 2960, 4. SCTP Association State Diagram + * + * New states (not in diagram): + * - INIT1 state: use shorter timeout for dropped INIT packets + * - REJECTED state: use shorter timeout if INIT is rejected with ABORT + * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging + * + * The states are as seen in real server. In the diagram, INIT1, INIT, + * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state. + * + * States as per packets from client (C) and server (S): + * + * Setup of client connection: + * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK + * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK + * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO + * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK + * + * Setup of server connection: + * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK + * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO + * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK + */ - {{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN sent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN sent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen let's close - * the connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server resent SHUTDOWN ACK, stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this what we are expecting, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - /* - * SHUTDOWN COMPLETE from server this should not happen. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_CLOSED - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - } +#define sNO IP_VS_SCTP_S_NONE +#define sI1 IP_VS_SCTP_S_INIT1 +#define sIN IP_VS_SCTP_S_INIT +#define sCS IP_VS_SCTP_S_COOKIE_SENT +#define sCR IP_VS_SCTP_S_COOKIE_REPLIED +#define sCW IP_VS_SCTP_S_COOKIE_WAIT +#define sCO IP_VS_SCTP_S_COOKIE +#define sCE IP_VS_SCTP_S_COOKIE_ECHOED +#define sES IP_VS_SCTP_S_ESTABLISHED +#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT +#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED +#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT +#define sRJ IP_VS_SCTP_S_REJECTED +#define sCL IP_VS_SCTP_S_CLOSED + +static const __u8 sctp_states + [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = { + { /* INPUT */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, +/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL}, +/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL}, +/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, + { /* OUTPUT */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW}, +/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL}, +/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, + { /* INPUT-ONLY */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, +/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL}, +/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL}, +/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, }; -/* - * Timeout table[state] - */ +#define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ) + +/* Timeout table[state] */ static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { - [IP_VS_SCTP_S_NONE] = 2 * HZ, - [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_ACK_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_ACK_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ECHO_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ECHO_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_ACK_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_ACK_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_CLOSED] = 10 * HZ, - [IP_VS_SCTP_S_LAST] = 2 * HZ, + [IP_VS_SCTP_S_NONE] = 2 * HZ, + [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ, + [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, + [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ, + [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_LAST] = 2 * HZ, }; static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { - [IP_VS_SCTP_S_NONE] = "NONE", - [IP_VS_SCTP_S_INIT_CLI] = "INIT_CLI", - [IP_VS_SCTP_S_INIT_SER] = "INIT_SER", - [IP_VS_SCTP_S_INIT_ACK_CLI] = "INIT_ACK_CLI", - [IP_VS_SCTP_S_INIT_ACK_SER] = "INIT_ACK_SER", - [IP_VS_SCTP_S_ECHO_CLI] = "COOKIE_ECHO_CLI", - [IP_VS_SCTP_S_ECHO_SER] = "COOKIE_ECHO_SER", - [IP_VS_SCTP_S_ESTABLISHED] = "ESTABISHED", - [IP_VS_SCTP_S_SHUT_CLI] = "SHUTDOWN_CLI", - [IP_VS_SCTP_S_SHUT_SER] = "SHUTDOWN_SER", - [IP_VS_SCTP_S_SHUT_ACK_CLI] = "SHUTDOWN_ACK_CLI", - [IP_VS_SCTP_S_SHUT_ACK_SER] = "SHUTDOWN_ACK_SER", - [IP_VS_SCTP_S_CLOSED] = "CLOSED", - [IP_VS_SCTP_S_LAST] = "BUG!" + [IP_VS_SCTP_S_NONE] = "NONE", + [IP_VS_SCTP_S_INIT1] = "INIT1", + [IP_VS_SCTP_S_INIT] = "INIT", + [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT", + [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED", + [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT", + [IP_VS_SCTP_S_COOKIE] = "COOKIE", + [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED", + [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED", + [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT", + [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED", + [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT", + [IP_VS_SCTP_S_REJECTED] = "REJECTED", + [IP_VS_SCTP_S_CLOSED] = "CLOSED", + [IP_VS_SCTP_S_LAST] = "BUG!", }; @@ -945,17 +394,20 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, } } - event = sctp_events[chunk_type]; + event = (chunk_type < sizeof(sctp_events)) ? + sctp_events[chunk_type] : IP_VS_SCTP_DATA; - /* - * If the direction is IP_VS_DIR_OUTPUT, this event is from server - */ - if (direction == IP_VS_DIR_OUTPUT) - event++; - /* - * get next state + /* Update direction to INPUT_ONLY if necessary + * or delete NO_OUTPUT flag if output packet detected */ - next_state = sctp_states_table[cp->state][event].next_state; + if (cp->flags & IP_VS_CONN_F_NOOUTPUT) { + if (direction == IP_VS_DIR_OUTPUT) + cp->flags &= ~IP_VS_CONN_F_NOOUTPUT; + else + direction = IP_VS_DIR_INPUT_ONLY; + } + + next_state = sctp_states[direction][event][cp->state]; if (next_state != cp->state) { struct ip_vs_dest *dest = cp->dest; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f6046d9af8d3..2fc66394d86d 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -461,9 +461,10 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { if (!((1 << cp->state) & ((1 << IP_VS_SCTP_S_ESTABLISHED) | - (1 << IP_VS_SCTP_S_CLOSED) | - (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) | - (1 << IP_VS_SCTP_S_SHUT_ACK_SER)))) + (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) | + (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) | + (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) | + (1 << IP_VS_SCTP_S_CLOSED)))) return 0; force = cp->state != cp->old_state; if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) -- cgit From acaac5d8bbedf6bd96f53960780942e1ad90d70e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 18 Jun 2013 10:08:08 +0300 Subject: ipvs: drop SCTP connections depending on state Drop SCTP connections under load (dropentry context) depending on the protocol state, just like for TCP: INIT conns are dropped immediately, established are dropped randomly while connections in progress or shutdown are skipped. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index c8c52a98590b..4c8e5c0aa1ab 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1231,6 +1231,18 @@ void ip_vs_random_dropentry(struct net *net) default: continue; } + } else if (cp->protocol == IPPROTO_SCTP) { + switch (cp->state) { + case IP_VS_SCTP_S_INIT1: + case IP_VS_SCTP_S_INIT: + break; + case IP_VS_SCTP_S_ESTABLISHED: + if (todrop_entry(cp)) + break; + continue; + default: + continue; + } } else { if (!todrop_entry(cp)) continue; -- cgit From eba3b5a78799d21dea05118b294524958f0ab592 Mon Sep 17 00:00:00 2001 From: Alexander Frolkin Date: Wed, 19 Jun 2013 10:54:25 +0100 Subject: ipvs: SH fallback and L4 hashing By default the SH scheduler rejects connections that are hashed onto a realserver of weight 0. This patch adds a flag to make SH choose a different realserver in this case, instead of rejecting the connection. The patch also adds a flag to make SH include the source port (TCP, UDP, SCTP) in the hash as well as the source address. This basically allows for deterministic round-robin load balancing (i.e., where any director in a cluster of directors with identical config will send the same packet the same way). The flags are service flags (IP_VS_SVC_F_SCHED*) so that these options can be set per service. They are set using a new option to ipvsadm. Signed-off-by: Alexander Frolkin Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sh.c | 100 +++++++++++++++++++++++++++++++++++------- 1 file changed, 85 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index e0d5d1653566..f16c027df15b 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -48,6 +48,10 @@ #include +#include +#include +#include + /* * IPVS SH bucket @@ -71,10 +75,19 @@ struct ip_vs_sh_state { struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; }; +/* Helper function to determine if server is unavailable */ +static inline bool is_unavailable(struct ip_vs_dest *dest) +{ + return atomic_read(&dest->weight) <= 0 || + dest->flags & IP_VS_DEST_F_OVERLOAD; +} + /* * Returns hash value for IPVS SH entry */ -static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr) +static inline unsigned int +ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr, + __be16 port, unsigned int offset) { __be32 addr_fold = addr->ip; @@ -83,7 +96,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif - return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK; + return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) & + IP_VS_SH_TAB_MASK; } @@ -91,12 +105,42 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr) +ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, + const union nf_inet_addr *addr, __be16 port) { - return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest); + unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0); + struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest); + + return (!dest || is_unavailable(dest)) ? NULL : dest; } +/* As ip_vs_sh_get, but with fallback if selected server is unavailable */ +static inline struct ip_vs_dest * +ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, + const union nf_inet_addr *addr, __be16 port) +{ + unsigned int offset; + unsigned int hash; + struct ip_vs_dest *dest; + + for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { + hash = ip_vs_sh_hashkey(svc->af, addr, port, offset); + dest = rcu_dereference(s->buckets[hash].dest); + if (!dest) + break; + if (is_unavailable(dest)) + IP_VS_DBG_BUF(6, "SH: selected unavailable server " + "%s:%d (offset %d)", + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), offset); + else + return dest; + } + + return NULL; +} + /* * Assign all the hash buckets of the specified table with the service. */ @@ -213,13 +257,33 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, } -/* - * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, - * consider that the server is overloaded here. - */ -static inline int is_overloaded(struct ip_vs_dest *dest) +/* Helper function to get port number */ +static inline __be16 +ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { - return dest->flags & IP_VS_DEST_F_OVERLOAD; + __be16 port; + struct tcphdr _tcph, *th; + struct udphdr _udph, *uh; + sctp_sctphdr_t _sctph, *sh; + + switch (iph->protocol) { + case IPPROTO_TCP: + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + port = th->source; + break; + case IPPROTO_UDP: + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); + port = uh->source; + break; + case IPPROTO_SCTP: + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); + port = sh->source; + break; + default: + port = 0; + } + + return port; } @@ -232,15 +296,21 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; + __be16 port = 0; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); + if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT) + port = ip_vs_sh_get_port(skb, iph); + s = (struct ip_vs_sh_state *) svc->sched_data; - dest = ip_vs_sh_get(svc->af, s, &iph->saddr); - if (!dest - || !(dest->flags & IP_VS_DEST_F_AVAILABLE) - || atomic_read(&dest->weight) <= 0 - || is_overloaded(dest)) { + + if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) + dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); + else + dest = ip_vs_sh_get(svc, s, &iph->saddr, port); + + if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); return NULL; } -- cgit From 4d0c875dcc4923476f364e83912d134da2df224c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 24 Jun 2013 22:44:41 +0300 Subject: ipvs: add sync_persist_mode flag Add sync_persist_mode flag to reduce sync traffic by syncing only persistent templates. Signed-off-by: Julian Anastasov Tested-by: Aleksey Chudov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 7 +++++++ net/netfilter/ipvs/ip_vs_sync.c | 12 ++++++++++++ 2 files changed, 19 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index da035fc01eb2..c8148e487386 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1714,6 +1714,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = &proc_do_sync_ports, }, + { + .procname = "sync_persist_mode", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "sync_qlen_max", .maxlen = sizeof(unsigned long), @@ -3729,6 +3735,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_sync_ver; ipvs->sysctl_sync_ports = 1; tbl[idx++].data = &ipvs->sysctl_sync_ports; + tbl[idx++].data = &ipvs->sysctl_sync_persist_mode; ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; ipvs->sysctl_sync_sock_size = 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 2fc66394d86d..f4484719f3e6 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -425,6 +425,16 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) return sb; } +/* Check if connection is controlled by persistence */ +static inline bool in_persistence(struct ip_vs_conn *cp) +{ + for (cp = cp->control; cp; cp = cp->control) { + if (cp->flags & IP_VS_CONN_F_TEMPLATE) + return true; + } + return false; +} + /* Check if conn should be synced. * pkts: conn packets, use sysctl_sync_threshold to avoid packet check * - (1) sync_refresh_period: reduce sync rate. Additionally, retry @@ -447,6 +457,8 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, /* Check if we sync in current state */ if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) force = 0; + else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp))) + return 0; else if (likely(cp->protocol == IPPROTO_TCP)) { if (!((1 << cp->state) & ((1 << IP_VS_TCP_S_ESTABLISHED) | -- cgit From 963b89e80d9fb7f22fc2688428e121b410b76504 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 26 Jun 2013 17:40:33 +0200 Subject: sit: fix 4in4 + IPsec scenario Since commit 32b8a8e59c9c "sit: add IPv4 over IPv4 support", tunnel->parms.iph.protocol is 0 when both 4in4 and 6in4 are setup, but xfrm_lookup() is called only when proto is != 0, thus we need to pass the real value. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index bd227e5ea9da..3b00d81c8f1e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -542,7 +542,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } rt = ip_route_output_tunnel(dev_net(dev), &fl4, - tunnel->parms.iph.protocol, + protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), -- cgit From 77ecaace6c5487eae8ede633ad51478511a8e125 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 26 Jun 2013 03:41:49 +0200 Subject: ipv6: rearm router solicitaion timer when setting new tokenized address When a new tokenized address gets installed we send out just one router solicition. We should send out `rtr_solicits' in case one router advertisment got lost. So, rearm the timer as we do in addrconf_dad_complete. Cc: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index afaf3cdadf58..4e4cc1fc26d1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4361,8 +4361,11 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) write_lock_bh(&idev->lock); - if (update_rs) + if (update_rs) { idev->if_flags |= IF_RS_SENT; + idev->rs_probes = 1; + addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval); + } /* Well, that's kinda nasty ... */ list_for_each_entry(ifp, &idev->addr_list, if_list) { -- cgit From 621e84d6f373dcb273ebfd772638b8e7dc3c2c48 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 26 Jun 2013 16:11:27 +0200 Subject: dev: introduce skb_scrub_packet() The goal of this new function is to perform all needed cleanup before sending an skb into another netns. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/dev.c | 11 +---------- net/core/skbuff.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 722f633926e0..370354a9c5f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1652,22 +1652,13 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } } - skb_orphan(skb); - if (unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); return NET_RX_DROP; } - skb->skb_iif = 0; - skb_dst_drop(skb); - skb->tstamp.tv64 = 0; - skb->pkt_type = PACKET_HOST; + skb_scrub_packet(skb); skb->protocol = eth_type_trans(skb, dev); - skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); - nf_reset_trace(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f73eca29fbe..b1fcb8727e56 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3492,3 +3492,26 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, return true; } EXPORT_SYMBOL(skb_try_coalesce); + +/** + * skb_scrub_packet - scrub an skb before sending it to another netns + * + * @skb: buffer to clean + * + * skb_scrub_packet can be used to clean an skb before injecting it in + * another namespace. We have to clear all information in the skb that + * could impact namespace isolation. + */ +void skb_scrub_packet(struct sk_buff *skb) +{ + skb_orphan(skb); + skb->tstamp.tv64 = 0; + skb->pkt_type = PACKET_HOST; + skb->skb_iif = 0; + skb_dst_drop(skb); + skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); + nf_reset_trace(skb); +} +EXPORT_SYMBOL_GPL(skb_scrub_packet); -- cgit From 5e6700b3bf98fe98d630bf9c939ad4c85ce95592 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 26 Jun 2013 16:11:28 +0200 Subject: sit: add support of x-netns This patch allows to switch the netns when packet is encapsulated or decapsulated. In other word, the encapsulated packet is received in a netns, where the lookup is done to find the tunnel. Once the tunnel is found, the packet is decapsulated and injecting into the corresponding interface which stands to another netns. When one of the two netns is removed, the tunnel is destroyed. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 10 +++++++++- net/ipv6/sit.c | 42 ++++++++++++++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 3b00d81c8f1e..394cebc96d22 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -304,6 +304,7 @@ static struct net_device *__ip_tunnel_create(struct net *net, tunnel = netdev_priv(dev); tunnel->parms = *parms; + tunnel->net = net; err = register_netdevice(dev); if (err) @@ -453,6 +454,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); + if (tunnel->net != dev_net(tunnel->dev)) + skb_scrub_packet(skb); + if (tunnel->dev->type == ARPHRD_ETHER) { skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -541,7 +545,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); } - rt = ip_route_output_tunnel(dev_net(dev), &fl4, + rt = ip_route_output_tunnel(tunnel->net, &fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, @@ -602,6 +606,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } #endif + if (tunnel->net != dev_net(dev)) + skb_scrub_packet(skb); + if (tunnel->err_count > 0) { if (time_before(jiffies, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { @@ -888,6 +895,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], if (ip_tunnel_find(itn, p, dev->type)) return -EEXIST; + nt->net = net; nt->parms = *p; err = register_netdevice(dev); if (err) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index f639866b3dcf..97a0bfe2c293 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -466,14 +466,14 @@ isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) static void ipip6_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); - struct sit_net *sitn = net_generic(net, sit_net_id); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct sit_net *sitn = net_generic(tunnel->net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); } else { - ipip6_tunnel_unlink(sitn, netdev_priv(dev)); - ipip6_tunnel_del_prl(netdev_priv(dev), NULL); + ipip6_tunnel_unlink(sitn, tunnel); + ipip6_tunnel_del_prl(tunnel, NULL); } dev_put(dev); } @@ -621,6 +621,8 @@ static int ipip6_rcv(struct sk_buff *skb) tstats->rx_packets++; tstats->rx_bytes += skb->len; + if (tunnel->net != dev_net(tunnel->dev)) + skb_scrub_packet(skb); netif_rx(skb); return 0; @@ -803,7 +805,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + rt = ip_route_output_ports(tunnel->net, &fl4, NULL, dst, tiph->saddr, 0, 0, IPPROTO_IPV6, RT_TOS(tos), @@ -858,6 +860,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tunnel->err_count = 0; } + if (tunnel->net != dev_net(dev)) + skb_scrub_packet(skb); + /* * Okay, now see if we can stuff it in the buffer as-is. */ @@ -944,7 +949,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct rtable *rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4, + NULL, iph->daddr, iph->saddr, 0, 0, IPPROTO_IPV6, @@ -959,7 +965,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); @@ -972,7 +978,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) { - struct net *net = dev_net(t->dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); ipip6_tunnel_unlink(sitn, t); @@ -1248,7 +1254,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->iflink = 0; dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; } @@ -1257,6 +1262,7 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; + tunnel->net = dev_net(dev); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -1277,6 +1283,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); iph->version = 4; @@ -1564,8 +1571,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = { static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { + struct net *net = dev_net(sitn->fb_tunnel_dev); + struct net_device *dev, *aux; int prio; + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &sit_link_ops) + unregister_netdevice_queue(dev, head); + for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { @@ -1573,7 +1586,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea t = rtnl_dereference(sitn->tunnels[prio][h]); while (t != NULL) { - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (dev_net(t->dev) != net) + unregister_netdevice_queue(t->dev, + head); t = rtnl_dereference(t->next); } } @@ -1598,6 +1616,10 @@ static int __net_init sit_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); if (err) -- cgit From 3a36515f729458c8efa0c124c7262d5843ad5c37 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Fri, 28 Jun 2013 03:04:23 +0200 Subject: netlink: fix splat in skb_clone with large messages Since (c05cdb1 netlink: allow large data transfers from user-space), netlink splats if it invokes skb_clone on large netlink skbs since: * skb_shared_info was not correctly initialized. * skb->destructor is not set in the cloned skb. This was spotted by trinity: [ 894.990671] BUG: unable to handle kernel paging request at ffffc9000047b001 [ 894.991034] IP: [] skb_clone+0x24/0xc0 [...] [ 894.991034] Call Trace: [ 894.991034] [] nl_fib_input+0x6a/0x240 [ 894.991034] [] ? _raw_read_unlock+0x26/0x40 [ 894.991034] [] netlink_unicast+0x169/0x1e0 [ 894.991034] [] netlink_sendmsg+0x251/0x3d0 Fix it by: 1) introducing a new netlink_skb_clone function that is used in nl_fib_input, that sets our special skb->destructor in the cloned skb. Moreover, handle the release of the large cloned skb head area in the destructor path. 2) not allowing large skbuffs in the netlink broadcast path. I cannot find any reasonable use of the large data transfer using netlink in that path, moreover this helps to skip extra skb_clone handling. I found two more netlink clients that are cloning the skbs, but they are not in the sendmsg path. Therefore, the sole client cloning that I found seems to be the fib frontend. Thanks to Eric Dumazet for helping to address this issue. Reported-by: Fengguang Wu Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- net/netlink/af_netlink.c | 35 ++++++++++++++++++----------------- 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 05a4888dede9..b3f627ac4ed8 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -961,7 +961,7 @@ static void nl_fib_input(struct sk_buff *skb) nlmsg_len(nlh) < sizeof(*frn)) return; - skb = skb_clone(skb, GFP_KERNEL); + skb = netlink_skb_clone(skb, GFP_KERNEL); if (skb == NULL) return; nlh = nlmsg_hdr(skb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6967fbcca6c5..0c61b59175dc 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -849,7 +849,10 @@ static void netlink_skb_destructor(struct sk_buff *skb) } #endif if (is_vmalloc_addr(skb->head)) { - vfree(skb->head); + if (!skb->cloned || + !atomic_dec_return(&(skb_shinfo(skb)->dataref))) + vfree(skb->head); + skb->head = NULL; } if (skb->sk != NULL) @@ -1532,33 +1535,31 @@ struct sock *netlink_getsockbyfilp(struct file *filp) return sock; } -static struct sk_buff *netlink_alloc_large_skb(unsigned int size) +static struct sk_buff *netlink_alloc_large_skb(unsigned int size, + int broadcast) { struct sk_buff *skb; void *data; - if (size <= NLMSG_GOODSIZE) + if (size <= NLMSG_GOODSIZE || broadcast) return alloc_skb(size, GFP_KERNEL); - skb = alloc_skb_head(GFP_KERNEL); - if (skb == NULL) - return NULL; + size = SKB_DATA_ALIGN(size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); data = vmalloc(size); if (data == NULL) - goto err; + return NULL; - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->len = 0; - skb->destructor = netlink_skb_destructor; + skb = build_skb(data, size); + if (skb == NULL) + vfree(data); + else { + skb->head_frag = 0; + skb->destructor = netlink_skb_destructor; + } return skb; -err: - kfree_skb(skb); - return NULL; } /* @@ -2244,7 +2245,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = netlink_alloc_large_skb(len); + skb = netlink_alloc_large_skb(len, dst_group); if (skb == NULL) goto out; -- cgit From 1ec047eb4751e331bc61cff0e98f0db67db8b8dc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Jun 2013 00:06:56 +0200 Subject: ipv6: introduce per-interface counter for dad-completed ipv6 addresses To reduce the number of unnecessary router solicitations, MLDv2 and IGMPv3 messages we need to track the number of valid (as in non-optimistic, no-dad-failed and non-tentative) link-local addresses. Therefore, this patch implements a valid_ll_addr_cnt in struct inet6_dev. We now only emit router solicitations if the first link-local address finishes duplicate address detection. The changes for MLDv2 and IGMPv3 are in a follow-up patch. While there, also simplify one if statement(one minor nit I made in one of my previous patches): if (!...) do(); else return; <> if (...) return; do(); Cc: Flavio Leitner Cc: YOSHIFUJI Hideaki Cc: David Stevens Suggested-by: David Stevens Signed-off-by: Hannes Frederic Sowa Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4e4cc1fc26d1..20d92ff2d690 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3277,6 +3277,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; + bool send_rs; addrconf_del_dad_timer(ifp); @@ -3290,20 +3291,25 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) router advertisements, start sending router solicitations. */ - if (ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && - (dev->flags&IFF_LOOPBACK) == 0 && - (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { + read_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + send_rs = ipv6_accept_ra(ifp->idev) && + ifp->idev->cnf.rtr_solicits > 0 && + (dev->flags&IFF_LOOPBACK) == 0 && + ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL && + ifp->idev->valid_ll_addr_cnt == 1; + spin_unlock(&ifp->lock); + read_unlock_bh(&ifp->idev->lock); + + if (send_rs) { /* * If a host as already performed a random delay * [...] as part of DAD [...] there is no need * to delay again before sending the first RS */ - if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) - ndisc_send_rs(dev, &lladdr, - &in6addr_linklocal_allrouters); - else + if (ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) return; + ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters); write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); @@ -4576,6 +4582,19 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); } +static void update_valid_ll_addr_cnt(struct inet6_ifaddr *ifp, int count) +{ + write_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + if (((ifp->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|IFA_F_OPTIMISTIC| + IFA_F_DADFAILED)) == IFA_F_PERMANENT) && + (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) + ifp->idev->valid_ll_addr_cnt += count; + WARN_ON(ifp->idev->valid_ll_addr_cnt < 0); + spin_unlock(&ifp->lock); + write_unlock_bh(&ifp->idev->lock); +} + static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); @@ -4584,6 +4603,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: + update_valid_ll_addr_cnt(ifp, 1); + /* * If the address was optimistic * we inserted the route at the start of @@ -4599,6 +4620,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ifp->idev->dev, 0, 0); break; case RTM_DELADDR: + update_valid_ll_addr_cnt(ifp, -1); + if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); -- cgit From b173ee488dcc545e77ed482158a2f0d06d7a5860 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Jun 2013 00:07:01 +0200 Subject: ipv6: resend MLD report if a link-local address completes DAD RFC3590/RFC3810 specifies we should resend MLD reports as soon as a valid link-local address is available. We now use the valid_ll_addr_cnt to check if it is necessary to resend a new report. Changes since Flavio Leitner's version: a) adapt for valid_ll_addr_cnt b) resend first reports directly in the path and just arm the timer for mc_qrv-1 resends. Reported-by: Flavio Leitner Cc: Hideaki YOSHIFUJI Cc: David Stevens Signed-off-by: Hannes Frederic Sowa Signed-off-by: Flavio Leitner Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 17 ++++++++++++----- net/ipv6/mcast.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 20d92ff2d690..12dd2fec045c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3277,7 +3277,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; - bool send_rs; + bool send_rs, send_mld; addrconf_del_dad_timer(ifp); @@ -3293,14 +3293,21 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) read_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); - send_rs = ipv6_accept_ra(ifp->idev) && + send_mld = ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL && + ifp->idev->valid_ll_addr_cnt == 1; + send_rs = send_mld && + ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits > 0 && - (dev->flags&IFF_LOOPBACK) == 0 && - ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL && - ifp->idev->valid_ll_addr_cnt == 1; + (dev->flags&IFF_LOOPBACK) == 0; spin_unlock(&ifp->lock); read_unlock_bh(&ifp->idev->lock); + /* While dad is in progress mld report's source address is in6_addrany. + * Resend with proper ll now. + */ + if (send_mld) + ipv6_mc_dad_complete(ifp->idev); + if (send_rs) { /* * If a host as already performed a random delay diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 72c8bfe06bb4..502c877cbf10 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -999,6 +999,14 @@ static void mld_ifc_start_timer(struct inet6_dev *idev, int delay) in6_dev_hold(idev); } +static void mld_dad_start_timer(struct inet6_dev *idev, int delay) +{ + int tv = net_random() % delay; + + if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2)) + in6_dev_hold(idev); +} + /* * IGMP handling (alias multicast ICMPv6 messages) */ @@ -1815,6 +1823,46 @@ err_out: goto out; } +static void mld_resend_report(struct inet6_dev *idev) +{ + if (MLD_V1_SEEN(idev)) { + struct ifmcaddr6 *mcaddr; + read_lock_bh(&idev->lock); + for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) { + if (!(mcaddr->mca_flags & MAF_NOREPORT)) + igmp6_send(&mcaddr->mca_addr, idev->dev, + ICMPV6_MGM_REPORT); + } + read_unlock_bh(&idev->lock); + } else { + mld_send_report(idev, NULL); + } +} + +void ipv6_mc_dad_complete(struct inet6_dev *idev) +{ + idev->mc_dad_count = idev->mc_qrv; + if (idev->mc_dad_count) { + mld_resend_report(idev); + idev->mc_dad_count--; + if (idev->mc_dad_count) + mld_dad_start_timer(idev, idev->mc_maxdelay); + } +} + +static void mld_dad_timer_expire(unsigned long data) +{ + struct inet6_dev *idev = (struct inet6_dev *)data; + + mld_resend_report(idev); + if (idev->mc_dad_count) { + idev->mc_dad_count--; + if (idev->mc_dad_count) + mld_dad_start_timer(idev, idev->mc_maxdelay); + } + __in6_dev_put(idev); +} + static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, const struct in6_addr *psfsrc) { @@ -2232,6 +2280,8 @@ void ipv6_mc_down(struct inet6_dev *idev) idev->mc_gq_running = 0; if (del_timer(&idev->mc_gq_timer)) __in6_dev_put(idev); + if (del_timer(&idev->mc_dad_timer)) + __in6_dev_put(idev); for (i = idev->mc_list; i; i=i->next) igmp6_group_dropped(i); @@ -2268,6 +2318,8 @@ void ipv6_mc_init_dev(struct inet6_dev *idev) idev->mc_ifc_count = 0; setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire, (unsigned long)idev); + setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire, + (unsigned long)idev); idev->mc_qrv = MLD_QRV_DEFAULT; idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; idev->mc_v1_seen = 0; -- cgit From 2ffae99d1fac272952b5a395759823717760ce37 Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Thu, 27 Jun 2013 10:27:05 +0300 Subject: ipv4: use next hop exceptions also for input routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d2d68ba9 (ipv4: Cache input routes in fib_info nexthops) assmued that "locally destined, and routed packets, never trigger PMTU events or redirects that will be processed by us". However, it seems that tunnel devices do trigger PMTU events in certain cases. At least ip_gre, ip6_gre, sit, and ipip do use the inner flow's skb_dst(skb)->ops->update_pmtu to propage mtu information from the outer flows. These can cause the inner flow mtu to be decreased. If next hop exceptions are not consulted for pmtu, IP fragmentation will not be done properly for these routes. It also seems that we really need to have the PMTU information always for netfilter TCPMSS clamp-to-pmtu feature to work properly. So for the time being, cache separate copies of input routes for each next hop exception. Signed-off-by: Timo Teräs Reviewed-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 3 ++- net/ipv4/route.c | 65 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 52 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8f6cb7a87cd6..d5dbca5ecf62 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -169,7 +169,8 @@ static void free_nh_exceptions(struct fib_nh *nh) next = rcu_dereference_protected(fnhe->fnhe_next, 1); - rt_fibinfo_free(&fnhe->fnhe_rth); + rt_fibinfo_free(&fnhe->fnhe_rth_input); + rt_fibinfo_free(&fnhe->fnhe_rth_output); kfree(fnhe); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f3fa42eac461..a9a54a236832 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -565,10 +565,25 @@ static inline void rt_free(struct rtable *rt) static DEFINE_SPINLOCK(fnhe_lock); +static void fnhe_flush_routes(struct fib_nh_exception *fnhe) +{ + struct rtable *rt; + + rt = rcu_dereference(fnhe->fnhe_rth_input); + if (rt) { + RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); + rt_free(rt); + } + rt = rcu_dereference(fnhe->fnhe_rth_output); + if (rt) { + RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); + rt_free(rt); + } +} + static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception *fnhe, *oldest; - struct rtable *orig; oldest = rcu_dereference(hash->chain); for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; @@ -576,11 +591,7 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) oldest = fnhe; } - orig = rcu_dereference(oldest->fnhe_rth); - if (orig) { - RCU_INIT_POINTER(oldest->fnhe_rth, NULL); - rt_free(orig); - } + fnhe_flush_routes(oldest); return oldest; } @@ -644,7 +655,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_expires = max(1UL, expires); } /* Update all cached dsts too */ - rt = rcu_dereference(fnhe->fnhe_rth); + rt = rcu_dereference(fnhe->fnhe_rth_input); + if (rt) + fill_route_from_fnhe(rt, fnhe); + rt = rcu_dereference(fnhe->fnhe_rth_output); if (rt) fill_route_from_fnhe(rt, fnhe); } else { @@ -668,6 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, * stale, so anyone caching it rechecks if this exception * applies to them. */ + rt = rcu_dereference(nh->nh_rth_input); + if (rt) + rt->dst.obsolete = DST_OBSOLETE_KILL; + for_each_possible_cpu(i) { struct rtable __rcu **prt; prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); @@ -1242,25 +1260,36 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { + struct rtable __rcu **porig; + struct rtable *orig; int genid = fnhe_genid(dev_net(rt->dst.dev)); - struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); + + if (rt_is_input_route(rt)) + porig = &fnhe->fnhe_rth_input; + else + porig = &fnhe->fnhe_rth_output; + orig = rcu_dereference(*porig); if (fnhe->fnhe_genid != genid) { fnhe->fnhe_genid = genid; fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; + fnhe_flush_routes(fnhe); + orig = NULL; } fill_route_from_fnhe(rt, fnhe); if (!rt->rt_gateway) rt->rt_gateway = daddr; - rcu_assign_pointer(fnhe->fnhe_rth, rt); - if (orig) - rt_free(orig); + if (!(rt->dst.flags & DST_NOCACHE)) { + rcu_assign_pointer(*porig, rt); + if (orig) + rt_free(orig); + ret = true; + } fnhe->fnhe_stamp = jiffies; - ret = true; } spin_unlock_bh(&fnhe_lock); @@ -1492,6 +1521,7 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { + struct fib_nh_exception *fnhe; struct rtable *rth; int err; struct in_device *out_dev; @@ -1538,8 +1568,13 @@ static int __mkroute_input(struct sk_buff *skb, } } + fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (fnhe != NULL) + rth = rcu_dereference(fnhe->fnhe_rth_input); + else + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -1567,7 +1602,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.input = ip_forward; rth->dst.output = ip_output; - rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); + rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); skb_dst_set(skb, &rth->dst); out: err = 0; @@ -1882,7 +1917,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = find_exception(nh, fl4->daddr); if (fnhe) - prth = &fnhe->fnhe_rth; + prth = &fnhe->fnhe_rth_output; else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && -- cgit From 496e4ae7dc944faa1721bfda7e9d834d5611a874 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Jun 2013 14:15:47 +0200 Subject: netfilter: nf_queue: add NFQA_SKB_CSUM_NOTVERIFIED info flag The common case is that TCP/IP checksums have already been verified, e.g. by hardware (rx checksum offload), or conntrack. Userspace can use this flag to determine when the checksum has not been validated yet. If the flag is set, this doesn't necessarily mean that the packet has an invalid checksum, e.g. if NIC doesn't support rx checksum. Userspace that sucessfully enabled NFQA_CFG_F_GSO queue feature flag can infer that IP/TCP checksum has already been validated if either the SKB_INFO attribute is not present or the NFQA_SKB_CSUM_NOTVERIFIED flag is unset. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 299a48ae5dc9..971ea145ab3e 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -280,12 +280,17 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) skb_shinfo(to)->nr_frags = j; } -static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet) +static int +nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, + bool csum_verify) { __u32 flags = 0; if (packet->ip_summed == CHECKSUM_PARTIAL) flags = NFQA_SKB_CSUMNOTREADY; + else if (csum_verify) + flags = NFQA_SKB_CSUM_NOTVERIFIED; + if (skb_is_gso(packet)) flags |= NFQA_SKB_GSO; @@ -310,6 +315,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct net_device *outdev; struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); + bool csum_verify; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -327,6 +333,12 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->tstamp.tv64) size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + if (entry->hook <= NF_INET_FORWARD || + (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL)) + csum_verify = !skb_csum_unnecessary(entskb); + else + csum_verify = false; + outdev = entry->outdev; switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { @@ -476,7 +488,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) goto nla_put_failure; - if (nfqnl_put_packet_info(skb, entskb)) + if (nfqnl_put_packet_info(skb, entskb, csum_verify)) goto nla_put_failure; if (data_len) { -- cgit From fb825a550a1af75323cee9d62d6fb818384c8c95 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 28 Jun 2013 16:07:40 -0700 Subject: openvswitch: Add Kconfig dependency on GRE-DEMUX. Openvswitch uses function from NET_IPGRE_DEMUX module. Add Kconfig dependency to fix following compilation errors: http://marc.info/?l=linux-netdev&m=137244035226634 CC: Jesse Gross Reported-by: Randy Dunlap Signed-off-by: Pravin Shelar Acked-by: Randy Dunlap Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/Kconfig | 16 ++++++++++++++-- net/openvswitch/vport-gre.c | 5 +++-- net/openvswitch/vport.c | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 9fbc04a31ed6..27ee56b688a3 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -19,8 +19,6 @@ config OPENVSWITCH which is able to accept configuration from a variety of sources and translate it into packet processing rules. - Open vSwitch GRE support depends on CONFIG_NET_IPGRE_DEMUX. - See http://openvswitch.org for more information and userspace utilities. @@ -28,3 +26,17 @@ config OPENVSWITCH called openvswitch. If unsure, say N. + +config OPENVSWITCH_GRE + bool "Open vSwitch GRE tunneling support" + depends on INET + depends on OPENVSWITCH + depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m) + default y + ---help--- + If you say Y here, then the Open vSwitch will be able create GRE + vport. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 943e5c431354..493e9775dcda 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -16,7 +16,7 @@ * 02110-1301, USA */ -#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) +#ifdef CONFIG_OPENVSWITCH_GRE #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include @@ -271,4 +271,5 @@ const struct vport_ops ovs_gre_vport_ops = { .get_name = gre_get_name, .send = gre_tnl_send, }; -#endif + +#endif /* OPENVSWITCH_GRE */ diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index ba81294219ac..d4c7fa04ce08 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -39,7 +39,7 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, -#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) +#ifdef CONFIG_OPENVSWITCH_GRE &ovs_gre_vport_ops, #endif }; -- cgit From 5c29fb12e8fb8a8105ea048cb160fd79a85a52bb Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Jun 2013 22:46:04 +0200 Subject: ipv6: only apply anti-spoofing checks to not-pointopoint tunnels Because of commit 218774dc341f219bfcf940304a081b121a0e8099 ("ipv6: add anti-spoofing checks for 6to4 and 6rd") the sit driver dropped packets for 2002::/16 destinations and sources even when configured to work as a tunnel with fixed endpoint. We may only apply the 6rd/6to4 anti-spoofing checks if the device is not in pointopoint mode. This was an oversight from me in the above commit, sorry. Thanks to Roman Mamedov for reporting this! Reported-by: Roman Mamedov Cc: David Miller Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 97a0bfe2c293..85ff37b1ce02 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -593,7 +593,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel->dev->stats.rx_errors++; goto out; } - } else { + } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { if (is_spoofed_6rd(tunnel, iph->saddr, &ipv6_hdr(skb)->saddr) || is_spoofed_6rd(tunnel, iph->daddr, -- cgit From 52bd4c0c1551daa2efa7bb9e01a2f4ea6d1311bb Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 28 Jun 2013 17:35:48 +0200 Subject: ipv6: fix ecmp lookup when oif is specified There is no reason to skip ECMP lookup when oif is specified, but this implies to check oif given by user when selecting another route. When the new route does not match oif requirement, we simply keep the initial one. Spotted-by: dingzhi Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/route.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7ca87b37c0ef..9ff0b78a9c15 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -83,6 +83,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -394,7 +395,8 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count, } static struct rt6_info *rt6_multipath_select(struct rt6_info *match, - struct flowi6 *fl6) + struct flowi6 *fl6, int oif, + int strict) { struct rt6_info *sibling, *next_sibling; int route_choosen; @@ -408,6 +410,8 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, &match->rt6i_siblings, rt6i_siblings) { route_choosen--; if (route_choosen == 0) { + if (rt6_score_route(sibling, oif, strict) < 0) + break; match = sibling; break; } @@ -743,7 +747,7 @@ restart: rt = fn->leaf; rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(rt, fl6); + rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); BACKTRACK(net, &fl6->saddr); out: dst_use(&rt->dst, jiffies); @@ -875,8 +879,8 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - if (rt->rt6i_nsiblings && oif == 0) - rt = rt6_multipath_select(rt, fl6); + if (rt->rt6i_nsiblings) + rt = rt6_multipath_select(rt, fl6, oif, strict | reachable); BACKTRACK(net, &fl6->saddr); if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) -- cgit From c9ab4d85de222f3390c67aedc9c18a50e767531e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jun 2013 02:37:42 -0700 Subject: neighbour: fix a race in neigh_destroy() There is a race in neighbour code, because neigh_destroy() uses skb_queue_purge(&neigh->arp_queue) without holding neighbour lock, while other parts of the code assume neighbour rwlock is what protects arp_queue Convert all skb_queue_purge() calls to the __skb_queue_purge() variant Use __skb_queue_head_init() instead of skb_queue_head_init() to make clear we do not use arp_queue.lock And hold neigh->lock in neigh_destroy() to close the race. Reported-by: Joe Jin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2569ab2cafbe..b7de821f98df 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) we must kill timers etc. and move it to safe state. */ - skb_queue_purge(&n->arp_queue); + __skb_queue_purge(&n->arp_queue); n->arp_queue_len_bytes = 0; n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) @@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device if (!n) goto out_entries; - skb_queue_head_init(&n->arp_queue); + __skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); seqlock_init(&n->ha_lock); n->updated = n->used = now; @@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh) if (neigh_del_timer(neigh)) pr_warn("Impossible event\n"); - skb_queue_purge(&neigh->arp_queue); + write_lock_bh(&neigh->lock); + __skb_queue_purge(&neigh->arp_queue); + write_unlock_bh(&neigh->lock); neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) @@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh) neigh->ops->error_report(neigh, skb); write_lock(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } @@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, write_lock_bh(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } out: -- cgit From aec0a40a6f78843c0ce73f7398230ee5184f896d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jun 2013 07:40:57 -0700 Subject: netem: use rb tree to implement the time queue Following typical setup to implement a ~100 ms RTT and big amount of reorders has very poor performance because netem implements the time queue using a linked list. ----------------------------------------------------------- ETH=eth0 IFB=ifb0 modprobe ifb ip link set dev $IFB up tc qdisc add dev $ETH ingress 2>/dev/null tc filter add dev $ETH parent ffff: \ protocol ip u32 match u32 0 0 flowid 1:1 action mirred egress \ redirect dev $IFB ethtool -K $ETH gro off tso off gso off tc qdisc add dev $IFB root netem delay 50ms 10ms limit 100000 tc qd add dev $ETH root netem delay 50ms limit 100000 --------------------------------------------------------- Switch netem time queue to a rb tree, so this kind of setup can work at high speed. Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 109 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 3d2acc7a9c80..ed0082cf8eff 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -68,7 +69,8 @@ */ struct netem_sched_data { - /* internal t(ime)fifo qdisc uses sch->q and sch->limit */ + /* internal t(ime)fifo qdisc uses t_root and sch->limit */ + struct rb_root t_root; /* optional qdisc for classful handling (NULL at netem init) */ struct Qdisc *qdisc; @@ -128,10 +130,35 @@ struct netem_sched_data { */ struct netem_skb_cb { psched_time_t time_to_send; + ktime_t tstamp_save; }; +/* Because space in skb->cb[] is tight, netem overloads skb->next/prev/tstamp + * to hold a rb_node structure. + * + * If struct sk_buff layout is changed, the following checks will complain. + */ +static struct rb_node *netem_rb_node(struct sk_buff *skb) +{ + BUILD_BUG_ON(offsetof(struct sk_buff, next) != 0); + BUILD_BUG_ON(offsetof(struct sk_buff, prev) != + offsetof(struct sk_buff, next) + sizeof(skb->next)); + BUILD_BUG_ON(offsetof(struct sk_buff, tstamp) != + offsetof(struct sk_buff, prev) + sizeof(skb->prev)); + BUILD_BUG_ON(sizeof(struct rb_node) > sizeof(skb->next) + + sizeof(skb->prev) + + sizeof(skb->tstamp)); + return (struct rb_node *)&skb->next; +} + +static struct sk_buff *netem_rb_to_skb(struct rb_node *rb) +{ + return (struct sk_buff *)rb; +} + static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { + /* we assume we can use skb next/prev/tstamp as storage for rb_node */ qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; } @@ -333,20 +360,23 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { - struct sk_buff_head *list = &sch->q; + struct netem_sched_data *q = qdisc_priv(sch); psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; - struct sk_buff *skb = skb_peek_tail(list); + struct rb_node **p = &q->t_root.rb_node, *parent = NULL; - /* Optimize for add at tail */ - if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) - return __skb_queue_tail(list, nskb); + while (*p) { + struct sk_buff *skb; - skb_queue_reverse_walk(list, skb) { + parent = *p; + skb = netem_rb_to_skb(parent); if (tnext >= netem_skb_cb(skb)->time_to_send) - break; + p = &parent->rb_right; + else + p = &parent->rb_left; } - - __skb_queue_after(list, skb, nskb); + rb_link_node(netem_rb_node(nskb), parent, p); + rb_insert_color(netem_rb_node(nskb), &q->t_root); + sch->q.qlen++; } /* @@ -436,23 +466,28 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) now = psched_get_time(); if (q->rate) { - struct sk_buff_head *list = &sch->q; + struct sk_buff *last; - if (!skb_queue_empty(list)) { + if (!skb_queue_empty(&sch->q)) + last = skb_peek_tail(&sch->q); + else + last = netem_rb_to_skb(rb_last(&q->t_root)); + if (last) { /* * Last packet in queue is reference point (now), * calculate this time bonus and subtract * from delay. */ - delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now; + delay -= netem_skb_cb(last)->time_to_send - now; delay = max_t(psched_tdiff_t, 0, delay); - now = netem_skb_cb(skb_peek_tail(list))->time_to_send; + now = netem_skb_cb(last)->time_to_send; } delay += packet_len_2_sched_time(skb->len, q); } cb->time_to_send = now + delay; + cb->tstamp_save = skb->tstamp; ++q->counter; tfifo_enqueue(skb, sch); } else { @@ -476,6 +511,21 @@ static unsigned int netem_drop(struct Qdisc *sch) unsigned int len; len = qdisc_queue_drop(sch); + + if (!len) { + struct rb_node *p = rb_first(&q->t_root); + + if (p) { + struct sk_buff *skb = netem_rb_to_skb(p); + + rb_erase(p, &q->t_root); + sch->q.qlen--; + skb->next = NULL; + skb->prev = NULL; + len = qdisc_pkt_len(skb); + kfree_skb(skb); + } + } if (!len && q->qdisc && q->qdisc->ops->drop) len = q->qdisc->ops->drop(q->qdisc); if (len) @@ -488,19 +538,32 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; + struct rb_node *p; if (qdisc_is_throttled(sch)) return NULL; tfifo_dequeue: - skb = qdisc_peek_head(sch); + skb = __skb_dequeue(&sch->q); if (skb) { - const struct netem_skb_cb *cb = netem_skb_cb(skb); +deliver: + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_unthrottled(sch); + qdisc_bstats_update(sch, skb); + return skb; + } + p = rb_first(&q->t_root); + if (p) { + skb = netem_rb_to_skb(p); /* if more time remaining? */ - if (cb->time_to_send <= psched_get_time()) { - __skb_unlink(skb, &sch->q); - sch->qstats.backlog -= qdisc_pkt_len(skb); + if (netem_skb_cb(skb)->time_to_send <= psched_get_time()) { + rb_erase(p, &q->t_root); + + sch->q.qlen--; + skb->next = NULL; + skb->prev = NULL; + skb->tstamp = netem_skb_cb(skb)->tstamp_save; #ifdef CONFIG_NET_CLS_ACT /* @@ -522,10 +585,7 @@ tfifo_dequeue: } goto tfifo_dequeue; } -deliver: - qdisc_unthrottled(sch); - qdisc_bstats_update(sch, skb); - return skb; + goto deliver; } if (q->qdisc) { @@ -533,7 +593,8 @@ deliver: if (skb) goto deliver; } - qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); + qdisc_watchdog_schedule(&q->watchdog, + netem_skb_cb(skb)->time_to_send); } if (q->qdisc) { -- cgit From 4ccb93ce7439b63c31bc7597bfffd13567fa483d Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 28 Jun 2013 12:13:52 -0400 Subject: x25: Fix broken locking in ioctl error paths. Two of the x25 ioctl cases have error paths that break out of the function without unlocking the socket, leading to this warning: ================================================ [ BUG: lock held when returning to user space! ] 3.10.0-rc7+ #36 Not tainted ------------------------------------------------ trinity-child2/31407 is leaving the kernel with locks still held! 1 lock held by trinity-child2/31407: #0: (sk_lock-AF_X25){+.+.+.}, at: [] x25_ioctl+0x8a/0x740 [x25] Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/x25/af_x25.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 1d964e23853f..45a3ab5612c1 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1583,11 +1583,11 @@ out_cud_release: case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; lock_sock(sk); - if (sk->sk_state != TCP_CLOSE) - break; - clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + if (sk->sk_state == TCP_CLOSE) { + clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + rc = 0; + } release_sock(sk); - rc = 0; break; } @@ -1595,14 +1595,15 @@ out_cud_release: rc = -EINVAL; lock_sock(sk); if (sk->sk_state != TCP_ESTABLISHED) - break; + goto out_sendcallaccpt_release; /* must call accptapprv above */ if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) - break; + goto out_sendcallaccpt_release; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; - release_sock(sk); rc = 0; +out_sendcallaccpt_release: + release_sock(sk); break; } -- cgit From bb33381d0c97cdee25f2cdab540b6e2bd16fa03b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 28 Jun 2013 19:49:40 +0200 Subject: net: sctp: rework debugging framework to use pr_debug and friends We should get rid of all own SCTP debug printk macros and use the ones that the kernel offers anyway instead. This makes the code more readable and conform to the kernel code, and offers all the features of dynamic debbuging that pr_debug() et al has, such as only turning on/off portions of debug messages at runtime through debugfs. The runtime cost of having CONFIG_DYNAMIC_DEBUG enabled, but none of the debug statements printing, is negligible [1]. If kernel debugging is completly turned off, then these statements will also compile into "empty" functions. While we're at it, we also need to change the Kconfig option as it /now/ only refers to the ifdef'ed code portions in outqueue.c that enable further debugging/tracing of SCTP transaction fields. Also, since SCTP_ASSERT code was enabled with this Kconfig option and has now been removed, we transform those code parts into WARNs resp. where appropriate BUG_ONs so that those bugs can be more easily detected as probably not many people have SCTP debugging permanently turned on. To turn on all SCTP debugging, the following steps are needed: # mount -t debugfs none /sys/kernel/debug # echo -n 'module sctp +p' > /sys/kernel/debug/dynamic_debug/control This can be done more fine-grained on a per file, per line basis and others as described in [2]. [1] https://www.kernel.org/doc/ols/2009/ols2009-pages-39-46.pdf [2] Documentation/dynamic-debug-howto.txt Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sctp/Kconfig | 12 ++-- net/sctp/associola.c | 67 ++++++++++--------- net/sctp/chunk.c | 5 +- net/sctp/debug.c | 4 -- net/sctp/endpointola.c | 5 +- net/sctp/input.c | 6 +- net/sctp/inqueue.c | 9 ++- net/sctp/ipv6.c | 21 +++--- net/sctp/output.c | 40 +++++------ net/sctp/outqueue.c | 160 ++++++++++++++++++++------------------------ net/sctp/protocol.c | 48 +++++++------- net/sctp/sm_make_chunk.c | 25 +++---- net/sctp/sm_sideeffect.c | 96 +++++++++++---------------- net/sctp/sm_statefuns.c | 77 +++++++++++++--------- net/sctp/socket.c | 168 ++++++++++++++++++++++------------------------- net/sctp/transport.c | 49 +++++++------- 16 files changed, 383 insertions(+), 409 deletions(-) (limited to 'net') diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index cf4852814e0c..d80bf1aebaed 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -30,7 +30,8 @@ menuconfig IP_SCTP homing at either or both ends of an association." To compile this protocol support as a module, choose M here: the - module will be called sctp. + module will be called sctp. Debug messages are handeled by the + kernel's dynamic debugging framework. If in doubt, say N. @@ -48,13 +49,14 @@ config NET_SCTPPROBE To compile this code as a module, choose M here: the module will be called sctp_probe. -config SCTP_DBG_MSG - bool "SCTP: Debug messages" +config SCTP_DBG_TSNS + bool "SCTP: Debug transactions" help - If you say Y, this will enable verbose debugging messages. + If you say Y, this will enable transaction debugging, visible + from the kernel's dynamic debugging framework. If unsure, say N. However, if you are running into problems, use - this option to gather detailed trace information + this option to gather outqueue trace information. config SCTP_DBG_OBJCNT bool "SCTP: Debug object counts" diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 9a383a8774e8..bce5b79662a6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -357,7 +357,8 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, goto fail_init; SCTP_DBG_OBJCNT_INC(assoc); - SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); + + pr_debug("Created asoc %p\n", asoc); return asoc; @@ -455,7 +456,10 @@ void sctp_association_free(struct sctp_association *asoc) /* Cleanup and free up an association. */ static void sctp_association_destroy(struct sctp_association *asoc) { - SCTP_ASSERT(asoc->base.dead, "Assoc is not dead", return); + if (unlikely(!asoc->base.dead)) { + WARN(1, "Attempt to destroy undead association %p!\n", asoc); + return; + } sctp_endpoint_put(asoc->ep); sock_put(asoc->base.sk); @@ -536,11 +540,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, struct list_head *pos; struct sctp_transport *transport; - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_rm_peer:association %p addr: ", - " port: %d\n", - asoc, - (&peer->ipaddr), - ntohs(peer->ipaddr.v4.sin_port)); + pr_debug("%s: association:%p addr:%pISpc\n", + __func__, asoc, &peer->ipaddr.sa); /* If we are to remove the current retran_path, update it * to the next peer before removing this peer from the list. @@ -636,12 +637,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* AF_INET and AF_INET6 share common port field. */ port = ntohs(addr->v4.sin_port); - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ", - " port: %d state:%d\n", - asoc, - addr, - port, - peer_state); + pr_debug("%s: association:%p addr:%pISpc state:%d\n", __func__, + asoc, &addr->sa, peer_state); /* Set the port if it has not been set yet. */ if (0 == asoc->peer.port) @@ -708,8 +705,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, else asoc->pathmtu = peer->pathmtu; - SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " - "%d\n", asoc, asoc->pathmtu); + pr_debug("%s: association:%p PMTU set to %d\n", __func__, asoc, + asoc->pathmtu); + peer->pmtu_pending = 0; asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); @@ -1349,12 +1347,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) else t = asoc->peer.retran_path; - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" - " %p addr: ", - " port: %d\n", - asoc, - (&t->ipaddr), - ntohs(t->ipaddr.v4.sin_port)); + pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc, + &t->ipaddr.sa); } /* Choose the transport for sending retransmit packet. */ @@ -1401,8 +1395,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) asoc->frag_point = sctp_frag_point(asoc, pmtu); } - SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", - __func__, asoc, asoc->pathmtu, asoc->frag_point); + pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc, + asoc->pathmtu, asoc->frag_point); } /* Should we send a SACK to update our peer? */ @@ -1454,9 +1448,9 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) asoc->rwnd_press -= change; } - SCTP_DEBUG_PRINTK("%s: asoc %p rwnd increased by %d to (%u, %u) " - "- %u\n", __func__, asoc, len, asoc->rwnd, - asoc->rwnd_over, asoc->a_rwnd); + pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->a_rwnd); /* Send a window update SACK if the rwnd has increased by at least the * minimum of the association's PMTU and half of the receive buffer. @@ -1465,9 +1459,11 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) */ if (sctp_peer_needs_update(asoc)) { asoc->a_rwnd = asoc->rwnd; - SCTP_DEBUG_PRINTK("%s: Sending window update SACK- asoc: %p " - "rwnd: %u a_rwnd: %u\n", __func__, - asoc, asoc->rwnd, asoc->a_rwnd); + + pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u " + "a_rwnd:%u\n", __func__, asoc, asoc->rwnd, + asoc->a_rwnd); + sack = sctp_make_sack(asoc); if (!sack) return; @@ -1489,8 +1485,10 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) int rx_count; int over = 0; - SCTP_ASSERT(asoc->rwnd, "rwnd zero", return); - SCTP_ASSERT(!asoc->rwnd_over, "rwnd_over not zero", return); + if (unlikely(!asoc->rwnd || asoc->rwnd_over)) + pr_debug("%s: association:%p has asoc->rwnd:%u, " + "asoc->rwnd_over:%u!\n", __func__, asoc, + asoc->rwnd, asoc->rwnd_over); if (asoc->ep->rcvbuf_policy) rx_count = atomic_read(&asoc->rmem_alloc); @@ -1515,9 +1513,10 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) asoc->rwnd_over = len - asoc->rwnd; asoc->rwnd = 0; } - SCTP_DEBUG_PRINTK("%s: asoc %p rwnd decreased by %d to (%u, %u, %u)\n", - __func__, asoc, len, asoc->rwnd, - asoc->rwnd_over, asoc->rwnd_press); + + pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->rwnd_press); } /* Build the bind address list for the association based on info from the diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7135fc0c087a..5780565f5b7d 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -193,8 +193,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, msg->expires_at = jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); msg->can_abandon = 1; - SCTP_DEBUG_PRINTK("%s: msg:%p expires_at: %ld jiffies:%ld\n", - __func__, msg, msg->expires_at, jiffies); + + pr_debug("%s: msg:%p expires_at:%ld jiffies:%ld\n", __func__, + msg, msg->expires_at, jiffies); } /* This is the biggest possible DATA chunk that can fit into diff --git a/net/sctp/debug.c b/net/sctp/debug.c index ec997cfe0a7e..f4998780d6df 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -47,10 +47,6 @@ #include -#if SCTP_DEBUG -int sctp_debug_flag = 1; /* Initially enable DEBUG */ -#endif /* SCTP_DEBUG */ - /* These are printable forms of Chunk ID's from section 3.1. */ static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = { "DATA", diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index b26999d508ba..9e3d257de0e0 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -249,7 +249,10 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; - SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); + if (unlikely(!ep->base.dead)) { + WARN(1, "Attempt to destroy undead endpoint %p!\n", ep); + return; + } /* Free the digest buffer */ kfree(ep->digest); diff --git a/net/sctp/input.c b/net/sctp/input.c index 4cfc74699a3f..3fa4d858c35a 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -454,8 +454,6 @@ void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t) { - SCTP_DEBUG_PRINTK("%s\n", __func__); - if (sock_owned_by_user(sk)) { if (timer_pending(&t->proto_unreach_timer)) return; @@ -464,10 +462,12 @@ void sctp_icmp_proto_unreachable(struct sock *sk, jiffies + (HZ/20))) sctp_association_hold(asoc); } - } else { struct net *net = sock_net(sk); + pr_debug("%s: unrecognized next header type " + "encountered!\n", __func__); + if (del_timer(&t->proto_unreach_timer)) sctp_association_put(asoc); diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 3221d073448c..cb25f040fed0 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -219,10 +219,10 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) chunk->end_of_packet = 1; } - SCTP_DEBUG_PRINTK("+++sctp_inq_pop+++ chunk %p[%s]," - " length %d, skb->len %d\n",chunk, - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), - ntohs(chunk->chunk_hdr->length), chunk->skb->len); + pr_debug("+++sctp_inq_pop+++ chunk:%p[%s], length:%d, skb->len:%d\n", + chunk, sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), + ntohs(chunk->chunk_hdr->length), chunk->skb->len); + return chunk; } @@ -238,4 +238,3 @@ void sctp_inq_set_th_handler(struct sctp_inq *q, work_func_t callback) { INIT_WORK(&q->immediate, callback); } - diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index adeaa0e64f52..09ffcc912d23 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -239,9 +239,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) fl6.daddr = *rt0->addr; } - SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", - __func__, skb, skb->len, - &fl6.saddr, &fl6.daddr); + pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, + skb->len, &fl6.saddr, &fl6.daddr); SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); @@ -276,7 +275,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) fl6->flowi6_oif = daddr->v6.sin6_scope_id; - SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6->daddr); + pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr); if (asoc) fl6->fl6_sport = htons(asoc->base.bind_addr.port); @@ -284,7 +283,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (saddr) { fl6->saddr = saddr->v6.sin6_addr; fl6->fl6_sport = saddr->v6.sin6_port; - SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr); + + pr_debug("src=%pI6 - ", &fl6->saddr); } dst = ip6_dst_lookup_flow(sk, fl6, NULL, false); @@ -348,13 +348,16 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, out: if (!IS_ERR_OR_NULL(dst)) { struct rt6_info *rt; + rt = (struct rt6_info *)dst; t->dst = dst; - SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n", - &rt->rt6i_dst.addr, &fl6->saddr); + + pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr, + &fl6->saddr); } else { t->dst = NULL; - SCTP_DEBUG_PRINTK("NO ROUTE\n"); + + pr_debug("no route\n"); } } @@ -377,7 +380,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, struct flowi6 *fl6 = &fl->u.ip6; union sctp_addr *saddr = &t->saddr; - SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst); + pr_debug("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst); if (t->dst) { saddr->v6.sin6_family = AF_INET6; diff --git a/net/sctp/output.c b/net/sctp/output.c index bbef4a7a9b56..a46d1eb41762 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -93,8 +93,7 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, { struct sctp_chunk *chunk = NULL; - SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __func__, - packet, vtag); + pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); packet->vtag = vtag; @@ -119,8 +118,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, struct sctp_association *asoc = transport->asoc; size_t overhead; - SCTP_DEBUG_PRINTK("%s: packet:%p transport:%p\n", __func__, - packet, transport); + pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport); packet->transport = transport; packet->source_port = sport; @@ -145,7 +143,7 @@ void sctp_packet_free(struct sctp_packet *packet) { struct sctp_chunk *chunk, *tmp; - SCTP_DEBUG_PRINTK("%s: packet:%p\n", __func__, packet); + pr_debug("%s: packet:%p\n", __func__, packet); list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); @@ -167,8 +165,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, sctp_xmit_t retval; int error = 0; - SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __func__, - packet, chunk); + pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { case SCTP_XMIT_PMTU_FULL: @@ -334,8 +331,7 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, { sctp_xmit_t retval = SCTP_XMIT_OK; - SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __func__, packet, - chunk); + pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); /* Data chunks are special. Before seeing what else we can * bundle into this packet, check to see if we are allowed to @@ -402,7 +398,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) unsigned char *auth = NULL; /* pointer to auth in skb data */ __u32 cksum_buf_len = sizeof(struct sctphdr); - SCTP_DEBUG_PRINTK("%s: packet:%p\n", __func__, packet); + pr_debug("%s: packet:%p\n", __func__, packet); /* Do NOT generate a chunkless packet. */ if (list_empty(&packet->chunk_list)) @@ -472,7 +468,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) * * [This whole comment explains WORD_ROUND() below.] */ - SCTP_DEBUG_PRINTK("***sctp_transmit_packet***\n"); + + pr_debug("***sctp_transmit_packet***\n"); + list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { @@ -505,16 +503,13 @@ int sctp_packet_transmit(struct sctp_packet *packet) memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, chunk->skb->len); - SCTP_DEBUG_PRINTK("%s %p[%s] %s 0x%x, %s %d, %s %d, %s %d\n", - "*** Chunk", chunk, - sctp_cname(SCTP_ST_CHUNK( - chunk->chunk_hdr->type)), - chunk->has_tsn ? "TSN" : "No TSN", - chunk->has_tsn ? - ntohl(chunk->subh.data_hdr->tsn) : 0, - "length", ntohs(chunk->chunk_hdr->length), - "chunk->skb->len", chunk->skb->len, - "rtt_in_progress", chunk->rtt_in_progress); + pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, " + "rtt_in_progress:%d\n", chunk, + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), + chunk->has_tsn ? "TSN" : "No TSN", + chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, + ntohs(chunk->chunk_hdr->length), chunk->skb->len, + chunk->rtt_in_progress); /* * If this is a control chunk, this is our last @@ -606,8 +601,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } } - SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", - nskb->len); + pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); nskb->local_df = packet->ipfragok; (*tp->af_specific->sctp_xmit)(nskb, tp); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index be35e2dbcc9a..511b3b35d609 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -299,10 +299,10 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) struct net *net = sock_net(q->asoc->base.sk); int error = 0; - SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n", - q, chunk, chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) - : "Illegal Chunk"); + pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk, + chunk && chunk->chunk_hdr ? + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk"); /* If it is data, queue it up, otherwise, send it * immediately. @@ -328,10 +328,10 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) break; default: - SCTP_DEBUG_PRINTK("outqueueing (%p, %p[%s])\n", - q, chunk, chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) - : "Illegal Chunk"); + pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n", + __func__, q, chunk, chunk && chunk->chunk_hdr ? + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk"); sctp_outq_tail_data(q, chunk); if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) @@ -460,14 +460,10 @@ void sctp_retransmit_mark(struct sctp_outq *q, } } - SCTP_DEBUG_PRINTK("%s: transport: %p, reason: %d, " - "cwnd: %d, ssthresh: %d, flight_size: %d, " - "pba: %d\n", __func__, - transport, reason, - transport->cwnd, transport->ssthresh, - transport->flight_size, - transport->partial_bytes_acked); - + pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d, " + "flight_size:%d, pba:%d\n", __func__, transport, reason, + transport->cwnd, transport->ssthresh, transport->flight_size, + transport->partial_bytes_acked); } /* Mark all the eligible packets on a transport for retransmission and force @@ -1014,19 +1010,13 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) sctp_transport_burst_limited(transport); } - SCTP_DEBUG_PRINTK("sctp_outq_flush(%p, %p[%s]), ", - q, chunk, - chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK( - chunk->chunk_hdr->type)) - : "Illegal Chunk"); - - SCTP_DEBUG_PRINTK("TX TSN 0x%x skb->head " - "%p skb->users %d.\n", - ntohl(chunk->subh.data_hdr->tsn), - chunk->skb ?chunk->skb->head : NULL, - chunk->skb ? - atomic_read(&chunk->skb->users) : -1); + pr_debug("%s: outq:%p, chunk:%p[%s], tx-tsn:0x%x skb->head:%p " + "skb->users:%d\n", + __func__, q, chunk, chunk && chunk->chunk_hdr ? + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk", ntohl(chunk->subh.data_hdr->tsn), + chunk->skb ? chunk->skb->head : NULL, chunk->skb ? + atomic_read(&chunk->skb->users) : -1); /* Add the chunk to the packet. */ status = sctp_packet_transmit_chunk(packet, chunk, 0); @@ -1038,10 +1028,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) /* We could not append this chunk, so put * the chunk back on the output queue. */ - SCTP_DEBUG_PRINTK("sctp_outq_flush: could " - "not transmit TSN: 0x%x, status: %d\n", - ntohl(chunk->subh.data_hdr->tsn), - status); + pr_debug("%s: could not transmit tsn:0x%x, status:%d\n", + __func__, ntohl(chunk->subh.data_hdr->tsn), + status); + sctp_outq_head_data(q, chunk); goto sctp_flush_out; break; @@ -1284,11 +1274,10 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) sctp_generate_fwdtsn(q, sack_ctsn); - SCTP_DEBUG_PRINTK("%s: sack Cumulative TSN Ack is 0x%x.\n", - __func__, sack_ctsn); - SCTP_DEBUG_PRINTK("%s: Cumulative TSN Ack of association, " - "%p is 0x%x. Adv peer ack point: 0x%x\n", - __func__, asoc, ctsn, asoc->adv_peer_ack_point); + pr_debug("%s: sack cumulative tsn ack:0x%x\n", __func__, sack_ctsn); + pr_debug("%s: cumulative tsn ack of assoc:%p is 0x%x, " + "advertised peer ack point:0x%x\n", __func__, asoc, ctsn, + asoc->adv_peer_ack_point); /* See if all chunks are acked. * Make sure the empty queue handler will get run later. @@ -1304,7 +1293,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) goto finish; } - SCTP_DEBUG_PRINTK("sack queue is empty.\n"); + pr_debug("%s: sack queue is empty\n", __func__); finish: return q->empty; } @@ -1348,7 +1337,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* These state variables are for coherent debug output. --xguo */ -#if SCTP_DEBUG +#ifdef CONFIG_SCTP_DBG_TSNS __u32 dbg_ack_tsn = 0; /* An ACKed TSN range starts here... */ __u32 dbg_last_ack_tsn = 0; /* ...and finishes here. */ __u32 dbg_kept_tsn = 0; /* An un-ACKed range starts here... */ @@ -1359,7 +1348,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * -1: We need to initialize. */ int dbg_prt_state = -1; -#endif /* SCTP_DEBUG */ +#endif /* CONFIG_SCTP_DBG_TSNS */ sack_ctsn = ntohl(sack->cum_tsn_ack); @@ -1483,7 +1472,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, list_add_tail(lchunk, &tlist); } -#if SCTP_DEBUG +#ifdef CONFIG_SCTP_DBG_TSNS switch (dbg_prt_state) { case 0: /* last TSN was ACKed */ if (dbg_last_ack_tsn + 1 == tsn) { @@ -1497,42 +1486,39 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Display the end of the * current range. */ - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_ack_tsn); + pr_cont("-%08x", dbg_last_ack_tsn); } /* Start a new range. */ - SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); + pr_cont(",%08x", tsn); dbg_ack_tsn = tsn; break; case 1: /* The last TSN was NOT ACKed. */ if (dbg_last_kept_tsn != dbg_kept_tsn) { /* Display the end of current range. */ - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_kept_tsn); + pr_cont("-%08x", dbg_last_kept_tsn); } - SCTP_DEBUG_PRINTK_CONT("\n"); - + pr_cont("\n"); /* FALL THROUGH... */ default: /* This is the first-ever TSN we examined. */ /* Start a new range of ACK-ed TSNs. */ - SCTP_DEBUG_PRINTK("ACKed: %08x", tsn); + pr_debug("ACKed: %08x", tsn); + dbg_prt_state = 0; dbg_ack_tsn = tsn; } dbg_last_ack_tsn = tsn; -#endif /* SCTP_DEBUG */ +#endif /* CONFIG_SCTP_DBG_TSNS */ } else { if (tchunk->tsn_gap_acked) { - SCTP_DEBUG_PRINTK("%s: Receiver reneged on " - "data TSN: 0x%x\n", - __func__, - tsn); + pr_debug("%s: receiver reneged on data TSN:0x%x\n", + __func__, tsn); + tchunk->tsn_gap_acked = 0; if (tchunk->transport) @@ -1552,7 +1538,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, list_add_tail(lchunk, &tlist); -#if SCTP_DEBUG +#ifdef CONFIG_SCTP_DBG_TSNS /* See the above comments on ACK-ed TSNs. */ switch (dbg_prt_state) { case 1: @@ -1560,50 +1546,47 @@ static void sctp_check_transmitted(struct sctp_outq *q, break; if (dbg_last_kept_tsn != dbg_kept_tsn) - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_kept_tsn); + pr_cont("-%08x", dbg_last_kept_tsn); - SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); + pr_cont(",%08x", tsn); dbg_kept_tsn = tsn; break; case 0: if (dbg_last_ack_tsn != dbg_ack_tsn) - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_ack_tsn); - SCTP_DEBUG_PRINTK_CONT("\n"); + pr_cont("-%08x", dbg_last_ack_tsn); + pr_cont("\n"); /* FALL THROUGH... */ default: - SCTP_DEBUG_PRINTK("KEPT: %08x",tsn); + pr_debug("KEPT: %08x", tsn); + dbg_prt_state = 1; dbg_kept_tsn = tsn; } dbg_last_kept_tsn = tsn; -#endif /* SCTP_DEBUG */ +#endif /* CONFIG_SCTP_DBG_TSNS */ } } -#if SCTP_DEBUG +#ifdef CONFIG_SCTP_DBG_TSNS /* Finish off the last range, displaying its ending TSN. */ switch (dbg_prt_state) { case 0: - if (dbg_last_ack_tsn != dbg_ack_tsn) { - SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn); - } else { - SCTP_DEBUG_PRINTK_CONT("\n"); - } - break; - + if (dbg_last_ack_tsn != dbg_ack_tsn) + pr_cont("-%08x\n", dbg_last_ack_tsn); + else + pr_cont("\n"); + break; case 1: - if (dbg_last_kept_tsn != dbg_kept_tsn) { - SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn); - } else { - SCTP_DEBUG_PRINTK_CONT("\n"); - } + if (dbg_last_kept_tsn != dbg_kept_tsn) + pr_cont("-%08x\n", dbg_last_kept_tsn); + else + pr_cont("\n"); + break; } -#endif /* SCTP_DEBUG */ +#endif /* CONFIG_SCTP_DBG_TSNS */ if (transport) { if (bytes_acked) { struct sctp_association *asoc = transport->asoc; @@ -1676,9 +1659,9 @@ static void sctp_check_transmitted(struct sctp_outq *q, !list_empty(&tlist) && (sack_ctsn+2 == q->asoc->next_tsn) && q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) { - SCTP_DEBUG_PRINTK("%s: SACK received for zero " - "window probe: %u\n", - __func__, sack_ctsn); + pr_debug("%s: sack received for zero window " + "probe:%u\n", __func__, sack_ctsn); + q->asoc->overall_error_count = 0; transport->error_count = 0; } @@ -1739,10 +1722,8 @@ static void sctp_mark_missing(struct sctp_outq *q, count_of_newacks, tsn)) { chunk->tsn_missing_report++; - SCTP_DEBUG_PRINTK( - "%s: TSN 0x%x missing counter: %d\n", - __func__, tsn, - chunk->tsn_missing_report); + pr_debug("%s: tsn:0x%x missing counter:%d\n", + __func__, tsn, chunk->tsn_missing_report); } } /* @@ -1762,11 +1743,10 @@ static void sctp_mark_missing(struct sctp_outq *q, if (do_fast_retransmit) sctp_retransmit(q, transport, SCTP_RTXR_FAST_RTX); - SCTP_DEBUG_PRINTK("%s: transport: %p, cwnd: %d, " - "ssthresh: %d, flight_size: %d, pba: %d\n", - __func__, transport, transport->cwnd, - transport->ssthresh, transport->flight_size, - transport->partial_bytes_acked); + pr_debug("%s: transport:%p, cwnd:%d, ssthresh:%d, " + "flight_size:%d, pba:%d\n", __func__, transport, + transport->cwnd, transport->ssthresh, + transport->flight_size, transport->partial_bytes_acked); } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1de49c802d83..4a17494d736c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -451,8 +451,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, fl4->fl4_sport = saddr->v4.sin_port; } - SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", - __func__, &fl4->daddr, &fl4->saddr); + pr_debug("%s: dst:%pI4, src:%pI4 - ", __func__, &fl4->daddr, + &fl4->saddr); rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) @@ -513,10 +513,10 @@ out_unlock: out: t->dst = dst; if (dst) - SCTP_DEBUG_PRINTK("rt_dst:%pI4, rt_src:%pI4\n", - &fl4->daddr, &fl4->saddr); + pr_debug("rt_dst:%pI4, rt_src:%pI4\n", + &fl4->daddr, &fl4->saddr); else - SCTP_DEBUG_PRINTK("NO ROUTE\n"); + pr_debug("no route\n"); } /* For v4, the source address is cached in the route entry(dst). So no need @@ -604,9 +604,9 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg) spin_lock_bh(&net->sctp.addr_wq_lock); list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { - SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ", - " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state, - addrw); + pr_debug("%s: the first ent in wq:%p is addr:%pISc for cmd:%d at " + "entry:%p\n", __func__, &net->sctp.addr_waitq, &addrw->a.sa, + addrw->state, addrw); #if IS_ENABLED(CONFIG_IPV6) /* Now we send an ASCONF for each association */ @@ -623,8 +623,10 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg) addrw->state == SCTP_ADDR_NEW) { unsigned long timeo_val; - SCTP_DEBUG_PRINTK("sctp_timo_handler: this is on DAD, trying %d sec later\n", - SCTP_ADDRESS_TICK_DELAY); + pr_debug("%s: this is on DAD, trying %d sec " + "later\n", __func__, + SCTP_ADDRESS_TICK_DELAY); + timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); mod_timer(&net->sctp.addr_wq_timer, timeo_val); @@ -641,7 +643,7 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg) continue; sctp_bh_lock_sock(sk); if (sctp_asconf_mgmt(sp, addrw) < 0) - SCTP_DEBUG_PRINTK("sctp_addrwq_timo_handler: sctp_asconf_mgmt failed\n"); + pr_debug("%s: sctp_asconf_mgmt failed\n", __func__); sctp_bh_unlock_sock(sk); } #if IS_ENABLED(CONFIG_IPV6) @@ -707,9 +709,10 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm addrw = sctp_addr_wq_lookup(net, addr); if (addrw) { if (addrw->state != cmd) { - SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ", - " in wq %p\n", addrw->state, &addrw->a, - &net->sctp.addr_waitq); + pr_debug("%s: offsets existing entry for %d, addr:%pISc " + "in wq:%p\n", __func__, addrw->state, &addrw->a.sa, + &net->sctp.addr_waitq); + list_del(&addrw->list); kfree(addrw); } @@ -725,8 +728,9 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm } addrw->state = cmd; list_add_tail(&addrw->list, &net->sctp.addr_waitq); - SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ", - " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq); + + pr_debug("%s: add new entry for cmd:%d, addr:%pISc in wq:%p\n", + __func__, addrw->state, &addrw->a.sa, &net->sctp.addr_waitq); if (!timer_pending(&net->sctp.addr_wq_timer)) { timeo_val = jiffies; @@ -952,15 +956,14 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, { struct inet_sock *inet = inet_sk(skb->sk); - SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", - __func__, skb, skb->len, - &transport->fl.u.ip4.saddr, - &transport->fl.u.ip4.daddr); + pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb, + skb->len, &transport->fl.u.ip4.saddr, &transport->fl.u.ip4.daddr); inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); + return ip_queue_xmit(skb, &transport->fl); } @@ -1321,9 +1324,8 @@ static __init int sctp_init(void) int max_share; int order; - /* SCTP_DEBUG sanity check. */ - if (!sctp_sanity_check()) - goto out; + BUILD_BUG_ON(sizeof(struct sctp_ulpevent) > + sizeof(((struct sk_buff *) 0)->cb)); /* Allocate bind_bucket and chunk caches. */ status = -ENOBUFS; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index dd71f1f9ba10..362ae6e2fd93 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -741,7 +741,8 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); - SCTP_DEBUG_PRINTK("sackCTSNAck sent: 0x%x.\n", ctsn); + + pr_debug("%s: sackCTSNAck sent:0x%x\n", __func__, ctsn); /* How much room is needed in the chunk? */ num_gabs = sctp_tsnmap_num_gabs(map, gabs); @@ -1287,10 +1288,8 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, if (!retval) goto nodata; - - if (!sk) { - SCTP_DEBUG_PRINTK("chunkifying skb %p w/o an sk\n", skb); - } + if (!sk) + pr_debug("%s: chunkifying skb:%p w/o an sk\n", __func__, skb); INIT_LIST_HEAD(&retval->list); retval->skb = skb; @@ -2191,8 +2190,9 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; fallthrough: default: - SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n", - ntohs(param.p->type), cid); + pr_debug("%s: unrecognized param:%d for chunk:%d\n", + __func__, ntohs(param.p->type), cid); + retval = sctp_process_unk_param(asoc, param, chunk, err_chunk); break; } @@ -2516,7 +2516,7 @@ do_addr_param: break; case SCTP_PARAM_HOST_NAME_ADDRESS: - SCTP_DEBUG_PRINTK("unimplemented SCTP_HOST_NAME_ADDRESS\n"); + pr_debug("%s: unimplemented SCTP_HOST_NAME_ADDRESS\n", __func__); break; case SCTP_PARAM_SUPPORTED_ADDRESS_TYPES: @@ -2662,8 +2662,8 @@ fall_through: * called prior to this routine. Simply log the error * here. */ - SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n", - ntohs(param.p->type), asoc); + pr_debug("%s: ignoring param:%d for association:%p.\n", + __func__, ntohs(param.p->type), asoc); break; } @@ -2805,7 +2805,10 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, totallen += paramlen; totallen += addr_param_len; del_pickup = 1; - SCTP_DEBUG_PRINTK("mkasconf_update_ip: picked same-scope del_pending addr, totallen for all addresses is %d\n", totallen); + + pr_debug("%s: picked same-scope del_pending addr, " + "totallen for all addresses is %d\n", + __func__, totallen); } } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index ff91f47b0239..cf6f84518222 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -257,7 +257,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->T3_rtx_timer, jiffies + (HZ/20))) @@ -297,9 +297,8 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy: timer %d\n", - __func__, - timeout_type); + pr_debug("%s: sock is busy: timer %d\n", __func__, + timeout_type); /* Try again later. */ if (!mod_timer(&asoc->timers[timeout_type], jiffies + (HZ/20))) @@ -377,7 +376,7 @@ void sctp_generate_heartbeat_event(unsigned long data) sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->hb_timer, jiffies + (HZ/20))) @@ -415,7 +414,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->proto_unreach_timer, @@ -521,11 +520,9 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, if (transport->state != SCTP_INACTIVE && (transport->error_count > transport->pathmaxrxt)) { - SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", - " transport IP: port:%d failed.\n", - asoc, - (&transport->ipaddr), - ntohs(transport->ipaddr.v4.sin_port)); + pr_debug("%s: association:%p transport addr:%pISpc failed\n", + __func__, asoc, &transport->ipaddr.sa); + sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_DOWN, SCTP_FAILED_THRESHOLD); @@ -804,8 +801,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, asoc->state = state; - SCTP_DEBUG_PRINTK("sctp_cmd_new_state: asoc %p[%s]\n", - asoc, sctp_state_tbl[state]); + pr_debug("%s: asoc:%p[%s]\n", __func__, asoc, sctp_state_tbl[state]); if (sctp_style(sk, TCP)) { /* Change the sk->sk_state of a TCP-style socket that has @@ -1017,15 +1013,11 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, asoc->timeouts[timer] = asoc->max_init_timeo; } asoc->init_cycle++; - SCTP_DEBUG_PRINTK( - "T1 %s Timeout adjustment" - " init_err_counter: %d" - " cycle: %d" - " timeout: %ld\n", - name, - asoc->init_err_counter, - asoc->init_cycle, - asoc->timeouts[timer]); + + pr_debug("%s: T1[%s] timeout adjustment init_err_counter:%d" + " cycle:%d timeout:%ld\n", __func__, name, + asoc->init_err_counter, asoc->init_cycle, + asoc->timeouts[timer]); } } @@ -1080,23 +1072,19 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. */ -#define DEBUG_PRE \ - SCTP_DEBUG_PRINTK("sctp_do_sm prefn: " \ - "ep %p, %s, %s, asoc %p[%s], %s\n", \ - ep, sctp_evttype_tbl[event_type], \ - (*debug_fn)(subtype), asoc, \ - sctp_state_tbl[state], state_fn->name) - -#define DEBUG_POST \ - SCTP_DEBUG_PRINTK("sctp_do_sm postfn: " \ - "asoc %p, status: %s\n", \ - asoc, sctp_status_tbl[status]) - -#define DEBUG_POST_SFX \ - SCTP_DEBUG_PRINTK("sctp_do_sm post sfx: error %d, asoc %p[%s]\n", \ - error, asoc, \ - sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \ - sctp_assoc2id(asoc)))?asoc->state:SCTP_STATE_CLOSED]) +#define debug_pre_sfn() \ + pr_debug("%s[pre-fn]: ep:%p, %s, %s, asoc:%p[%s], %s\n", __func__, \ + ep, sctp_evttype_tbl[event_type], (*debug_fn)(subtype), \ + asoc, sctp_state_tbl[state], state_fn->name) + +#define debug_post_sfn() \ + pr_debug("%s[post-fn]: asoc:%p, status:%s\n", __func__, asoc, \ + sctp_status_tbl[status]) + +#define debug_post_sfx() \ + pr_debug("%s[post-sfx]: error:%d, asoc:%p[%s]\n", __func__, error, \ + asoc, sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \ + sctp_assoc2id(asoc))) ? asoc->state : SCTP_STATE_CLOSED]) /* * This is the master state machine processing function. @@ -1116,7 +1104,6 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_disposition_t status; int error = 0; typedef const char *(printfn_t)(sctp_subtype_t); - static printfn_t *table[] = { NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname, }; @@ -1129,21 +1116,18 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_init_cmd_seq(&commands); - DEBUG_PRE; + debug_pre_sfn(); status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands); - DEBUG_POST; + debug_post_sfn(); error = sctp_side_effects(event_type, subtype, state, ep, asoc, event_arg, status, &commands, gfp); - DEBUG_POST_SFX; + debug_post_sfx(); return error; } -#undef DEBUG_PRE -#undef DEBUG_POST - /***************************************************************** * This the master state function side effect processing function. *****************************************************************/ @@ -1172,9 +1156,9 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, switch (status) { case SCTP_DISPOSITION_DISCARD: - SCTP_DEBUG_PRINTK("Ignored sctp protocol event - state %d, " - "event_type %d, event_id %d\n", - state, event_type, subtype.chunk); + pr_debug("%s: ignored sctp protocol event - state:%d, " + "event_type:%d, event_id:%d\n", __func__, state, + event_type, subtype.chunk); break; case SCTP_DISPOSITION_NOMEM: @@ -1425,18 +1409,18 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_CHUNK_ULP: /* Send a chunk to the sockets layer. */ - SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", - "chunk_up:", cmd->obj.chunk, - "ulpq:", &asoc->ulpq); + pr_debug("%s: sm_sideff: chunk_up:%p, ulpq:%p\n", + __func__, cmd->obj.chunk, &asoc->ulpq); + sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk, GFP_ATOMIC); break; case SCTP_CMD_EVENT_ULP: /* Send a notification to the sockets layer. */ - SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", - "event_up:",cmd->obj.ulpevent, - "ulpq:",&asoc->ulpq); + pr_debug("%s: sm_sideff: event_up:%p, ulpq:%p\n", + __func__, cmd->obj.ulpevent, &asoc->ulpq); + sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent); break; @@ -1601,7 +1585,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_REPORT_BAD_TAG: - SCTP_DEBUG_PRINTK("vtag mismatch!\n"); + pr_debug("%s: vtag mismatch!\n", __func__); break; case SCTP_CMD_STRIKE: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b3d186856513..f6b7109195a6 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1179,9 +1179,9 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, /* Check if the timestamp looks valid. */ if (time_after(hbinfo->sent_at, jiffies) || time_after(jiffies, hbinfo->sent_at + max_interval)) { - SCTP_DEBUG_PRINTK("%s: HEARTBEAT ACK with invalid timestamp " - "received for transport: %p\n", - __func__, link); + pr_debug("%s: HEARTBEAT ACK with invalid timestamp received " + "for transport:%p\n", __func__, link); + return SCTP_DISPOSITION_DISCARD; } @@ -2562,7 +2562,8 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, const struct sctp_association *asoc, struct sctp_transport *transport) { - SCTP_DEBUG_PRINTK("ABORT received (INIT).\n"); + pr_debug("%s: ABORT received (INIT)\n", __func__); + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); @@ -2572,6 +2573,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, /* CMD_INIT_FAILED will DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, SCTP_PERR(error)); + return SCTP_DISPOSITION_ABORT; } @@ -2637,8 +2639,9 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net, ctsn = ntohl(sdh->cum_tsn_ack); if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { - SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); - SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn, + asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; } @@ -2721,8 +2724,9 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net, ctsn = ntohl(sdh->cum_tsn_ack); if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { - SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); - SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn, + asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; } @@ -3174,8 +3178,9 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, * Point indicates an out-of-order SACK. */ if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { - SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); - SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn, + asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; } @@ -3859,7 +3864,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, skb_pull(chunk->skb, len); tsn = ntohl(fwdtsn_hdr->new_cum_tsn); - SCTP_DEBUG_PRINTK("%s: TSN 0x%x.\n", __func__, tsn); + pr_debug("%s: TSN 0x%x\n", __func__, tsn); /* The TSN is too high--silently discard the chunk and count on it * getting retransmitted later. @@ -3927,7 +3932,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( skb_pull(chunk->skb, len); tsn = ntohl(fwdtsn_hdr->new_cum_tsn); - SCTP_DEBUG_PRINTK("%s: TSN 0x%x.\n", __func__, tsn); + pr_debug("%s: TSN 0x%x\n", __func__, tsn); /* The TSN is too high--silently discard the chunk and count on it * getting retransmitted later. @@ -4166,7 +4171,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, struct sctp_chunk *err_chunk; sctp_chunkhdr_t *hdr; - SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk); + pr_debug("%s: processing unknown chunk id:%d\n", __func__, type.chunk); if (!sctp_vtag_verify(unk_chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -4256,7 +4261,8 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk); + pr_debug("%s: chunk:%d is discarded\n", __func__, type.chunk); + return SCTP_DISPOSITION_DISCARD; } @@ -5184,7 +5190,9 @@ sctp_disposition_t sctp_sf_ignore_primitive( void *arg, sctp_cmd_seq_t *commands) { - SCTP_DEBUG_PRINTK("Primitive type %d is ignored.\n", type.primitive); + pr_debug("%s: primitive type:%d is ignored\n", __func__, + type.primitive); + return SCTP_DISPOSITION_DISCARD; } @@ -5379,7 +5387,9 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - SCTP_DEBUG_PRINTK("The event other type %d is ignored\n", type.other); + pr_debug("%s: the event other type:%d is ignored\n", + __func__, type.other); + return SCTP_DISPOSITION_DISCARD; } @@ -5527,7 +5537,8 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, struct sctp_bind_addr *bp; int attempts = asoc->init_err_counter + 1; - SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); + pr_debug("%s: timer T1 expired (INIT)\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { @@ -5546,9 +5557,10 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); } else { - SCTP_DEBUG_PRINTK("Giving up on INIT, attempts: %d" - " max_init_attempts: %d\n", - attempts, asoc->max_init_attempts); + pr_debug("%s: giving up on INIT, attempts:%d " + "max_init_attempts:%d\n", __func__, attempts, + asoc->max_init_attempts); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, @@ -5588,7 +5600,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net, struct sctp_chunk *repl = NULL; int attempts = asoc->init_err_counter + 1; - SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); + pr_debug("%s: timer T1 expired (COOKIE-ECHO)\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { @@ -5636,7 +5649,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net, { struct sctp_chunk *reply = NULL; - SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); + pr_debug("%s: timer T2 expired\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS); ((struct sctp_association *)asoc)->shutdown_retries++; @@ -5777,7 +5791,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net, { struct sctp_chunk *reply = NULL; - SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); + pr_debug("%s: timer T5 expired\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); @@ -5892,7 +5907,8 @@ sctp_disposition_t sctp_sf_timer_ignore(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - SCTP_DEBUG_PRINTK("Timer %d ignored.\n", type.chunk); + pr_debug("%s: timer %d ignored\n", __func__, type.chunk); + return SCTP_DISPOSITION_CONSUME; } @@ -6102,7 +6118,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); tsn = ntohl(data_hdr->tsn); - SCTP_DEBUG_PRINTK("eat_data: TSN 0x%x.\n", tsn); + pr_debug("%s: TSN 0x%x\n", __func__, tsn); /* ASSERT: Now skb->data is really the user data. */ @@ -6179,12 +6195,12 @@ static int sctp_eat_data(const struct sctp_association *asoc, */ if (sctp_tsnmap_has_gap(map) && (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { - SCTP_DEBUG_PRINTK("Reneging for tsn:%u\n", tsn); + pr_debug("%s: reneging for tsn:%u\n", __func__, tsn); deliver = SCTP_CMD_RENEGE; } else { - SCTP_DEBUG_PRINTK("Discard tsn: %u len: %Zd, " - "rwnd: %d\n", tsn, datalen, - asoc->rwnd); + pr_debug("%s: discard tsn:%u len:%zu, rwnd:%d\n", + __func__, tsn, datalen, asoc->rwnd); + return SCTP_IERROR_IGNORE_TSN; } } @@ -6199,7 +6215,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, if (*sk->sk_prot_creator->memory_pressure) { if (sctp_tsnmap_has_gap(map) && (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { - SCTP_DEBUG_PRINTK("Under Pressure! Reneging for tsn:%u\n", tsn); + pr_debug("%s: under pressure, reneging for tsn:%u\n", + __func__, tsn); deliver = SCTP_CMD_RENEGE; } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 66fcdcfe1b74..d5c6a2870473 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -281,8 +281,8 @@ static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) sctp_lock_sock(sk); - SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, addr: %p, addr_len: %d)\n", - sk, addr, addr_len); + pr_debug("%s: sk:%p, addr:%p, addr_len:%d\n", __func__, sk, + addr, addr_len); /* Disallow binding twice. */ if (!sctp_sk(sk)->ep->base.bind_addr.port) @@ -342,19 +342,15 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Common sockaddr verification. */ af = sctp_sockaddr_af(sp, addr, len); if (!af) { - SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d) EINVAL\n", - sk, addr, len); + pr_debug("%s: sk:%p, newaddr:%p, len:%d EINVAL\n", + __func__, sk, addr, len); return -EINVAL; } snum = ntohs(addr->v4.sin_port); - SCTP_DEBUG_PRINTK_IPADDR("sctp_do_bind(sk: %p, new addr: ", - ", port: %d, new port: %d, len: %d)\n", - sk, - addr, - bp->port, snum, - len); + pr_debug("%s: sk:%p, new addr:%pISc, port:%d, new port:%d, len:%d\n", + __func__, sk, &addr->sa, bp->port, snum, len); /* PF specific bind() address verification. */ if (!sp->pf->bind_verify(sp, addr)) @@ -368,9 +364,8 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (!snum) snum = bp->port; else if (snum != bp->port) { - SCTP_DEBUG_PRINTK("sctp_do_bind:" - " New port %d does not match existing port " - "%d.\n", snum, bp->port); + pr_debug("%s: new port %d doesn't match existing port " + "%d\n", __func__, snum, bp->port); return -EINVAL; } } @@ -468,8 +463,8 @@ static int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt) struct sockaddr *sa_addr; struct sctp_af *af; - SCTP_DEBUG_PRINTK("sctp_bindx_add (sk: %p, addrs: %p, addrcnt: %d)\n", - sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, + addrs, addrcnt); addr_buf = addrs; for (cnt = 0; cnt < addrcnt; cnt++) { @@ -535,11 +530,10 @@ static int sctp_send_asconf_add_ip(struct sock *sk, sp = sctp_sk(sk); ep = sp->ep; - SCTP_DEBUG_PRINTK("%s: (sk: %p, addrs: %p, addrcnt: %d)\n", - __func__, sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", + __func__, sk, addrs, addrcnt); list_for_each_entry(asoc, &ep->asocs, asocs) { - if (!asoc->peer.asconf_capable) continue; @@ -646,8 +640,8 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) union sctp_addr *sa_addr; struct sctp_af *af; - SCTP_DEBUG_PRINTK("sctp_bindx_rem (sk: %p, addrs: %p, addrcnt: %d)\n", - sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", + __func__, sk, addrs, addrcnt); addr_buf = addrs; for (cnt = 0; cnt < addrcnt; cnt++) { @@ -740,8 +734,8 @@ static int sctp_send_asconf_del_ip(struct sock *sk, sp = sctp_sk(sk); ep = sp->ep; - SCTP_DEBUG_PRINTK("%s: (sk: %p, addrs: %p, addrcnt: %d)\n", - __func__, sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", + __func__, sk, addrs, addrcnt); list_for_each_entry(asoc, &ep->asocs, asocs) { @@ -808,9 +802,11 @@ static int sctp_send_asconf_del_ip(struct sock *sk, sin6 = (struct sockaddr_in6 *)addrs; asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr; } - SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ", - " at %p\n", asoc, asoc->asconf_addr_del_pending, - asoc->asconf_addr_del_pending); + + pr_debug("%s: keep the last address asoc:%p %pISc at %p\n", + __func__, asoc, &asoc->asconf_addr_del_pending->sa, + asoc->asconf_addr_del_pending); + asoc->src_out_of_asoc_ok = 1; stored = 1; goto skip_mkasconf; @@ -972,8 +968,8 @@ static int sctp_setsockopt_bindx(struct sock* sk, void *addr_buf; struct sctp_af *af; - SCTP_DEBUG_PRINTK("sctp_setsockopt_bindx: sk %p addrs %p" - " addrs_size %d opt %d\n", sk, addrs, addrs_size, op); + pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n", + __func__, sk, addrs, addrs_size, op); if (unlikely(addrs_size <= 0)) return -EINVAL; @@ -1231,10 +1227,9 @@ static int __sctp_connect(struct sock* sk, asoc = NULL; out_free: + pr_debug("%s: took out_free path with asoc:%p kaddrs:%p err:%d\n", + __func__, asoc, kaddrs, err); - SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p" - " kaddrs: %p err: %d\n", - asoc, kaddrs, err); if (asoc) { /* sctp_primitive_ASSOCIATE may have added this association * To the hash table, try to unhash it, just in case, its a noop @@ -1316,8 +1311,8 @@ static int __sctp_setsockopt_connectx(struct sock* sk, int err = 0; struct sockaddr *kaddrs; - SCTP_DEBUG_PRINTK("%s - sk %p addrs %p addrs_size %d\n", - __func__, sk, addrs, addrs_size); + pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", + __func__, sk, addrs, addrs_size); if (unlikely(addrs_size <= 0)) return -EINVAL; @@ -1468,7 +1463,7 @@ static void sctp_close(struct sock *sk, long timeout) struct list_head *pos, *temp; unsigned int data_was_unread; - SCTP_DEBUG_PRINTK("sctp_close(sk: 0x%p, timeout:%ld)\n", sk, timeout); + pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout); sctp_lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; @@ -1594,14 +1589,12 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; - SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %zu)\n", - sk, msg, msg_len); - err = 0; sp = sctp_sk(sk); ep = sp->ep; - SCTP_DEBUG_PRINTK("Using endpoint: %p.\n", ep); + pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk, + msg, msg_len, ep); /* We cannot send a message over a TCP-style listening socket. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) { @@ -1611,9 +1604,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* Parse out the SCTP CMSGs. */ err = sctp_msghdr_parse(msg, &cmsgs); - if (err) { - SCTP_DEBUG_PRINTK("msghdr parse err = %x\n", err); + pr_debug("%s: msghdr parse err:%x\n", __func__, err); goto out_nounlock; } @@ -1645,8 +1637,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, associd = sinfo->sinfo_assoc_id; } - SCTP_DEBUG_PRINTK("msg_len: %zu, sinfo_flags: 0x%x\n", - msg_len, sinfo_flags); + pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__, + msg_len, sinfo_flags); /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */ if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) { @@ -1675,7 +1667,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, transport = NULL; - SCTP_DEBUG_PRINTK("About to look up association.\n"); + pr_debug("%s: about to look up association\n", __func__); sctp_lock_sock(sk); @@ -1705,7 +1697,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } if (asoc) { - SCTP_DEBUG_PRINTK("Just looked up association: %p.\n", asoc); + pr_debug("%s: just looked up association:%p\n", __func__, asoc); /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED * socket that has an association in CLOSED state. This can @@ -1718,8 +1710,9 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } if (sinfo_flags & SCTP_EOF) { - SCTP_DEBUG_PRINTK("Shutting down association: %p\n", - asoc); + pr_debug("%s: shutting down association:%p\n", + __func__, asoc); + sctp_primitive_SHUTDOWN(net, asoc, NULL); err = 0; goto out_unlock; @@ -1732,7 +1725,9 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; } - SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); + pr_debug("%s: aborting association:%p\n", + __func__, asoc); + sctp_primitive_ABORT(net, asoc, chunk); err = 0; goto out_unlock; @@ -1741,7 +1736,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* Do we need to create the association? */ if (!asoc) { - SCTP_DEBUG_PRINTK("There is no association yet.\n"); + pr_debug("%s: there is no association yet\n", __func__); if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) { err = -EINVAL; @@ -1840,7 +1835,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } /* ASSERT: we have a valid association at this point. */ - SCTP_DEBUG_PRINTK("We have a valid association.\n"); + pr_debug("%s: we have a valid association\n", __func__); if (!sinfo) { /* If the user didn't specify SNDRCVINFO, make up one with @@ -1909,7 +1904,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err < 0) goto out_free; - SCTP_DEBUG_PRINTK("We associated primitively.\n"); + + pr_debug("%s: we associated primitively\n", __func__); } /* Break the message into multiple chunks of maximum size. */ @@ -1936,17 +1932,15 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, */ err = sctp_primitive_SEND(net, asoc, datamsg); /* Did the lower layer accept the chunk? */ - if (err) + if (err) { sctp_datamsg_free(datamsg); - else - sctp_datamsg_put(datamsg); + goto out_free; + } - SCTP_DEBUG_PRINTK("We sent primitively.\n"); + pr_debug("%s: we sent primitively\n", __func__); - if (err) - goto out_free; - else - err = msg_len; + sctp_datamsg_put(datamsg); + err = msg_len; /* If we are already past ASSOCIATE, the lower * layers are responsible for association cleanup. @@ -2041,10 +2035,9 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, int err = 0; int skb_len; - SCTP_DEBUG_PRINTK("sctp_recvmsg(%s: %p, %s: %p, %s: %zd, %s: %d, %s: " - "0x%x, %s: %p)\n", "sk", sk, "msghdr", msg, - "len", len, "knoblauch", noblock, - "flags", flags, "addr_len", addr_len); + pr_debug("%s: sk:%p, msghdr:%p, len:%zd, noblock:%d, flags:0x%x, " + "addr_len:%p)\n", __func__, sk, msg, len, noblock, flags, + addr_len); sctp_lock_sock(sk); @@ -3086,7 +3079,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva err = sctp_send_asconf(asoc, chunk); - SCTP_DEBUG_PRINTK("We set peer primary addr primitively.\n"); + pr_debug("%s: we set peer primary addr primitively\n", __func__); return err; } @@ -3561,8 +3554,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, { int retval = 0; - SCTP_DEBUG_PRINTK("sctp_setsockopt(sk: %p... optname: %d)\n", - sk, optname); + pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname); /* I can hardly begin to describe how wrong this is. This is * so broken as to be worse than useless. The API draft @@ -3724,8 +3716,8 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, sctp_lock_sock(sk); - SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d\n", - __func__, sk, addr, addr_len); + pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, + addr, addr_len); /* Validate addr_len before calling common connect/connectx routine. */ af = sctp_get_af_specific(addr->sa_family); @@ -3855,7 +3847,7 @@ static int sctp_init_sock(struct sock *sk) struct net *net = sock_net(sk); struct sctp_sock *sp; - SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk); + pr_debug("%s: sk:%p\n", __func__, sk); sp = sctp_sk(sk); @@ -3990,7 +3982,7 @@ static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; - SCTP_DEBUG_PRINTK("sctp_destroy_sock(sk: %p)\n", sk); + pr_debug("%s: sk:%p\n", __func__, sk); /* Release our hold on the endpoint. */ sp = sctp_sk(sk); @@ -4123,9 +4115,9 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, goto out; } - SCTP_DEBUG_PRINTK("sctp_getsockopt_sctp_status(%d): %d %d %d\n", - len, status.sstat_state, status.sstat_rwnd, - status.sstat_assoc_id); + pr_debug("%s: len:%d, state:%d, rwnd:%d, assoc_id:%d\n", + __func__, len, status.sstat_state, status.sstat_rwnd, + status.sstat_assoc_id); if (copy_to_user(optval, &status, len)) { retval = -EFAULT; @@ -4333,8 +4325,8 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval return PTR_ERR(newfile); } - SCTP_DEBUG_PRINTK("%s: sk: %p newsk: %p sd: %d\n", - __func__, sk, newsock->sk, retval); + pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk, + retval); /* Return the fd mapped to the new socket. */ if (put_user(len, optlen)) { @@ -4467,7 +4459,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) { - SCTP_DEBUG_PRINTK("Failed no transport\n"); + pr_debug("%s: failed no transport\n", __func__); return -EINVAL; } } @@ -4478,7 +4470,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { - SCTP_DEBUG_PRINTK("Failed no association\n"); + pr_debug("%s: failed no association\n", __func__); return -EINVAL; } @@ -5698,8 +5690,7 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, if (put_user(len, optlen)) return -EFAULT; - SCTP_DEBUG_PRINTK("sctp_getsockopt_assoc_stat(%d): %d\n", - len, sas.sas_assoc_id); + pr_debug("%s: len:%d, assoc_id:%d\n", __func__, len, sas.sas_assoc_id); if (copy_to_user(optval, &sas, len)) return -EFAULT; @@ -5713,8 +5704,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, int retval = 0; int len; - SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p... optname: %d)\n", - sk, optname); + pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname); /* I can hardly begin to describe how wrong this is. This is * so broken as to be worse than useless. The API draft @@ -5894,7 +5884,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) snum = ntohs(addr->v4.sin_port); - SCTP_DEBUG_PRINTK("sctp_get_port() begins, snum=%d\n", snum); + pr_debug("%s: begins, snum:%d\n", __func__, snum); + sctp_local_bh_disable(); if (snum == 0) { @@ -5960,7 +5951,8 @@ pp_found: int reuse = sk->sk_reuse; struct sock *sk2; - SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); + pr_debug("%s: found a possible match\n", __func__); + if (pp->fastreuse && sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING) goto success; @@ -5990,7 +5982,8 @@ pp_found: goto fail_unlock; } } - SCTP_DEBUG_PRINTK("sctp_get_port(): Found a match\n"); + + pr_debug("%s: found a match\n", __func__); } pp_not_found: /* If there was a hash table miss, create a new port. */ @@ -6479,8 +6472,8 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, timeo = sock_rcvtimeo(sk, noblock); - SCTP_DEBUG_PRINTK("Timeout: timeo: %ld, MAX: %ld.\n", - timeo, MAX_SCHEDULE_TIMEOUT); + pr_debug("%s: timeo:%ld, max:%ld\n", __func__, timeo, + MAX_SCHEDULE_TIMEOUT); do { /* Again only user level code calls this function, @@ -6611,8 +6604,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, long current_timeo = *timeo_p; DEFINE_WAIT(wait); - SCTP_DEBUG_PRINTK("wait_for_sndbuf: asoc=%p, timeo=%ld, msg_len=%zu\n", - asoc, (long)(*timeo_p), msg_len); + pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, + *timeo_p, msg_len); /* Increment the association's refcnt. */ sctp_association_hold(asoc); @@ -6718,8 +6711,7 @@ static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p) long current_timeo = *timeo_p; DEFINE_WAIT(wait); - SCTP_DEBUG_PRINTK("%s: asoc=%p, timeo=%ld\n", __func__, asoc, - (long)(*timeo_p)); + pr_debug("%s: asoc:%p, timeo:%ld\n", __func__, asoc, *timeo_p); /* Increment the association's refcnt. */ sctp_association_hold(asoc); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 5d3c71bbd197..bdbbc3fd7c14 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -176,7 +176,10 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head) */ static void sctp_transport_destroy(struct sctp_transport *transport) { - SCTP_ASSERT(transport->dead, "Transport is not dead", return); + if (unlikely(!transport->dead)) { + WARN(1, "Attempt to destroy undead transport %p!\n", transport); + return; + } call_rcu(&transport->rcu, sctp_transport_destroy_rcu); @@ -317,11 +320,9 @@ void sctp_transport_put(struct sctp_transport *transport) /* Update transport's RTO based on the newly calculated RTT. */ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) { - /* Check for valid transport. */ - SCTP_ASSERT(tp, "NULL transport", return); - - /* We should not be doing any RTO updates unless rto_pending is set. */ - SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return); + if (unlikely(!tp->rto_pending)) + /* We should not be doing any RTO updates unless rto_pending is set. */ + pr_debug("%s: rto_pending not set on transport %p!\n", __func__, tp); if (tp->rttvar || tp->srtt) { struct net *net = sock_net(tp->asoc->base.sk); @@ -377,9 +378,8 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) */ tp->rto_pending = 0; - SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d " - "rttvar: %d, rto: %ld\n", __func__, - tp, rtt, tp->srtt, tp->rttvar, tp->rto); + pr_debug("%s: transport:%p, rtt:%d, srtt:%d rttvar:%d, rto:%ld\n", + __func__, tp, rtt, tp->srtt, tp->rttvar, tp->rto); } /* This routine updates the transport's cwnd and partial_bytes_acked @@ -433,12 +433,11 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, cwnd += pmtu; else cwnd += bytes_acked; - SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, " - "bytes_acked: %d, cwnd: %d, ssthresh: %d, " - "flight_size: %d, pba: %d\n", - __func__, - transport, bytes_acked, cwnd, - ssthresh, flight_size, pba); + + pr_debug("%s: slow start: transport:%p, bytes_acked:%d, " + "cwnd:%d, ssthresh:%d, flight_size:%d, pba:%d\n", + __func__, transport, bytes_acked, cwnd, ssthresh, + flight_size, pba); } else { /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh, * upon each SACK arrival that advances the Cumulative TSN Ack @@ -459,12 +458,12 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, cwnd += pmtu; pba = ((cwnd < pba) ? (pba - cwnd) : 0); } - SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: " - "transport: %p, bytes_acked: %d, cwnd: %d, " - "ssthresh: %d, flight_size: %d, pba: %d\n", - __func__, - transport, bytes_acked, cwnd, - ssthresh, flight_size, pba); + + pr_debug("%s: congestion avoidance: transport:%p, " + "bytes_acked:%d, cwnd:%d, ssthresh:%d, " + "flight_size:%d, pba:%d\n", __func__, + transport, bytes_acked, cwnd, ssthresh, + flight_size, pba); } transport->cwnd = cwnd; @@ -558,10 +557,10 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, } transport->partial_bytes_acked = 0; - SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: " - "%d ssthresh: %d\n", __func__, - transport, reason, - transport->cwnd, transport->ssthresh); + + pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d\n", + __func__, transport, reason, transport->cwnd, + transport->ssthresh); } /* Apply Max.Burst limit to the congestion window: -- cgit From 6c734fb8592f6768170e48e7102cb2f0a1bb9759 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Jun 2013 12:02:59 +0800 Subject: gre: fix a regression in ioctl When testing GRE tunnel, I got: # ip tunnel show get tunnel gre0 failed: Invalid argument get tunnel gre1 failed: Invalid argument This is a regression introduced by commit c54419321455631079c7d ("GRE: Refactor GRE tunneling code.") because previously we only check the parameters for SIOCADDTUNNEL and SIOCCHGTUNNEL, after that commit, the check is moved for all commands. So, just check for SIOCADDTUNNEL and SIOCCHGTUNNEL. After this patch I got: # ip tunnel show gre0: gre/ip remote any local any ttl inherit nopmtudisc gre1: gre/ip remote 192.168.122.101 local 192.168.122.45 ttl inherit Cc: Pravin B Shelar Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c326e869993a..1f6eab66f7ce 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -314,10 +314,11 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + return -EINVAL; } p.i_flags = gre_flags_to_tnl_flags(p.i_flags); p.o_flags = gre_flags_to_tnl_flags(p.o_flags); -- cgit From ab6c7a0a43c2eaafa57583822b619b22637b49c7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Jun 2013 13:00:57 +0800 Subject: vti: remove duplicated code to fix a memory leak vti module allocates dev->tstats twice: in vti_fb_tunnel_init() and in vti_tunnel_init(), this lead to a memory leak of dev->tstats. Just remove the duplicated operations in vti_fb_tunnel_init(). (candidate for -stable) Cc: Stephen Hemminger Cc: Saurabh Mohan Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_vti.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c118f6b576bb..17cc0ffa8c0d 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -606,17 +606,10 @@ static int __net_init vti_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - iph->version = 4; iph->protocol = IPPROTO_IPIP; iph->ihl = 5; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - dev_hold(dev); rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); return 0; -- cgit From 8965779d2c0e6ab246c82a405236b1fb2adae6b2 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Sat, 29 Jun 2013 21:30:49 +0800 Subject: ipv6,mcast: always hold idev->lock before mca_lock dingtianhong reported the following deadlock detected by lockdep: ====================================================== [ INFO: possible circular locking dependency detected ] 3.4.24.05-0.1-default #1 Not tainted ------------------------------------------------------- ksoftirqd/0/3 is trying to acquire lock: (&ndev->lock){+.+...}, at: [] ipv6_get_lladdr+0x74/0x120 but task is already holding lock: (&mc->mca_lock){+.+...}, at: [] mld_send_report+0x40/0x150 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mc->mca_lock){+.+...}: [] validate_chain+0x637/0x730 [] __lock_acquire+0x2f7/0x500 [] lock_acquire+0x114/0x150 [] rt_spin_lock+0x4a/0x60 [] igmp6_group_added+0x3b/0x120 [] ipv6_mc_up+0x38/0x60 [] ipv6_find_idev+0x3d/0x80 [] addrconf_notify+0x3d5/0x4b0 [] notifier_call_chain+0x3f/0x80 [] raw_notifier_call_chain+0x11/0x20 [] call_netdevice_notifiers+0x32/0x60 [] __dev_notify_flags+0x34/0x80 [] dev_change_flags+0x40/0x70 [] do_setlink+0x237/0x8a0 [] rtnl_newlink+0x3ec/0x600 [] rtnetlink_rcv_msg+0x160/0x310 [] netlink_rcv_skb+0x89/0xb0 [] rtnetlink_rcv+0x27/0x40 [] netlink_unicast+0x140/0x180 [] netlink_sendmsg+0x33e/0x380 [] sock_sendmsg+0x112/0x130 [] __sys_sendmsg+0x44e/0x460 [] sys_sendmsg+0x44/0x70 [] system_call_fastpath+0x16/0x1b -> #0 (&ndev->lock){+.+...}: [] check_prev_add+0x3de/0x440 [] validate_chain+0x637/0x730 [] __lock_acquire+0x2f7/0x500 [] lock_acquire+0x114/0x150 [] rt_read_lock+0x42/0x60 [] ipv6_get_lladdr+0x74/0x120 [] mld_newpack+0xb6/0x160 [] add_grhead+0xab/0xc0 [] add_grec+0x3ab/0x460 [] mld_send_report+0x5a/0x150 [] igmp6_timer_handler+0x4e/0xb0 [] call_timer_fn+0xca/0x1d0 [] run_timer_softirq+0x1df/0x2e0 [] handle_pending_softirqs+0xf7/0x1f0 [] __do_softirq_common+0x7b/0xf0 [] __thread_do_softirq+0x1af/0x210 [] run_ksoftirqd+0xe1/0x1f0 [] kthread+0xae/0xc0 [] kernel_thread_helper+0x4/0x10 actually we can just hold idev->lock before taking pmc->mca_lock, and avoid taking idev->lock again when iterating idev->addr_list, since the upper callers of mld_newpack() already take read_lock_bh(&idev->lock). Reported-by: dingtianhong Cc: dingtianhong Cc: Hideaki YOSHIFUJI Cc: David S. Miller Cc: Hannes Frederic Sowa Tested-by: Ding Tianhong Tested-by: Chen Weilong Signed-off-by: Cong Wang Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- net/ipv6/mcast.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 12dd2fec045c..75fd93bdd0d3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1444,8 +1444,8 @@ try_nextdev: } EXPORT_SYMBOL(ipv6_dev_get_saddr); -static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, - unsigned char banned_flags) +int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, + unsigned char banned_flags) { struct inet6_ifaddr *ifp; int err = -EADDRNOTAVAIL; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 502c877cbf10..99cd65c715cd 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1351,8 +1351,9 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, hdr->daddr = *daddr; } -static struct sk_buff *mld_newpack(struct net_device *dev, int size) +static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) { + struct net_device *dev = idev->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.igmp_sk; struct sk_buff *skb; @@ -1377,7 +1378,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) skb_reserve(skb, hlen); - if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { + if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { /* : * use unspecified address as the source address * when a valid link-local address is not available. @@ -1474,7 +1475,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_grec *pgr; if (!skb) - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(pmc->idev, dev->mtu); if (!skb) return NULL; pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec)); @@ -1494,7 +1495,8 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) { - struct net_device *dev = pmc->idev->dev; + struct inet6_dev *idev = pmc->idev; + struct net_device *dev = idev->dev; struct mld2_report *pmr; struct mld2_grec *pgr = NULL; struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; @@ -1523,7 +1525,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); } } first = 1; @@ -1550,7 +1552,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = htons(scount); if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); first = 1; scount = 0; } @@ -1605,8 +1607,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) struct sk_buff *skb = NULL; int type; + read_lock_bh(&idev->lock); if (!pmc) { - read_lock_bh(&idev->lock); for (pmc=idev->mc_list; pmc; pmc=pmc->next) { if (pmc->mca_flags & MAF_NOREPORT) continue; @@ -1618,7 +1620,6 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); } else { spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) @@ -1628,6 +1629,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } + read_unlock_bh(&idev->lock); if (skb) mld_sendpack(skb); } -- cgit From e02010adeeb21ef56d6b9b68c785ed1ecc832aee Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 1 Jul 2013 11:31:36 +0200 Subject: net: sctp: get rid of SCTP_DBG_TSNS entirely After having reworked the debugging framework, Neil and Vlad agreed to get rid of the leftover SCTP_DBG_TSNS code for a couple of reasons: We can use systemtap scripts to investigate these things, we now have pr_debug() helpers that make life easier, and if we really need anything else besides those tools, we will be forced to come up with something better than we have there. Therefore, get rid of this ifdef debugging code entirely for now. Signed-off-by: Daniel Borkmann CC: Vlad Yasevich CC: Neil Horman Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/Kconfig | 9 ----- net/sctp/outqueue.c | 105 ---------------------------------------------------- 2 files changed, 114 deletions(-) (limited to 'net') diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index d80bf1aebaed..71c1a598d9bc 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -49,15 +49,6 @@ config NET_SCTPPROBE To compile this code as a module, choose M here: the module will be called sctp_probe. -config SCTP_DBG_TSNS - bool "SCTP: Debug transactions" - help - If you say Y, this will enable transaction debugging, visible - from the kernel's dynamic debugging framework. - - If unsure, say N. However, if you are running into problems, use - this option to gather outqueue trace information. - config SCTP_DBG_OBJCNT bool "SCTP: Debug object counts" depends on PROC_FS diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 511b3b35d609..cb80a8e060b7 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1335,21 +1335,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, int bytes_acked = 0; int migrate_bytes = 0; - /* These state variables are for coherent debug output. --xguo */ - -#ifdef CONFIG_SCTP_DBG_TSNS - __u32 dbg_ack_tsn = 0; /* An ACKed TSN range starts here... */ - __u32 dbg_last_ack_tsn = 0; /* ...and finishes here. */ - __u32 dbg_kept_tsn = 0; /* An un-ACKed range starts here... */ - __u32 dbg_last_kept_tsn = 0; /* ...and finishes here. */ - - /* 0 : The last TSN was ACKed. - * 1 : The last TSN was NOT ACKed (i.e. KEPT). - * -1: We need to initialize. - */ - int dbg_prt_state = -1; -#endif /* CONFIG_SCTP_DBG_TSNS */ - sack_ctsn = ntohl(sack->cum_tsn_ack); INIT_LIST_HEAD(&tlist); @@ -1471,49 +1456,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ list_add_tail(lchunk, &tlist); } - -#ifdef CONFIG_SCTP_DBG_TSNS - switch (dbg_prt_state) { - case 0: /* last TSN was ACKed */ - if (dbg_last_ack_tsn + 1 == tsn) { - /* This TSN belongs to the - * current ACK range. - */ - break; - } - - if (dbg_last_ack_tsn != dbg_ack_tsn) { - /* Display the end of the - * current range. - */ - pr_cont("-%08x", dbg_last_ack_tsn); - } - - /* Start a new range. */ - pr_cont(",%08x", tsn); - dbg_ack_tsn = tsn; - break; - - case 1: /* The last TSN was NOT ACKed. */ - if (dbg_last_kept_tsn != dbg_kept_tsn) { - /* Display the end of current range. */ - pr_cont("-%08x", dbg_last_kept_tsn); - } - - pr_cont("\n"); - /* FALL THROUGH... */ - default: - /* This is the first-ever TSN we examined. */ - /* Start a new range of ACK-ed TSNs. */ - pr_debug("ACKed: %08x", tsn); - - dbg_prt_state = 0; - dbg_ack_tsn = tsn; - } - - dbg_last_ack_tsn = tsn; -#endif /* CONFIG_SCTP_DBG_TSNS */ - } else { if (tchunk->tsn_gap_acked) { pr_debug("%s: receiver reneged on data TSN:0x%x\n", @@ -1537,56 +1479,9 @@ static void sctp_check_transmitted(struct sctp_outq *q, } list_add_tail(lchunk, &tlist); - -#ifdef CONFIG_SCTP_DBG_TSNS - /* See the above comments on ACK-ed TSNs. */ - switch (dbg_prt_state) { - case 1: - if (dbg_last_kept_tsn + 1 == tsn) - break; - - if (dbg_last_kept_tsn != dbg_kept_tsn) - pr_cont("-%08x", dbg_last_kept_tsn); - - pr_cont(",%08x", tsn); - dbg_kept_tsn = tsn; - break; - - case 0: - if (dbg_last_ack_tsn != dbg_ack_tsn) - pr_cont("-%08x", dbg_last_ack_tsn); - - pr_cont("\n"); - /* FALL THROUGH... */ - default: - pr_debug("KEPT: %08x", tsn); - - dbg_prt_state = 1; - dbg_kept_tsn = tsn; - } - - dbg_last_kept_tsn = tsn; -#endif /* CONFIG_SCTP_DBG_TSNS */ } } -#ifdef CONFIG_SCTP_DBG_TSNS - /* Finish off the last range, displaying its ending TSN. */ - switch (dbg_prt_state) { - case 0: - if (dbg_last_ack_tsn != dbg_ack_tsn) - pr_cont("-%08x\n", dbg_last_ack_tsn); - else - pr_cont("\n"); - break; - case 1: - if (dbg_last_kept_tsn != dbg_kept_tsn) - pr_cont("-%08x\n", dbg_last_kept_tsn); - else - pr_cont("\n"); - break; - } -#endif /* CONFIG_SCTP_DBG_TSNS */ if (transport) { if (bytes_acked) { struct sctp_association *asoc = transport->asoc; -- cgit From c590b5e2f05b5e98e614382582b7ae4cddb37599 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 1 Jul 2013 17:23:30 +0200 Subject: ethtool: make .get_dump_data() harder to misuse by drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the patch "bnx2x: remove zeroing of dump data buffer" showed, it is too easy implement .get_dump_data incorrectly in a driver. Let's make sure drivers cannot get confused by userspace requesting a too big dump. Also WARN if the driver sets dump->len to something weird and make sure the length reported to userspace is the actual length of data copied to userspace. Signed-off-by: Michal Schmidt Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9255bbdf81ff..ab5fa6336c84 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1320,10 +1320,19 @@ static int ethtool_get_dump_data(struct net_device *dev, if (ret) return ret; - len = (tmp.len > dump.len) ? dump.len : tmp.len; + len = min(tmp.len, dump.len); if (!len) return -EFAULT; + /* Don't ever let the driver think there's more space available + * than it requested with .get_dump_flag(). + */ + dump.len = len; + + /* Always allocate enough space to hold the whole thing so that the + * driver does not need to check the length and bother with partial + * dumping. + */ data = vzalloc(tmp.len); if (!data) return -ENOMEM; @@ -1331,6 +1340,16 @@ static int ethtool_get_dump_data(struct net_device *dev, if (ret) goto out; + /* There are two sane possibilities: + * 1. The driver's .get_dump_data() does not touch dump.len. + * 2. Or it may set dump.len to how much it really writes, which + * should be tmp.len (or len if it can do a partial dump). + * In any case respond to userspace with the actual length of data + * it's receiving. + */ + WARN_ON(dump.len != len && dump.len != tmp.len); + dump.len = len; + if (copy_to_user(useraddr, &dump, sizeof(dump))) { ret = -EFAULT; goto out; -- cgit From e1558a93b61962710733dc8c11a2bc765607f1cd Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 2 Jul 2013 09:02:07 +0800 Subject: l2tp: add missing .owner to struct pppox_proto Add missing .owner of struct pppox_proto. This prevents the module from being removed from underneath its users. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 8dec6876dc50..5ebee2ded9e9 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1793,7 +1793,8 @@ static const struct proto_ops pppol2tp_ops = { static const struct pppox_proto pppol2tp_proto = { .create = pppol2tp_create, - .ioctl = pppol2tp_ioctl + .ioctl = pppol2tp_ioctl, + .owner = THIS_MODULE, }; #ifdef CONFIG_L2TP_V3 -- cgit From 3b7b514f44bff05d26a6499c4d4fac2a83938e6e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 2 Jul 2013 14:49:34 +0800 Subject: ipip: fix a regression in ioctl This is a regression introduced by commit fd58156e456d9f68fe0448 (IPIP: Use ip-tunneling code.) Similar to GRE tunnel, previously we only check the parameters for SIOCADDTUNNEL and SIOCCHGTUNNEL, after that commit, the check is moved for all commands. So, just check for SIOCADDTUNNEL and SIOCCHGTUNNEL. Also, the check for i_key, o_key etc. is suspicious too, which did not exist before, reset them before passing to ip_tunnel_ioctl(). Cc: Pravin B Shelar Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e6905fbda2a2..51fc2a1dcdd3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -244,11 +244,13 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - return -EINVAL; - if (p.i_key || p.o_key || p.i_flags || p.o_flags) - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + return -EINVAL; + } + + p.i_key = p.o_key = p.i_flags = p.o_flags = 0; if (p.iph.ttl) p.iph.frag_off |= htons(IP_DF); -- cgit From 8822b64a0fa64a5dd1dfcf837c5b0be83f8c05d1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 1 Jul 2013 20:21:30 +0200 Subject: ipv6: call udp_push_pending_frames when uncorking a socket with AF_INET pending data We accidentally call down to ip6_push_pending_frames when uncorking pending AF_INET data on a ipv6 socket. This results in the following splat (from Dave Jones): skbuff: skb_under_panic: text:ffffffff816765f6 len:48 put:40 head:ffff88013deb6df0 data:ffff88013deb6dec tail:0x2c end:0xc0 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:126! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: dccp_ipv4 dccp 8021q garp bridge stp dlci mpoa snd_seq_dummy sctp fuse hidp tun bnep nfnetlink scsi_transport_iscsi rfcomm can_raw can_bcm af_802154 appletalk caif_socket can caif ipt_ULOG x25 rose af_key pppoe pppox ipx phonet irda llc2 ppp_generic slhc p8023 psnap p8022 llc crc_ccitt atm bluetooth +netrom ax25 nfc rfkill rds af_rxrpc coretemp hwmon kvm_intel kvm crc32c_intel snd_hda_codec_realtek ghash_clmulni_intel microcode pcspkr snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hwdep usb_debug snd_seq snd_seq_device snd_pcm e1000e snd_page_alloc snd_timer ptp snd pps_core soundcore xfs libcrc32c CPU: 2 PID: 8095 Comm: trinity-child2 Not tainted 3.10.0-rc7+ #37 task: ffff8801f52c2520 ti: ffff8801e6430000 task.ti: ffff8801e6430000 RIP: 0010:[] [] skb_panic+0x63/0x65 RSP: 0018:ffff8801e6431de8 EFLAGS: 00010282 RAX: 0000000000000086 RBX: ffff8802353d3cc0 RCX: 0000000000000006 RDX: 0000000000003b90 RSI: ffff8801f52c2ca0 RDI: ffff8801f52c2520 RBP: ffff8801e6431e08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff88022ea0c800 R13: ffff88022ea0cdf8 R14: ffff8802353ecb40 R15: ffffffff81cc7800 FS: 00007f5720a10740(0000) GS:ffff880244c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000005862000 CR3: 000000022843c000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 Stack: ffff88013deb6dec 000000000000002c 00000000000000c0 ffffffff81a3f6e4 ffff8801e6431e18 ffffffff8159a9aa ffff8801e6431e90 ffffffff816765f6 ffffffff810b756b 0000000700000002 ffff8801e6431e40 0000fea9292aa8c0 Call Trace: [] skb_push+0x3a/0x40 [] ip6_push_pending_frames+0x1f6/0x4d0 [] ? mark_held_locks+0xbb/0x140 [] udp_v6_push_pending_frames+0x2b9/0x3d0 [] ? udplite_getfrag+0x20/0x20 [] udp_lib_setsockopt+0x1aa/0x1f0 [] ? fget_light+0x387/0x4f0 [] udpv6_setsockopt+0x34/0x40 [] sock_common_setsockopt+0x14/0x20 [] SyS_setsockopt+0x71/0xd0 [] tracesys+0xdd/0xe2 Code: 00 00 48 89 44 24 10 8b 87 d8 00 00 00 48 89 44 24 08 48 8b 87 e8 00 00 00 48 c7 c7 c0 04 aa 81 48 89 04 24 31 c0 e8 e1 7e ff ff <0f> 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 RIP [] skb_panic+0x63/0x65 RSP This patch adds a check if the pending data is of address family AF_INET and directly calls udp_push_ending_frames from udp_v6_push_pending_frames if that is the case. This bug was found by Dave Jones with trinity. (Also move the initialization of fl6 below the AF_INET check, even if not strictly necessary.) Cc: Dave Jones Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 959502afd8d9..6b270e53c207 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -800,7 +800,7 @@ send: /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk) +int udp_push_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -819,6 +819,7 @@ out: up->pending = 0; return err; } +EXPORT_SYMBOL(udp_push_pending_frames); int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f77e34c5a0e2..b6f31437a1f8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -959,11 +959,16 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct udphdr *uh; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + fl6 = &inet->cork.fl.u.ip6; + /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) goto out; -- cgit From 75a493e60ac4bbe2e977e7129d6d8cbb0dd236be Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 2 Jul 2013 08:04:05 +0200 Subject: ipv6: ip6_append_data_mtu did not care about pmtudisc and frag_size If the socket had an IPV6_MTU value set, ip6_append_data_mtu lost track of this when appending the second frame on a corked socket. This results in the following splat: [37598.993962] ------------[ cut here ]------------ [37598.994008] kernel BUG at net/core/skbuff.c:2064! [37598.994008] invalid opcode: 0000 [#1] SMP [37598.994008] Modules linked in: tcp_lp uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_core videodev media vfat fat usb_storage fuse ebtable_nat xt_CHECKSUM bridge stp llc ipt_MASQUERADE nf_conntrack_netbios_ns nf_conntrack_broadcast ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat +nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i cxgb3 mdio libcxgbi ib_iser rdma_cm ib_addr iw_cm ib_cm ib_sa ib_mad ib_core iscsi_tcp libiscsi_tcp libiscsi +scsi_transport_iscsi rfcomm bnep iTCO_wdt iTCO_vendor_support snd_hda_codec_conexant arc4 iwldvm mac80211 snd_hda_intel acpi_cpufreq mperf coretemp snd_hda_codec microcode cdc_wdm cdc_acm [37598.994008] snd_hwdep cdc_ether snd_seq snd_seq_device usbnet mii joydev btusb snd_pcm bluetooth i2c_i801 e1000e lpc_ich mfd_core ptp iwlwifi pps_core snd_page_alloc mei cfg80211 snd_timer thinkpad_acpi snd tpm_tis soundcore rfkill tpm tpm_bios vhost_net tun macvtap macvlan kvm_intel kvm uinput binfmt_misc +dm_crypt i915 i2c_algo_bit drm_kms_helper drm i2c_core wmi video [37598.994008] CPU 0 [37598.994008] Pid: 27320, comm: t2 Not tainted 3.9.6-200.fc18.x86_64 #1 LENOVO 27744PG/27744PG [37598.994008] RIP: 0010:[] [] skb_copy_and_csum_bits+0x325/0x330 [37598.994008] RSP: 0018:ffff88003670da18 EFLAGS: 00010202 [37598.994008] RAX: ffff88018105c018 RBX: 0000000000000004 RCX: 00000000000006c0 [37598.994008] RDX: ffff88018105a6c0 RSI: ffff88018105a000 RDI: ffff8801e1b0aa00 [37598.994008] RBP: ffff88003670da78 R08: 0000000000000000 R09: ffff88018105c040 [37598.994008] R10: ffff8801e1b0aa00 R11: 0000000000000000 R12: 000000000000fff8 [37598.994008] R13: 00000000000004fc R14: 00000000ffff0504 R15: 0000000000000000 [37598.994008] FS: 00007f28eea59740(0000) GS:ffff88023bc00000(0000) knlGS:0000000000000000 [37598.994008] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [37598.994008] CR2: 0000003d935789e0 CR3: 00000000365cb000 CR4: 00000000000407f0 [37598.994008] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [37598.994008] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [37598.994008] Process t2 (pid: 27320, threadinfo ffff88003670c000, task ffff88022c162ee0) [37598.994008] Stack: [37598.994008] ffff88022e098a00 ffff88020f973fc0 0000000000000008 00000000000004c8 [37598.994008] ffff88020f973fc0 00000000000004c4 ffff88003670da78 ffff8801e1b0a200 [37598.994008] 0000000000000018 00000000000004c8 ffff88020f973fc0 00000000000004c4 [37598.994008] Call Trace: [37598.994008] [] ip6_append_data+0xccf/0xfe0 [37598.994008] [] ? ip_copy_metadata+0x1a0/0x1a0 [37598.994008] [] ? _raw_spin_lock_bh+0x16/0x40 [37598.994008] [] udpv6_sendmsg+0x1ed/0xc10 [37598.994008] [] ? sock_has_perm+0x75/0x90 [37598.994008] [] inet_sendmsg+0x63/0xb0 [37598.994008] [] ? selinux_socket_sendmsg+0x23/0x30 [37598.994008] [] sock_sendmsg+0xb0/0xe0 [37598.994008] [] ? __switch_to+0x181/0x4a0 [37598.994008] [] sys_sendto+0x12d/0x180 [37598.994008] [] ? __audit_syscall_entry+0x94/0xf0 [37598.994008] [] ? syscall_trace_enter+0x231/0x240 [37598.994008] [] tracesys+0xdd/0xe2 [37598.994008] Code: fe 07 00 00 48 c7 c7 04 28 a6 81 89 45 a0 4c 89 4d b8 44 89 5d a8 e8 1b ac b1 ff 44 8b 5d a8 4c 8b 4d b8 8b 45 a0 e9 cf fe ff ff <0f> 0b 66 0f 1f 84 00 00 00 00 00 66 66 66 66 90 55 48 89 e5 48 [37598.994008] RIP [] skb_copy_and_csum_bits+0x325/0x330 [37598.994008] RSP [37599.007323] ---[ end trace d69f6a17f8ac8eee ]--- While there, also check if path mtu discovery is activated for this socket. The logic was adapted from ip6_append_data when first writing on the corked socket. This bug was introduced with commit 0c1833797a5a6ec23ea9261d979aa18078720b74 ("ipv6: fix incorrect ipsec fragment"). v2: a) Replace IPV6_PMTU_DISC_DO with IPV6_PMTUDISC_PROBE. b) Don't pass ipv6_pinfo to ip6_append_data_mtu (suggestion by Gao feng, thanks!). c) Change mtu to unsigned int, else we get a warning about non-matching types because of the min()-macro type-check. Acked-by: Gao feng Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index dae1949019d7..be7589ef5cf9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1093,11 +1093,12 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } -static void ip6_append_data_mtu(int *mtu, +static void ip6_append_data_mtu(unsigned int *mtu, int *maxfraglen, unsigned int fragheaderlen, struct sk_buff *skb, - struct rt6_info *rt) + struct rt6_info *rt, + bool pmtuprobe) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { @@ -1109,7 +1110,9 @@ static void ip6_append_data_mtu(int *mtu, * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = dst_mtu(rt->dst.path); + *mtu = min(*mtu, pmtuprobe ? + rt->dst.dev->mtu : + dst_mtu(rt->dst.path)); } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1126,11 +1129,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, mtu; int exthdrlen; int dst_exthdrlen; int hh_len; - int mtu; int copy; int err; int offset = 0; @@ -1287,7 +1289,9 @@ alloc_new_skb: /* update mtu and maxfraglen if necessary */ if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, - fragheaderlen, skb, rt); + fragheaderlen, skb, rt, + np->pmtudisc == + IPV6_PMTUDISC_PROBE); skb_prev = skb; -- cgit From 06a23fe31ca3992863721f21bdb0307af93da807 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 2 Jul 2013 20:30:10 +0900 Subject: core/dev: set pkt_type after eth_type_trans() in dev_forward_skb() The dev_forward_skb() assignment of pkt_type should be done after the call to eth_type_trans(). ip-encapsulated packets can be handled by localhost. But skb->pkt_type can be PACKET_OTHERHOST when packet comes via veth into ip tunnel device. In that case, the packet is dropped by ip_rcv(). Although this example uses gretap. l2tp-eth also has same issue. For l2tp-eth case, add dummy device for ip address and ip l2tp command. netns A | root netns | netns B veth<->veth=bridge=gretap <-loop back-> gretap=bridge=veth<->veth arp packet -> pkt_type BROADCAST------------>ip_rcv()------------------------> <- arp reply pkt_type ip_rcv()<-----------------OTHERHOST drop sample operations ip link add tapa type gretap remote 172.17.107.4 local 172.17.107.3 ip link add tapb type gretap remote 172.17.107.3 local 172.17.107.4 ip link set tapa up ip link set tapb up ip address add 172.17.107.3 dev tapa ip address add 172.17.107.4 dev tapb ip route get 172.17.107.3 > local 172.17.107.3 dev lo src 172.17.107.3 > cache ip route get 172.17.107.4 > local 172.17.107.4 dev lo src 172.17.107.4 > cache ip link add vetha type veth peer name vetha-peer ip link add vethb type veth peer name vethb-peer brctl addbr bra brctl addbr brb brctl addif bra tapa brctl addif bra vetha-peer brctl addif brb tapb brctl addif brb vethb-peer brctl show > bridge name bridge id STP enabled interfaces > bra 8000.6ea21e758ff1 no tapa > vetha-peer > brb 8000.420020eb92d5 no tapb > vethb-peer ip link set vetha-peer up ip link set vethb-peer up ip link set bra up ip link set brb up ip netns add a ip netns add b ip link set vetha netns a ip link set vethb netns b ip netns exec a ip address add 10.0.0.3/24 dev vetha ip netns exec b ip address add 10.0.0.4/24 dev vethb ip netns exec a ip link set vetha up ip netns exec b ip link set vethb up ip netns exec a arping -I vetha 10.0.0.4 ARPING 10.0.0.4 from 10.0.0.3 vetha ^CSent 2 probes (2 broadcast(s)) Received 0 response(s) Cc: Jason Wang Cc: "Michael S. Tsirkin" Cc: Eric Dumazet Cc: Patrick McHardy Cc: Hong Zhiguo Cc: Rami Rosen Cc: Tom Parkin Cc: Cong Wang Cc: Pravin B Shelar Cc: Jesse Gross Cc: dev@openvswitch.org Signed-off-by: Isaku Yamahata Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 370354a9c5f6..6a93cd8cd264 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1659,6 +1659,12 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_scrub_packet(skb); skb->protocol = eth_type_trans(skb, dev); + + /* eth_type_trans() can set pkt_type. + * clear pkt_type _after_ calling eth_type_trans() + */ + skb->pkt_type = PACKET_HOST; + return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); -- cgit From 8a59bd3e9b296b93b905b5509c4ff540ee0e00bf Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Tue, 2 Jul 2013 18:39:36 +0200 Subject: sctp: use get_unused_fd_flags(0) instead of get_unused_fd() Macro get_unused_fd() is used to allocate a file descriptor with default flags. Those default flags (0) can be "unsafe": O_CLOEXEC must be used by default to not leak file descriptor across exec(). Instead of macro get_unused_fd(), functions anon_inode_getfd() or get_unused_fd_flags() should be used with flags given by userspace. If not possible, flags should be set to O_CLOEXEC to provide userspace with a default safe behavor. In a further patch, get_unused_fd() will be removed so that new code start using anon_inode_getfd() or get_unused_fd_flags() with correct flags. This patch replaces calls to get_unused_fd() with equivalent call to get_unused_fd_flags(0) to preserve current behavor for existing code. The hard coded flag value (0) should be reviewed on a per-subsystem basis, and, if possible, set to O_CLOEXEC. Signed-off-by: Yann Droneaud Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d5c6a2870473..c6670d2e3f8d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4312,7 +4312,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval goto out; /* Map the socket to an unused fd that can be returned to the user. */ - retval = get_unused_fd(); + retval = get_unused_fd_flags(0); if (retval < 0) { sock_release(newsock); goto out; -- cgit From b6dc01a43aaca24e6e6928e24d9b37ba599f1e3c Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 2 Jul 2013 20:28:58 +0100 Subject: l2tp: do data sequence number handling in a separate func This change moves some code handling data sequence numbers into a separate function to avoid too much indentation. This is to prepare for some changes to data sequence number handling in subsequent patches. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 54 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6984c3a353cd..5ca29659171d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -542,6 +542,38 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return __skb_checksum_complete(skb); } +/* If packet has sequence numbers, queue it if acceptable. Returns 0 if + * acceptable, else non-zero. + */ +static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) +{ + if (session->reorder_timeout != 0) { + /* Packet reordering enabled. Add skb to session's + * reorder queue, in order of ns. + */ + l2tp_recv_queue_skb(session, skb); + } else { + /* Packet reordering disabled. Discard out-of-sequence + * packets + */ + if (L2TP_SKB_CB(skb)->ns != session->nr) { + atomic_long_inc(&session->stats.rx_seq_discards); + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", + session->name, L2TP_SKB_CB(skb)->ns, + L2TP_SKB_CB(skb)->length, session->nr, + skb_queue_len(&session->reorder_q)); + goto discard; + } + skb_queue_tail(&session->reorder_q, skb); + } + + return 0; + +discard: + return 1; +} + /* Do receive processing of L2TP data frames. We handle both L2TPv2 * and L2TPv3 data frames here. * @@ -757,26 +789,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * enabled. Saved L2TP protocol info is stored in skb->sb[]. */ if (L2TP_SKB_CB(skb)->has_seq) { - if (session->reorder_timeout != 0) { - /* Packet reordering enabled. Add skb to session's - * reorder queue, in order of ns. - */ - l2tp_recv_queue_skb(session, skb); - } else { - /* Packet reordering disabled. Discard out-of-sequence - * packets - */ - if (L2TP_SKB_CB(skb)->ns != session->nr) { - atomic_long_inc(&session->stats.rx_seq_discards); - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", - session->name, L2TP_SKB_CB(skb)->ns, - L2TP_SKB_CB(skb)->length, session->nr, - skb_queue_len(&session->reorder_q)); - goto discard; - } - skb_queue_tail(&session->reorder_q, skb); - } + if (l2tp_recv_data_seq(session, skb)) + goto discard; } else { /* No sequence numbers. Add the skb to the tail of the * reorder queue. This ensures that it will be -- cgit From 8a1631d588a39e826f4248e60310498d5266c6fa Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 2 Jul 2013 20:28:59 +0100 Subject: l2tp: make datapath sequence number support RFC-compliant The L2TP datapath is not currently RFC-compliant when sequence numbers are used in L2TP data packets. According to the L2TP RFC, any received sequence number NR greater than or equal to the next expected NR is acceptable, where the "greater than or equal to" test is determined by the NR wrap point. This differs for L2TPv2 and L2TPv3, so add state in the session context to hold the max NR value and the NR window size in order to do the acceptable sequence number value check. These might be configurable later, but for now we derive it from the tunnel L2TP version, which determines the sequence number field size. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 36 +++++++++++++++++++++++++++++++----- net/l2tp/l2tp_core.h | 2 ++ 2 files changed, 33 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 5ca29659171d..735cc06971ef 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -414,10 +414,7 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ session->nr++; - if (tunnel->version == L2TP_HDR_VER_2) - session->nr &= 0xffff; - else - session->nr &= 0xffffff; + session->nr &= session->nr_max; l2tp_dbg(session, L2TP_MSG_SEQ, "%s: updated nr to %hu\n", session->name, session->nr); @@ -542,11 +539,34 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return __skb_checksum_complete(skb); } +static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr) +{ + u32 nws; + + if (nr >= session->nr) + nws = nr - session->nr; + else + nws = (session->nr_max + 1) - (session->nr - nr); + + return nws < session->nr_window_size; +} + /* If packet has sequence numbers, queue it if acceptable. Returns 0 if * acceptable, else non-zero. */ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) { + if (!l2tp_seq_check_rx_window(session, L2TP_SKB_CB(skb)->ns)) { + /* Packet sequence number is outside allowed window. + * Discard it. + */ + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: pkt %u len %d discarded, outside window, nr=%u\n", + session->name, L2TP_SKB_CB(skb)->ns, + L2TP_SKB_CB(skb)->length, session->nr); + goto discard; + } + if (session->reorder_timeout != 0) { /* Packet reordering enabled. Add skb to session's * reorder queue, in order of ns. @@ -556,7 +576,8 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) /* Packet reordering disabled. Discard out-of-sequence * packets */ - if (L2TP_SKB_CB(skb)->ns != session->nr) { + if ((L2TP_SKB_CB(skb)->ns != session->nr) && + (!session->reorder_skip)) { atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", @@ -1826,6 +1847,11 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session->session_id = session_id; session->peer_session_id = peer_session_id; session->nr = 0; + if (tunnel->version == L2TP_HDR_VER_2) + session->nr_max = 0xffff; + else + session->nr_max = 0xffffff; + session->nr_window_size = session->nr_max / 2; sprintf(&session->name[0], "sess %u/%u", tunnel->tunnel_id, session->session_id); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 485a490fd990..4b9a3b724423 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -102,6 +102,8 @@ struct l2tp_session { u32 nr; /* session NR state (receive) */ u32 ns; /* session NR state (send) */ struct sk_buff_head reorder_q; /* receive reorder queue */ + u32 nr_max; /* max NR. Depends on tunnel */ + u32 nr_window_size; /* NR window size */ struct hlist_node hlist; /* Hash list node */ atomic_t ref_count; -- cgit From a0dbd822273ce7660bf35525d61d7a8ac5e679a3 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 2 Jul 2013 20:29:00 +0100 Subject: l2tp: make datapath resilient to packet loss when sequence numbers enabled If L2TP data sequence numbers are enabled and reordering is not enabled, data reception stops if a packet is lost since the kernel waits for a sequence number that is never resent. (When reordering is enabled, data reception restarts when the reorder timeout expires.) If no reorder timeout is set, we should count the number of in-sequence packets after the out-of-sequence (OOS) condition is detected, and reset sequence number state after a number of such packets are received. For now, the number of in-sequence packets while in OOS state which cause the sequence number state to be reset is hard-coded to 5. This could be configurable later. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 36 +++++++++++++++++++++++++++++++----- net/l2tp/l2tp_core.h | 3 +++ 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 735cc06971ef..feae495a0a30 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -572,12 +572,33 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) * reorder queue, in order of ns. */ l2tp_recv_queue_skb(session, skb); + goto out; + } + + /* Packet reordering disabled. Discard out-of-sequence packets, while + * tracking the number if in-sequence packets after the first OOS packet + * is seen. After nr_oos_count_max in-sequence packets, reset the + * sequence number to re-enable packet reception. + */ + if (L2TP_SKB_CB(skb)->ns == session->nr) { + skb_queue_tail(&session->reorder_q, skb); } else { - /* Packet reordering disabled. Discard out-of-sequence - * packets - */ - if ((L2TP_SKB_CB(skb)->ns != session->nr) && - (!session->reorder_skip)) { + u32 nr_oos = L2TP_SKB_CB(skb)->ns; + u32 nr_next = (session->nr_oos + 1) & session->nr_max; + + if (nr_oos == nr_next) + session->nr_oos_count++; + else + session->nr_oos_count = 0; + + session->nr_oos = nr_oos; + if (session->nr_oos_count > session->nr_oos_count_max) { + session->reorder_skip = 1; + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: %d oos packets received. Resetting sequence numbers\n", + session->name, session->nr_oos_count); + } + if (!session->reorder_skip) { atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", @@ -589,6 +610,7 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) skb_queue_tail(&session->reorder_q, skb); } +out: return 0; discard: @@ -1852,6 +1874,10 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn else session->nr_max = 0xffffff; session->nr_window_size = session->nr_max / 2; + session->nr_oos_count_max = 4; + + /* Use NR of first received packet */ + session->reorder_skip = 1; sprintf(&session->name[0], "sess %u/%u", tunnel->tunnel_id, session->session_id); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 4b9a3b724423..66a559b104b6 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -104,6 +104,9 @@ struct l2tp_session { struct sk_buff_head reorder_q; /* receive reorder queue */ u32 nr_max; /* max NR. Depends on tunnel */ u32 nr_window_size; /* NR window size */ + u32 nr_oos; /* NR of last OOS packet */ + int nr_oos_count; /* For OOS recovery */ + int nr_oos_count_max; struct hlist_node hlist; /* Hash list node */ atomic_t ref_count; -- cgit From 23a3647bc4f93bac3776c66dc2c7f7f68b3cd662 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 2 Jul 2013 10:57:33 -0700 Subject: ip_tunnels: Use skb-len to PMTU check. In path mtu check, ip header total length works for gre device but not for gre-tap device. Use skb len which is consistent for all tunneling types. This is old bug in gre. This also fixes mtu calculation bug introduced by commit c54419321455631079c7d (GRE: Refactor GRE tunneling code). Reported-by: Timo Teras Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 99 +++++++++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 394cebc96d22..945734b2f209 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -472,6 +472,54 @@ drop: } EXPORT_SYMBOL_GPL(ip_tunnel_rcv); +static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, + struct rtable *rt, __be16 df) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int pkt_size = skb->len - tunnel->hlen; + int mtu; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr) - tunnel->hlen; + else + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + if (!skb_is_gso(skb) && + (df & htons(IP_DF)) && mtu < pkt_size) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + return -E2BIG; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < pkt_size) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -E2BIG; + } + } +#endif + return 0; +} + void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol) { @@ -483,7 +531,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct rtable *rt; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; - int mtu; int err; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -560,51 +607,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev->stats.collisions++; goto tx_error; } - df = tnl_params->frag_off; - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr); - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if (skb->protocol == htons(ETH_P_IP)) { - df |= (inner_iph->frag_off&htons(IP_DF)); - - if (!skb_is_gso(skb) && - (inner_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(inner_iph->tot_len)) { - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } - } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && - mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } - } - - if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && - mtu < skb->len) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; - } + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + ip_rt_put(rt); + goto tx_error; } -#endif if (tunnel->net != dev_net(dev)) skb_scrub_packet(skb); @@ -631,6 +638,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } + df = tnl_params->frag_off; + if (skb->protocol == htons(ETH_P_IP)) + df |= (inner_iph->frag_off&htons(IP_DF)); + max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len; if (max_headroom > dev->needed_headroom) { -- cgit From c50cd357887acf9fd7af3a5d492911bd825555a2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 1 Jul 2013 19:24:00 +0200 Subject: net: gre: move GSO functions to gre_offload Similarly to TCP/UDP offloading, move all related GRE functions to gre_offload.c to make things more explicit and similar to the rest of the code. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/Makefile | 1 + net/ipv4/gre.c | 514 ------------------------------------------------- net/ipv4/gre_demux.c | 414 +++++++++++++++++++++++++++++++++++++++ net/ipv4/gre_offload.c | 127 ++++++++++++ 4 files changed, 542 insertions(+), 514 deletions(-) delete mode 100644 net/ipv4/gre.c create mode 100644 net/ipv4/gre_demux.c create mode 100644 net/ipv4/gre_offload.c (limited to 'net') diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 86ded0bac9c7..4b81e91c80fe 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o +gre-y := gre_demux.o gre_offload.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c deleted file mode 100644 index ba4803e609b5..000000000000 --- a/net/ipv4/gre.c +++ /dev/null @@ -1,514 +0,0 @@ -/* - * GRE over IPv4 demultiplexer driver - * - * Authors: Dmitry Kozlov (xeb@mail.ru) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; -static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX]; - -int gre_add_protocol(const struct gre_protocol *proto, u8 version) -{ - if (version >= GREPROTO_MAX) - return -EINVAL; - - return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ? - 0 : -EBUSY; -} -EXPORT_SYMBOL_GPL(gre_add_protocol); - -int gre_del_protocol(const struct gre_protocol *proto, u8 version) -{ - int ret; - - if (version >= GREPROTO_MAX) - return -EINVAL; - - ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ? - 0 : -EBUSY; - - if (ret) - return ret; - - synchronize_rcu(); - return 0; -} -EXPORT_SYMBOL_GPL(gre_del_protocol); - -void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, - int hdr_len) -{ - struct gre_base_hdr *greh; - - skb_push(skb, hdr_len); - - greh = (struct gre_base_hdr *)skb->data; - greh->flags = tnl_flags_to_gre_flags(tpi->flags); - greh->protocol = tpi->proto; - - if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - if (tpi->flags&TUNNEL_SEQ) { - *ptr = tpi->seq; - ptr--; - } - if (tpi->flags&TUNNEL_KEY) { - *ptr = tpi->key; - ptr--; - } - if (tpi->flags&TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { - *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, - skb->len, 0)); - } - } -} -EXPORT_SYMBOL_GPL(gre_build_header); - -struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) -{ - int err; - - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - - if (skb_is_gso(skb)) { - err = skb_unclone(skb, GFP_ATOMIC); - if (unlikely(err)) - goto error; - skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; - return skb; - } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { - err = skb_checksum_help(skb); - if (unlikely(err)) - goto error; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->ip_summed = CHECKSUM_NONE; - - return skb; -error: - kfree_skb(skb); - return ERR_PTR(err); -} -EXPORT_SYMBOL_GPL(gre_handle_offloads); - -static __sum16 check_checksum(struct sk_buff *skb) -{ - __sum16 csum = 0; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - - if (!csum) - break; - /* Fall through. */ - - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - break; - } - - return csum; -} - -static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err) -{ - unsigned int ip_hlen = ip_hdrlen(skb); - const struct gre_base_hdr *greh; - __be32 *options; - int hdr_len; - - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) - return -EINVAL; - - greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) - return -EINVAL; - - tpi->flags = gre_flags_to_tnl_flags(greh->flags); - hdr_len = ip_gre_calc_hlen(tpi->flags); - - if (!pskb_may_pull(skb, hdr_len)) - return -EINVAL; - - greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - tpi->proto = greh->protocol; - - options = (__be32 *)(greh + 1); - if (greh->flags & GRE_CSUM) { - if (check_checksum(skb)) { - *csum_err = true; - return -EINVAL; - } - options++; - } - - if (greh->flags & GRE_KEY) { - tpi->key = *options; - options++; - } else - tpi->key = 0; - - if (unlikely(greh->flags & GRE_SEQ)) { - tpi->seq = *options; - options++; - } else - tpi->seq = 0; - - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { - tpi->proto = htons(ETH_P_IP); - if ((*(u8 *)options & 0xF0) != 0x40) { - hdr_len += 4; - if (!pskb_may_pull(skb, hdr_len)) - return -EINVAL; - } - } - - return iptunnel_pull_header(skb, hdr_len, tpi->proto); -} - -static int gre_cisco_rcv(struct sk_buff *skb) -{ - struct tnl_ptk_info tpi; - int i; - bool csum_err = false; - - if (parse_gre_header(skb, &tpi, &csum_err) < 0) - goto drop; - - rcu_read_lock(); - for (i = 0; i < GRE_IP_PROTO_MAX; i++) { - struct gre_cisco_protocol *proto; - int ret; - - proto = rcu_dereference(gre_cisco_proto_list[i]); - if (!proto) - continue; - ret = proto->handler(skb, &tpi); - if (ret == PACKET_RCVD) { - rcu_read_unlock(); - return 0; - } - } - rcu_read_unlock(); - - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); -drop: - kfree_skb(skb); - return 0; -} - -static void gre_cisco_err(struct sk_buff *skb, u32 info) -{ - /* All the routers (except for Linux) return only - * 8 bytes of packet payload. It means, that precise relaying of - * ICMP in the real Internet is absolutely infeasible. - * - * Moreover, Cisco "wise men" put GRE key to the third word - * in GRE header. It makes impossible maintaining even soft - * state for keyed - * GRE tunnels with enabled checksum. Tell them "thank you". - * - * Well, I wonder, rfc1812 was written by Cisco employee, - * what the hell these idiots break standards established - * by themselves??? - */ - - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct tnl_ptk_info tpi; - bool csum_err = false; - int i; - - if (parse_gre_header(skb, &tpi, &csum_err)) { - if (!csum_err) /* ignore csum errors. */ - return; - } - - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - skb->dev->ifindex, 0, IPPROTO_GRE, 0); - return; - } - if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, - IPPROTO_GRE, 0); - return; - } - - rcu_read_lock(); - for (i = 0; i < GRE_IP_PROTO_MAX; i++) { - struct gre_cisco_protocol *proto; - - proto = rcu_dereference(gre_cisco_proto_list[i]); - if (!proto) - continue; - - if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD) - goto out; - - } -out: - rcu_read_unlock(); -} - -static int gre_rcv(struct sk_buff *skb) -{ - const struct gre_protocol *proto; - u8 ver; - int ret; - - if (!pskb_may_pull(skb, 12)) - goto drop; - - ver = skb->data[1]&0x7f; - if (ver >= GREPROTO_MAX) - goto drop; - - rcu_read_lock(); - proto = rcu_dereference(gre_proto[ver]); - if (!proto || !proto->handler) - goto drop_unlock; - ret = proto->handler(skb); - rcu_read_unlock(); - return ret; - -drop_unlock: - rcu_read_unlock(); -drop: - kfree_skb(skb); - return NET_RX_DROP; -} - -static void gre_err(struct sk_buff *skb, u32 info) -{ - const struct gre_protocol *proto; - const struct iphdr *iph = (const struct iphdr *)skb->data; - u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f; - - if (ver >= GREPROTO_MAX) - return; - - rcu_read_lock(); - proto = rcu_dereference(gre_proto[ver]); - if (proto && proto->err_handler) - proto->err_handler(skb, info); - rcu_read_unlock(); -} - -static struct sk_buff *gre_gso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - netdev_features_t enc_features; - int ghl = GRE_HEADER_SECTION; - struct gre_base_hdr *greh; - int mac_len = skb->mac_len; - __be16 protocol = skb->protocol; - int tnl_hlen; - bool csum; - - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_TCPV6 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_GRE))) - goto out; - - if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) - goto out; - - greh = (struct gre_base_hdr *)skb_transport_header(skb); - - if (greh->flags & GRE_KEY) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_SEQ) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_CSUM) { - ghl += GRE_HEADER_SECTION; - csum = true; - } else - csum = false; - - /* setup inner skb. */ - skb->protocol = greh->protocol; - skb->encapsulation = 0; - - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - __skb_pull(skb, ghl); - skb_reset_mac_header(skb); - skb_set_network_header(skb, skb_inner_network_offset(skb)); - skb->mac_len = skb_inner_network_offset(skb); - - /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); - segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) - goto out; - - skb = segs; - tnl_hlen = skb_tnl_header_len(skb); - do { - __skb_push(skb, ghl); - if (csum) { - __be32 *pcsum; - - if (skb_has_shared_frag(skb)) { - int err; - - err = __skb_linearize(skb); - if (err) { - kfree_skb(segs); - segs = ERR_PTR(err); - goto out; - } - } - - greh = (struct gre_base_hdr *)(skb->data); - pcsum = (__be32 *)(greh + 1); - *pcsum = 0; - *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); - } - __skb_push(skb, tnl_hlen - ghl); - - skb_reset_mac_header(skb); - skb_set_network_header(skb, mac_len); - skb->mac_len = mac_len; - skb->protocol = protocol; - } while ((skb = skb->next)); -out: - return segs; -} - -static int gre_gso_send_check(struct sk_buff *skb) -{ - if (!skb->encapsulation) - return -EINVAL; - return 0; -} - -static const struct net_protocol net_gre_protocol = { - .handler = gre_rcv, - .err_handler = gre_err, - .netns_ok = 1, -}; - -static const struct net_offload gre_offload = { - .callbacks = { - .gso_send_check = gre_gso_send_check, - .gso_segment = gre_gso_segment, - }, -}; - -static const struct gre_protocol ipgre_protocol = { - .handler = gre_cisco_rcv, - .err_handler = gre_cisco_err, -}; - -int gre_cisco_register(struct gre_cisco_protocol *newp) -{ - struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) - &gre_cisco_proto_list[newp->priority]; - - return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY; -} -EXPORT_SYMBOL_GPL(gre_cisco_register); - -int gre_cisco_unregister(struct gre_cisco_protocol *del_proto) -{ - struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) - &gre_cisco_proto_list[del_proto->priority]; - int ret; - - ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL; - - if (ret) - return ret; - - synchronize_net(); - return 0; -} -EXPORT_SYMBOL_GPL(gre_cisco_unregister); - -static int __init gre_init(void) -{ - pr_info("GRE over IPv4 demultiplexor driver\n"); - - if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { - pr_err("can't add protocol\n"); - goto err; - } - - if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) { - pr_info("%s: can't add ipgre handler\n", __func__); - goto err_gre; - } - - if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { - pr_err("can't add protocol offload\n"); - goto err_gso; - } - - return 0; -err_gso: - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); -err_gre: - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); -err: - return -EAGAIN; -} - -static void __exit gre_exit(void) -{ - inet_del_offload(&gre_offload, IPPROTO_GRE); - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); -} - -module_init(gre_init); -module_exit(gre_exit); - -MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); -MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c new file mode 100644 index 000000000000..736c9fc3ef93 --- /dev/null +++ b/net/ipv4/gre_demux.c @@ -0,0 +1,414 @@ +/* + * GRE over IPv4 demultiplexer driver + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; +static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX]; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + return -EINVAL; + + return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ? + 0 : -EBUSY; +} +EXPORT_SYMBOL_GPL(gre_add_protocol); + +int gre_del_protocol(const struct gre_protocol *proto, u8 version) +{ + int ret; + + if (version >= GREPROTO_MAX) + return -EINVAL; + + ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ? + 0 : -EBUSY; + + if (ret) + return ret; + + synchronize_rcu(); + return 0; +} +EXPORT_SYMBOL_GPL(gre_del_protocol); + +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, + int hdr_len) +{ + struct gre_base_hdr *greh; + + skb_push(skb, hdr_len); + + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; + + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; + } + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; + } + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); + } + } +} +EXPORT_SYMBOL_GPL(gre_build_header); + +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) +{ + int err; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; +error: + kfree_skb(skb); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(gre_handle_offloads); + +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + + if (!csum) + break; + /* Fall through. */ + + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; + } + + return csum; +} + +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err) +{ + unsigned int ip_hlen = ip_hdrlen(skb); + const struct gre_base_hdr *greh; + __be32 *options; + int hdr_len; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; + + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + hdr_len = ip_gre_calc_hlen(tpi->flags); + + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + tpi->proto = greh->protocol; + + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; + } + + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 0; + + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; + + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + hdr_len += 4; + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + } + } + + return iptunnel_pull_header(skb, hdr_len, tpi->proto); +} + +static int gre_cisco_rcv(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + int i; + bool csum_err = false; + + if (parse_gre_header(skb, &tpi, &csum_err) < 0) + goto drop; + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + int ret; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + ret = proto->handler(skb, &tpi); + if (ret == PACKET_RCVD) { + rcu_read_unlock(); + return 0; + } + } + rcu_read_unlock(); + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +drop: + kfree_skb(skb); + return 0; +} + +static void gre_cisco_err(struct sk_buff *skb, u32 info) +{ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + * + * Moreover, Cisco "wise men" put GRE key to the third word + * in GRE header. It makes impossible maintaining even soft + * state for keyed + * GRE tunnels with enabled checksum. Tell them "thank you". + * + * Well, I wonder, rfc1812 was written by Cisco employee, + * what the hell these idiots break standards established + * by themselves??? + */ + + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct tnl_ptk_info tpi; + bool csum_err = false; + int i; + + if (parse_gre_header(skb, &tpi, &csum_err)) { + if (!csum_err) /* ignore csum errors. */ + return; + } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + skb->dev->ifindex, 0, IPPROTO_GRE, 0); + return; + } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, + IPPROTO_GRE, 0); + return; + } + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + + if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD) + goto out; + + } +out: + rcu_read_unlock(); +} + +static int gre_rcv(struct sk_buff *skb) +{ + const struct gre_protocol *proto; + u8 ver; + int ret; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->handler) + goto drop_unlock; + ret = proto->handler(skb); + rcu_read_unlock(); + return ret; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static void gre_err(struct sk_buff *skb, u32 info) +{ + const struct gre_protocol *proto; + const struct iphdr *iph = (const struct iphdr *)skb->data; + u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f; + + if (ver >= GREPROTO_MAX) + return; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (proto && proto->err_handler) + proto->err_handler(skb, info); + rcu_read_unlock(); +} + +static const struct net_protocol net_gre_protocol = { + .handler = gre_rcv, + .err_handler = gre_err, + .netns_ok = 1, +}; + +static const struct gre_protocol ipgre_protocol = { + .handler = gre_cisco_rcv, + .err_handler = gre_cisco_err, +}; + +int gre_cisco_register(struct gre_cisco_protocol *newp) +{ + struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) + &gre_cisco_proto_list[newp->priority]; + + return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY; +} +EXPORT_SYMBOL_GPL(gre_cisco_register); + +int gre_cisco_unregister(struct gre_cisco_protocol *del_proto) +{ + struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) + &gre_cisco_proto_list[del_proto->priority]; + int ret; + + ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL; + + if (ret) + return ret; + + synchronize_net(); + return 0; +} +EXPORT_SYMBOL_GPL(gre_cisco_unregister); + +static int __init gre_init(void) +{ + pr_info("GRE over IPv4 demultiplexor driver\n"); + + if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { + pr_err("can't add protocol\n"); + goto err; + } + + if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) { + pr_info("%s: can't add ipgre handler\n", __func__); + goto err_gre; + } + + if (gre_offload_init()) { + pr_err("can't add protocol offload\n"); + goto err_gso; + } + + return 0; +err_gso: + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); +err_gre: + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +err: + return -EAGAIN; +} + +static void __exit gre_exit(void) +{ + gre_offload_exit(); + + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +} + +module_init(gre_init); +module_exit(gre_exit); + +MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c new file mode 100644 index 000000000000..a9d8cd2bff4b --- /dev/null +++ b/net/ipv4/gre_offload.c @@ -0,0 +1,127 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * GRE GSO support + */ + +#include +#include +#include + +static int gre_gso_send_check(struct sk_buff *skb) +{ + if (!skb->encapsulation) + return -EINVAL; + return 0; +} + +static struct sk_buff *gre_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t enc_features; + int ghl = GRE_HEADER_SECTION; + struct gre_base_hdr *greh; + int mac_len = skb->mac_len; + __be16 protocol = skb->protocol; + int tnl_hlen; + bool csum; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + goto out; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + + if (greh->flags & GRE_KEY) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_SEQ) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_CSUM) { + ghl += GRE_HEADER_SECTION; + csum = true; + } else + csum = false; + + /* setup inner skb. */ + skb->protocol = greh->protocol; + skb->encapsulation = 0; + + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + + __skb_pull(skb, ghl); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + skb = segs; + tnl_hlen = skb_tnl_header_len(skb); + do { + __skb_push(skb, ghl); + if (csum) { + __be32 *pcsum; + + if (skb_has_shared_frag(skb)) { + int err; + + err = __skb_linearize(skb); + if (err) { + kfree_skb(segs); + segs = ERR_PTR(err); + goto out; + } + } + + greh = (struct gre_base_hdr *)(skb->data); + pcsum = (__be32 *)(greh + 1); + *pcsum = 0; + *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + } + __skb_push(skb, tnl_hlen - ghl); + + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb->mac_len = mac_len; + skb->protocol = protocol; + } while ((skb = skb->next)); +out: + return segs; +} + +static const struct net_offload gre_offload = { + .callbacks = { + .gso_send_check = gre_gso_send_check, + .gso_segment = gre_gso_segment, + }, +}; + +int __init gre_offload_init(void) +{ + return inet_add_offload(&gre_offload, IPPROTO_GRE); +} + +void __exit gre_offload_exit(void) +{ + inet_del_offload(&gre_offload, IPPROTO_GRE); +} -- cgit From 4bc41b84e9b4d904f68cba2dbe0c60a5428c27c4 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 3 Jul 2013 16:04:25 +0900 Subject: core: Copy inner_protocol in copy_skb_header() inner_protocol was added to struct sk_buff in 0d89d2035fe063461a5ddb609b2c12e7fb006e44 ("MPLS: Add limited GSO support"), which is scheduled to be included in v3.11. That patch did not update __copy_skb_header to copy the inner_protocol. Signed-off-by: Joe Stringer Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 77971a35d6e1..724bb7cb173f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -697,6 +697,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; + new->inner_protocol = old->inner_protocol; new->inner_transport_header = old->inner_transport_header; new->inner_network_header = old->inner_network_header; new->inner_mac_header = old->inner_mac_header; -- cgit From 36b7bfe09b6deb71bf387852465245783c9a6208 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 3 Jul 2013 14:04:14 -0700 Subject: netem: fix possible NULL deref in netem_dequeue() commit aec0a40a6f7884 ("netem: use rb tree to implement the time queue") added a regression if a child qdisc is attached to netem, as we perform a NULL dereference. Fix this by adding a temporary variable to cache netem_skb_cb(skb)->time_to_send. Reported-by: Dan Carpenter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index ed0082cf8eff..82f6016d89ab 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -554,10 +554,13 @@ deliver: } p = rb_first(&q->t_root); if (p) { + psched_time_t time_to_send; + skb = netem_rb_to_skb(p); /* if more time remaining? */ - if (netem_skb_cb(skb)->time_to_send <= psched_get_time()) { + time_to_send = netem_skb_cb(skb)->time_to_send; + if (time_to_send <= psched_get_time()) { rb_erase(p, &q->t_root); sch->q.qlen--; @@ -593,8 +596,7 @@ deliver: if (skb) goto deliver; } - qdisc_watchdog_schedule(&q->watchdog, - netem_skb_cb(skb)->time_to_send); + qdisc_watchdog_schedule(&q->watchdog, time_to_send); } if (q->qdisc) { -- cgit From a1bdc45580fc19e968b32ad27cd7e476a4aa58f6 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 4 Jul 2013 00:52:49 +0900 Subject: net: ipv6: add missing lock in ping_v6_sendmsg Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/ping.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 2b52046e1263..10b975577e91 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -174,6 +174,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); + lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, 0, hlimit, np->tclass, NULL, &fl6, rt, @@ -188,6 +189,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, (struct icmp6hdr *) &pfh.icmph, len); } + release_sock(sk); return err; } -- cgit From fbfe80c890a1dc521d0b629b870e32fcffff0da5 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 4 Jul 2013 00:12:40 +0900 Subject: net: ipv6: fix wrong ping_v6_sendmsg return value ping_v6_sendmsg currently returns 0 on success. It should return the number of bytes written instead. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/ping.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 10b975577e91..18f19df4189f 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -191,7 +191,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } release_sock(sk); - return err; + if (err) + return err; + + return len; } #ifdef CONFIG_PROC_FS -- cgit From 3630d40067a21d4dfbadc6002bb469ce26ac5d52 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 3 Jul 2013 20:45:04 +0200 Subject: ipv6: rt6_check_neigh should successfully verify neigh if no NUD information are available After the removal of rt->n we do not create a neighbour entry at route insertion time (rt6_bind_neighbour is gone). As long as no neighbour is created because of "useful traffic" we skip this routing entry because rt6_check_neigh cannot pick up a valid neighbour (neigh == NULL) and thus returns false. This change was introduced by commit 887c95cc1da53f66a5890fdeab13414613010097 ("ipv6: Complete neighbour entry removal from dst_entry.") To quote RFC4191: "If the host has no information about the router's reachability, then the host assumes the router is reachable." and also: "A host MUST NOT probe a router's reachability in the absence of useful traffic that the host would have sent to the router if it were reachable." So, just assume the router is reachable and let's rt6_probe do the rest. We don't need to create a neighbour on route insertion time. If we don't compile with CONFIG_IPV6_ROUTER_PREF (RFC4191 support) a neighbour is only valid if its nud_state is NUD_VALID. I did not find any references that we should probe the router on route insertion time via the other RFCs. So skip this route in that case. v2: a) use IS_ENABLED instead of #ifdefs (thanks to Sergei Shtylyov) Reported-by: Pierre Emeriaud Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9ff0b78a9c15..bd5fd7054031 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -551,6 +551,8 @@ static inline bool rt6_check_neigh(struct rt6_info *rt) ret = true; #endif read_unlock(&neigh->lock); + } else if (IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + ret = true; } rcu_read_unlock_bh(); -- cgit From 86bd68bfd75941d4cf3b874468791c3e73eef23d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 3 Jul 2013 17:00:34 +0200 Subject: sit: fix tunnel update via netlink The device can stand in another netns, hence we need to do the lookup in netns tunnel->net. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 85ff37b1ce02..a3437a4cd07e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1426,9 +1426,9 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t; + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; - struct net *net = dev_net(dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; -- cgit From c7e8e8a8f7a70b343ca1e0f90a31e35ab2d16de1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 5 Jul 2013 19:36:17 +0800 Subject: bridge: fix some kernel warning in multicast timer Several people reported the warning: "kernel BUG at kernel/timer.c:729!" and the stack trace is: #7 [ffff880214d25c10] mod_timer+501 at ffffffff8106d905 #8 [ffff880214d25c50] br_multicast_del_pg.isra.20+261 at ffffffffa0731d25 [bridge] #9 [ffff880214d25c80] br_multicast_disable_port+88 at ffffffffa0732948 [bridge] #10 [ffff880214d25cb0] br_stp_disable_port+154 at ffffffffa072bcca [bridge] #11 [ffff880214d25ce8] br_device_event+520 at ffffffffa072a4e8 [bridge] #12 [ffff880214d25d18] notifier_call_chain+76 at ffffffff8164aafc #13 [ffff880214d25d50] raw_notifier_call_chain+22 at ffffffff810858f6 #14 [ffff880214d25d60] call_netdevice_notifiers+45 at ffffffff81536aad #15 [ffff880214d25d80] dev_close_many+183 at ffffffff81536d17 #16 [ffff880214d25dc0] rollback_registered_many+168 at ffffffff81537f68 #17 [ffff880214d25de8] rollback_registered+49 at ffffffff81538101 #18 [ffff880214d25e10] unregister_netdevice_queue+72 at ffffffff815390d8 #19 [ffff880214d25e30] __tun_detach+272 at ffffffffa074c2f0 [tun] #20 [ffff880214d25e88] tun_chr_close+45 at ffffffffa074c4bd [tun] #21 [ffff880214d25ea8] __fput+225 at ffffffff8119b1f1 #22 [ffff880214d25ef0] ____fput+14 at ffffffff8119b3fe #23 [ffff880214d25f00] task_work_run+159 at ffffffff8107cf7f #24 [ffff880214d25f30] do_notify_resume+97 at ffffffff810139e1 #25 [ffff880214d25f50] int_signal+18 at ffffffff8164f292 this is due to I forgot to check if mp->timer is armed in br_multicast_del_pg(). This bug is introduced by commit 9f00b2e7cf241fa389733d41b6 (bridge: only expire the mdb entry when query is received). Same for __br_mdb_del(). Tested-by: poma Reported-by: LiYonghua <809674045@qq.com> Reported-by: Robert Hancock Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 2 +- net/bridge/br_multicast.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 19942e38fd2d..0daae3ec2355 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -447,7 +447,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) call_rcu_bh(&p->rcu, br_multicast_free_pg); err = 0; - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->mglist && mp->timer_armed && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); break; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 81befac015e1..69af490cce44 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -270,7 +270,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->mglist && mp->timer_armed && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); -- cgit From cbf55001b2ddb814329735641be5d29b08c82b08 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 8 Jul 2013 16:20:34 +0300 Subject: net: rename low latency sockets functions to busy poll Rename functions in include/net/ll_poll.h to busy wait. Clarify documentation about expected power use increase. Rename POLL_LL to POLL_BUSY_LOOP. Add need_resched() testing to poll/select busy loops. Note, that in select and poll can_busy_poll is dynamic and is updated continuously to reflect the existence of supported sockets with valid queue information. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/core/datagram.c | 3 ++- net/ipv4/tcp.c | 6 +++--- net/socket.c | 12 ++++++------ 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 9cbaba98ce4c..6e9ab31e457e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -208,7 +208,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, } spin_unlock_irqrestore(&queue->lock, cpu_flags); - if (sk_valid_ll(sk) && sk_poll_ll(sk, flags & MSG_DONTWAIT)) + if (sk_can_busy_loop(sk) && + sk_busy_loop(sk, flags & MSG_DONTWAIT)) continue; /* User doesn't want to wait */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46ed9afd1f5e..15cbfa94bd8e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1554,9 +1554,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sk_buff *skb; u32 urg_hole = 0; - if (sk_valid_ll(sk) && skb_queue_empty(&sk->sk_receive_queue) - && (sk->sk_state == TCP_ESTABLISHED)) - sk_poll_ll(sk, nonblock); + if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && + (sk->sk_state == TCP_ESTABLISHED)) + sk_busy_loop(sk, nonblock); lock_sock(sk); diff --git a/net/socket.c b/net/socket.c index 4da14cbd49b6..45afa648364a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1148,7 +1148,7 @@ EXPORT_SYMBOL(sock_create_lite); /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) { - unsigned int ll_flag = 0; + unsigned int busy_flag = 0; struct socket *sock; /* @@ -1156,16 +1156,16 @@ static unsigned int sock_poll(struct file *file, poll_table *wait) */ sock = file->private_data; - if (sk_valid_ll(sock->sk)) { + if (sk_can_busy_loop(sock->sk)) { /* this socket can poll_ll so tell the system call */ - ll_flag = POLL_LL; + busy_flag = POLL_BUSY_LOOP; /* once, only if requested by syscall */ - if (wait && (wait->_key & POLL_LL)) - sk_poll_ll(sock->sk, 1); + if (wait && (wait->_key & POLL_BUSY_LOOP)) + sk_busy_loop(sock->sk, 1); } - return ll_flag | sock->ops->poll(file, sock, wait); + return busy_flag | sock->ops->poll(file, sock, wait); } static int sock_mmap(struct file *file, struct vm_area_struct *vma) -- cgit From 8c2f414ad1b3aa3af05791cd7312eb8ff9d80e0d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Jul 2013 16:17:04 +0200 Subject: net: sctp: confirm route during forward progress This fix has been proposed originally by Vlad Yasevich. He says: When SCTP makes forward progress (receives a SACK that acks new chunks, renegs, or answeres 0-window probes) or when HB-ACK arrives, mark the route as confirmed so we don't unnecessarily send NUD probes. Having a simple SCTP client/server that exchange data chunks every 1sec, without this patch ARP requests are sent periodically every 40-60sec. With this fix applied, an ARP request is only done once right at the "session" beginning. Also, when clearing the related ARP cache entry manually during the session, a new request is correctly done. I have only "backported" this to net-next and tested that it works, so full credit goes to Vlad. Signed-off-by: Vlad Yasevich Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 9 +++++++++ net/sctp/sm_sideeffect.c | 6 ++++++ 2 files changed, 15 insertions(+) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index cb80a8e060b7..ef9e2bbc0f2f 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1334,6 +1334,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, __u8 restart_timer = 0; int bytes_acked = 0; int migrate_bytes = 0; + bool forward_progress = false; sack_ctsn = ntohl(sack->cum_tsn_ack); @@ -1400,6 +1401,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); + forward_progress = true; } if (TSN_lte(tsn, sack_ctsn)) { @@ -1413,6 +1415,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * current RTO. */ restart_timer = 1; + forward_progress = true; if (!tchunk->tsn_gap_acked) { /* @@ -1503,6 +1506,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ transport->error_count = 0; transport->asoc->overall_error_count = 0; + forward_progress = true; /* * While in SHUTDOWN PENDING, we may have started @@ -1576,6 +1580,11 @@ static void sctp_check_transmitted(struct sctp_outq *q, jiffies + transport->rto)) sctp_transport_hold(transport); } + + if (forward_progress) { + if (transport->dst) + dst_confirm(transport->dst); + } } list_splice(&tlist, transmitted_queue); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index cf6f84518222..9da68852ee94 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -730,6 +730,12 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, SCTP_HEARTBEAT_SUCCESS); + /* HB-ACK was received for a the proper HB. Consider this + * forward progress. + */ + if (t->dst) + dst_confirm(t->dst); + /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address * using the time value carried in the HEARTBEAT ACK chunk. -- cgit