diff options
Diffstat (limited to 'net')
251 files changed, 12561 insertions, 5833 deletions
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index ba5983f34c42..a2caf00b82cc 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -196,12 +196,12 @@ static inline u32 vlan_get_ingress_priority(struct net_device *dev, } #ifdef CONFIG_VLAN_8021Q_GVRP -extern int vlan_gvrp_request_join(const struct net_device *dev); -extern void vlan_gvrp_request_leave(const struct net_device *dev); -extern int vlan_gvrp_init_applicant(struct net_device *dev); -extern void vlan_gvrp_uninit_applicant(struct net_device *dev); -extern int vlan_gvrp_init(void); -extern void vlan_gvrp_uninit(void); +int vlan_gvrp_request_join(const struct net_device *dev); +void vlan_gvrp_request_leave(const struct net_device *dev); +int vlan_gvrp_init_applicant(struct net_device *dev); +void vlan_gvrp_uninit_applicant(struct net_device *dev); +int vlan_gvrp_init(void); +void vlan_gvrp_uninit(void); #else static inline int vlan_gvrp_request_join(const struct net_device *dev) { return 0; } static inline void vlan_gvrp_request_leave(const struct net_device *dev) {} @@ -212,12 +212,12 @@ static inline void vlan_gvrp_uninit(void) {} #endif #ifdef CONFIG_VLAN_8021Q_MVRP -extern int vlan_mvrp_request_join(const struct net_device *dev); -extern void vlan_mvrp_request_leave(const struct net_device *dev); -extern int vlan_mvrp_init_applicant(struct net_device *dev); -extern void vlan_mvrp_uninit_applicant(struct net_device *dev); -extern int vlan_mvrp_init(void); -extern void vlan_mvrp_uninit(void); +int vlan_mvrp_request_join(const struct net_device *dev); +void vlan_mvrp_request_leave(const struct net_device *dev); +int vlan_mvrp_init_applicant(struct net_device *dev); +void vlan_mvrp_uninit_applicant(struct net_device *dev); +int vlan_mvrp_init(void); +void vlan_mvrp_uninit(void); #else static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; } static inline void vlan_mvrp_request_leave(const struct net_device *dev) {} @@ -229,8 +229,8 @@ static inline void vlan_mvrp_uninit(void) {} extern const char vlan_fullname[]; extern const char vlan_version[]; -extern int vlan_netlink_init(void); -extern void vlan_netlink_fini(void); +int vlan_netlink_init(void); +void vlan_netlink_fini(void); extern struct rtnl_link_ops vlan_link_ops; diff --git a/net/Kconfig b/net/Kconfig index b50dacc072f0..0715db64a5c3 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -220,6 +220,7 @@ source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" source "net/netlink/Kconfig" source "net/mpls/Kconfig" +source "net/hsr/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index 9492e8cb64e9..8fa2f91517f1 100644 --- a/net/Makefile +++ b/net/Makefile @@ -71,3 +71,4 @@ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ obj-$(CONFIG_NET_MPLS_GSO) += mpls/ +obj-$(CONFIG_HSR) += hsr/ diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 4b4d2b779ec1..a00123ebb0ae 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1735,7 +1735,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) res = -EFAULT; break; } - if (amount > AX25_NOUID_BLOCK) { + if (amount < 0 || amount > AX25_NOUID_BLOCK) { res = -EINVAL; break; } diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 97b42d3c4bef..a2b480a90872 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -87,22 +87,198 @@ static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) return (uint8_t)(sum / count); } +/** + * batadv_iv_ogm_orig_free - free the private resources allocated for this + * orig_node + * @orig_node: the orig_node for which the resources have to be free'd + */ +static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node) +{ + kfree(orig_node->bat_iv.bcast_own); + kfree(orig_node->bat_iv.bcast_own_sum); +} + +/** + * batadv_iv_ogm_orig_add_if - change the private structures of the orig_node to + * include the new hard-interface + * @orig_node: the orig_node that has to be changed + * @max_if_num: the current amount of interfaces + * + * Returns 0 on success, a negative error code otherwise. + */ +static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, + int max_if_num) +{ + void *data_ptr; + size_t data_size, old_size; + int ret = -ENOMEM; + + spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); + + data_size = max_if_num * sizeof(unsigned long) * BATADV_NUM_WORDS; + old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS; + data_ptr = kmalloc(data_size, GFP_ATOMIC); + if (!data_ptr) + goto unlock; + + memcpy(data_ptr, orig_node->bat_iv.bcast_own, old_size); + kfree(orig_node->bat_iv.bcast_own); + orig_node->bat_iv.bcast_own = data_ptr; + + data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); + if (!data_ptr) { + kfree(orig_node->bat_iv.bcast_own); + goto unlock; + } + + memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum, + (max_if_num - 1) * sizeof(uint8_t)); + kfree(orig_node->bat_iv.bcast_own_sum); + orig_node->bat_iv.bcast_own_sum = data_ptr; + + ret = 0; + +unlock: + spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); + + return ret; +} + +/** + * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to + * exclude the removed interface + * @orig_node: the orig_node that has to be changed + * @max_if_num: the current amount of interfaces + * @del_if_num: the index of the interface being removed + * + * Returns 0 on success, a negative error code otherwise. + */ +static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, + int max_if_num, int del_if_num) +{ + int chunk_size, ret = -ENOMEM, if_offset; + void *data_ptr = NULL; + + spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); + + /* last interface was removed */ + if (max_if_num == 0) + goto free_bcast_own; + + chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS; + data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC); + if (!data_ptr) + goto unlock; + + /* copy first part */ + memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size); + + /* copy second part */ + memcpy((char *)data_ptr + del_if_num * chunk_size, + orig_node->bat_iv.bcast_own + ((del_if_num + 1) * chunk_size), + (max_if_num - del_if_num) * chunk_size); + +free_bcast_own: + kfree(orig_node->bat_iv.bcast_own); + orig_node->bat_iv.bcast_own = data_ptr; + + if (max_if_num == 0) + goto free_own_sum; + + data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); + if (!data_ptr) { + kfree(orig_node->bat_iv.bcast_own); + goto unlock; + } + + memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum, + del_if_num * sizeof(uint8_t)); + + if_offset = (del_if_num + 1) * sizeof(uint8_t); + memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t), + orig_node->bat_iv.bcast_own_sum + if_offset, + (max_if_num - del_if_num) * sizeof(uint8_t)); + +free_own_sum: + kfree(orig_node->bat_iv.bcast_own_sum); + orig_node->bat_iv.bcast_own_sum = data_ptr; + + ret = 0; +unlock: + spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); + + return ret; +} + +/** + * batadv_iv_ogm_orig_get - retrieve or create (if does not exist) an originator + * @bat_priv: the bat priv with all the soft interface information + * @addr: mac address of the originator + * + * Returns the originator object corresponding to the passed mac address or NULL + * on failure. + * If the object does not exists it is created an initialised. + */ +static struct batadv_orig_node * +batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr) +{ + struct batadv_orig_node *orig_node; + int size, hash_added; + + orig_node = batadv_orig_hash_find(bat_priv, addr); + if (orig_node) + return orig_node; + + orig_node = batadv_orig_node_new(bat_priv, addr); + if (!orig_node) + return NULL; + + spin_lock_init(&orig_node->bat_iv.ogm_cnt_lock); + + size = bat_priv->num_ifaces * sizeof(unsigned long) * BATADV_NUM_WORDS; + orig_node->bat_iv.bcast_own = kzalloc(size, GFP_ATOMIC); + if (!orig_node->bat_iv.bcast_own) + goto free_orig_node; + + size = bat_priv->num_ifaces * sizeof(uint8_t); + orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC); + if (!orig_node->bat_iv.bcast_own_sum) + goto free_bcast_own; + + hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, + batadv_choose_orig, orig_node, + &orig_node->hash_entry); + if (hash_added != 0) + goto free_bcast_own; + + return orig_node; + +free_bcast_own: + kfree(orig_node->bat_iv.bcast_own); +free_orig_node: + batadv_orig_node_free_ref(orig_node); + + return NULL; +} + static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh) { + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_neigh_node *neigh_node; - neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr); + neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node); if (!neigh_node) goto out; - INIT_LIST_HEAD(&neigh_node->bonding_list); + spin_lock_init(&neigh_node->bat_iv.lq_update_lock); - neigh_node->orig_node = orig_neigh; - neigh_node->if_incoming = hard_iface; + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Creating new neighbor %pM for orig_node %pM on interface %s\n", + neigh_addr, orig_node->orig, hard_iface->net_dev->name); spin_lock_bh(&orig_node->neigh_list_lock); hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); @@ -661,20 +837,22 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) uint32_t i; size_t word_index; uint8_t *w; + int if_num; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - spin_lock_bh(&orig_node->ogm_cnt_lock); + spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); word_index = hard_iface->if_num * BATADV_NUM_WORDS; - word = &(orig_node->bcast_own[word_index]); + word = &(orig_node->bat_iv.bcast_own[word_index]); batadv_bit_get_packet(bat_priv, word, 1, 0); - w = &orig_node->bcast_own_sum[hard_iface->if_num]; + if_num = hard_iface->if_num; + w = &orig_node->bat_iv.bcast_own_sum[if_num]; *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); - spin_unlock_bh(&orig_node->ogm_cnt_lock); + spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); } rcu_read_unlock(); } @@ -755,18 +933,18 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (dup_status != BATADV_NO_DUP) continue; - spin_lock_bh(&tmp_neigh_node->lq_update_lock); - batadv_ring_buffer_set(tmp_neigh_node->tq_recv, - &tmp_neigh_node->tq_index, 0); - tq_avg = batadv_ring_buffer_avg(tmp_neigh_node->tq_recv); - tmp_neigh_node->tq_avg = tq_avg; - spin_unlock_bh(&tmp_neigh_node->lq_update_lock); + spin_lock_bh(&tmp_neigh_node->bat_iv.lq_update_lock); + batadv_ring_buffer_set(tmp_neigh_node->bat_iv.tq_recv, + &tmp_neigh_node->bat_iv.tq_index, 0); + tq_avg = batadv_ring_buffer_avg(tmp_neigh_node->bat_iv.tq_recv); + tmp_neigh_node->bat_iv.tq_avg = tq_avg; + spin_unlock_bh(&tmp_neigh_node->bat_iv.lq_update_lock); } if (!neigh_node) { struct batadv_orig_node *orig_tmp; - orig_tmp = batadv_get_orig_node(bat_priv, ethhdr->h_source); + orig_tmp = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source); if (!orig_tmp) goto unlock; @@ -785,19 +963,20 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, neigh_node->last_seen = jiffies; - spin_lock_bh(&neigh_node->lq_update_lock); - batadv_ring_buffer_set(neigh_node->tq_recv, - &neigh_node->tq_index, + spin_lock_bh(&neigh_node->bat_iv.lq_update_lock); + batadv_ring_buffer_set(neigh_node->bat_iv.tq_recv, + &neigh_node->bat_iv.tq_index, batadv_ogm_packet->tq); - neigh_node->tq_avg = batadv_ring_buffer_avg(neigh_node->tq_recv); - spin_unlock_bh(&neigh_node->lq_update_lock); + tq_avg = batadv_ring_buffer_avg(neigh_node->bat_iv.tq_recv); + neigh_node->bat_iv.tq_avg = tq_avg; + spin_unlock_bh(&neigh_node->bat_iv.lq_update_lock); if (dup_status == BATADV_NO_DUP) { orig_node->last_ttl = batadv_ogm_packet->header.ttl; neigh_node->last_ttl = batadv_ogm_packet->header.ttl; } - batadv_bonding_candidate_add(orig_node, neigh_node); + batadv_bonding_candidate_add(bat_priv, orig_node, neigh_node); /* if this neighbor already is our next hop there is nothing * to change @@ -807,24 +986,24 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, goto out; /* if this neighbor does not offer a better TQ we won't consider it */ - if (router && (router->tq_avg > neigh_node->tq_avg)) + if (router && (router->bat_iv.tq_avg > neigh_node->bat_iv.tq_avg)) goto out; /* if the TQ is the same and the link not more symmetric we * won't consider it either */ - if (router && (neigh_node->tq_avg == router->tq_avg)) { + if (router && (neigh_node->bat_iv.tq_avg == router->bat_iv.tq_avg)) { orig_node_tmp = router->orig_node; - spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); + spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock); if_num = router->if_incoming->if_num; - sum_orig = orig_node_tmp->bcast_own_sum[if_num]; - spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); + sum_orig = orig_node_tmp->bat_iv.bcast_own_sum[if_num]; + spin_unlock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock); orig_node_tmp = neigh_node->orig_node; - spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); + spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock); if_num = neigh_node->if_incoming->if_num; - sum_neigh = orig_node_tmp->bcast_own_sum[if_num]; - spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); + sum_neigh = orig_node_tmp->bat_iv.bcast_own_sum[if_num]; + spin_unlock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock); if (sum_orig >= sum_neigh) goto out; @@ -852,7 +1031,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, uint8_t total_count; uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; - int tq_asym_penalty, inv_asym_penalty, ret = 0; + int tq_asym_penalty, inv_asym_penalty, if_num, ret = 0; unsigned int combined_tq; /* find corresponding one hop neighbor */ @@ -890,10 +1069,11 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, orig_node->last_seen = jiffies; /* find packet count of corresponding one hop neighbor */ - spin_lock_bh(&orig_node->ogm_cnt_lock); - orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num]; - neigh_rq_count = neigh_node->real_packet_count; - spin_unlock_bh(&orig_node->ogm_cnt_lock); + spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); + if_num = if_incoming->if_num; + orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num]; + neigh_rq_count = neigh_node->bat_iv.real_packet_count; + spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); /* pay attention to not get a value bigger than 100 % */ if (orig_eq_count > neigh_rq_count) @@ -975,12 +1155,13 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, uint32_t seqno = ntohl(batadv_ogm_packet->seqno); uint8_t *neigh_addr; uint8_t packet_count; + unsigned long *bitmap; - orig_node = batadv_get_orig_node(bat_priv, batadv_ogm_packet->orig); + orig_node = batadv_iv_ogm_orig_get(bat_priv, batadv_ogm_packet->orig); if (!orig_node) return BATADV_NO_DUP; - spin_lock_bh(&orig_node->ogm_cnt_lock); + spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); seq_diff = seqno - orig_node->last_real_seqno; /* signalize caller that the packet is to be dropped. */ @@ -995,7 +1176,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { neigh_addr = tmp_neigh_node->addr; - is_dup = batadv_test_bit(tmp_neigh_node->real_bits, + is_dup = batadv_test_bit(tmp_neigh_node->bat_iv.real_bits, orig_node->last_real_seqno, seqno); @@ -1011,13 +1192,13 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, } /* if the window moved, set the update flag. */ - need_update |= batadv_bit_get_packet(bat_priv, - tmp_neigh_node->real_bits, + bitmap = tmp_neigh_node->bat_iv.real_bits; + need_update |= batadv_bit_get_packet(bat_priv, bitmap, seq_diff, set_mark); - packet_count = bitmap_weight(tmp_neigh_node->real_bits, + packet_count = bitmap_weight(tmp_neigh_node->bat_iv.real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); - tmp_neigh_node->real_packet_count = packet_count; + tmp_neigh_node->bat_iv.real_packet_count = packet_count; } rcu_read_unlock(); @@ -1029,7 +1210,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, } out: - spin_unlock_bh(&orig_node->ogm_cnt_lock); + spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); batadv_orig_node_free_ref(orig_node); return ret; } @@ -1041,7 +1222,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_hard_iface *hard_iface; - struct batadv_orig_node *orig_neigh_node, *orig_node; + struct batadv_orig_node *orig_neigh_node, *orig_node, *orig_node_tmp; struct batadv_neigh_node *router = NULL, *router_router = NULL; struct batadv_neigh_node *orig_neigh_router = NULL; int has_directlink_flag; @@ -1125,8 +1306,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, int16_t if_num; uint8_t *weight; - orig_neigh_node = batadv_get_orig_node(bat_priv, - ethhdr->h_source); + orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, + ethhdr->h_source); if (!orig_neigh_node) return; @@ -1140,15 +1321,15 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if_num = if_incoming->if_num; offset = if_num * BATADV_NUM_WORDS; - spin_lock_bh(&orig_neigh_node->ogm_cnt_lock); - word = &(orig_neigh_node->bcast_own[offset]); + spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); + word = &(orig_neigh_node->bat_iv.bcast_own[offset]); bit_pos = if_incoming_seqno - 2; bit_pos -= ntohl(batadv_ogm_packet->seqno); batadv_set_bit(word, bit_pos); - weight = &orig_neigh_node->bcast_own_sum[if_num]; + weight = &orig_neigh_node->bat_iv.bcast_own_sum[if_num]; *weight = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); - spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock); + spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); } batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -1171,7 +1352,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, return; } - orig_node = batadv_get_orig_node(bat_priv, batadv_ogm_packet->orig); + orig_node = batadv_iv_ogm_orig_get(bat_priv, batadv_ogm_packet->orig); if (!orig_node) return; @@ -1192,10 +1373,12 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, } router = batadv_orig_node_get_router(orig_node); - if (router) - router_router = batadv_orig_node_get_router(router->orig_node); + if (router) { + orig_node_tmp = router->orig_node; + router_router = batadv_orig_node_get_router(orig_node_tmp); + } - if ((router && router->tq_avg != 0) && + if ((router && router->bat_iv.tq_avg != 0) && (batadv_compare_eth(router->addr, ethhdr->h_source))) is_from_best_next_hop = true; @@ -1219,8 +1402,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if (is_single_hop_neigh) orig_neigh_node = orig_node; else - orig_neigh_node = batadv_get_orig_node(bat_priv, - ethhdr->h_source); + orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, + ethhdr->h_source); if (!orig_neigh_node) goto out; @@ -1351,6 +1534,106 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, return NET_RX_SUCCESS; } +/** + * batadv_iv_ogm_orig_print - print the originator table + * @bat_priv: the bat priv with all the soft interface information + * @seq: debugfs table seq_file struct + */ +static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct batadv_neigh_node *neigh_node, *neigh_node_tmp; + struct batadv_hashtable *hash = bat_priv->orig_hash; + int last_seen_msecs, last_seen_secs; + struct batadv_orig_node *orig_node; + unsigned long last_seen_jiffies; + struct hlist_head *head; + int batman_count = 0; + uint32_t i; + + seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n", + "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE, + "Nexthop", "outgoingIF", "Potential nexthops"); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + neigh_node = batadv_orig_node_get_router(orig_node); + if (!neigh_node) + continue; + + if (neigh_node->bat_iv.tq_avg == 0) + goto next; + + last_seen_jiffies = jiffies - orig_node->last_seen; + last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); + last_seen_secs = last_seen_msecs / 1000; + last_seen_msecs = last_seen_msecs % 1000; + + seq_printf(seq, "%pM %4i.%03is (%3i) %pM [%10s]:", + orig_node->orig, last_seen_secs, + last_seen_msecs, neigh_node->bat_iv.tq_avg, + neigh_node->addr, + neigh_node->if_incoming->net_dev->name); + + hlist_for_each_entry_rcu(neigh_node_tmp, + &orig_node->neigh_list, list) { + seq_printf(seq, " %pM (%3i)", + neigh_node_tmp->addr, + neigh_node_tmp->bat_iv.tq_avg); + } + + seq_puts(seq, "\n"); + batman_count++; + +next: + batadv_neigh_node_free_ref(neigh_node); + } + rcu_read_unlock(); + } + + if (batman_count == 0) + seq_puts(seq, "No batman nodes in range ...\n"); +} + +/** + * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors + * @neigh1: the first neighbor object of the comparison + * @neigh2: the second neighbor object of the comparison + * + * Returns a value less, equal to or greater than 0 if the metric via neigh1 is + * lower, the same as or higher than the metric via neigh2 + */ +static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, + struct batadv_neigh_node *neigh2) +{ + uint8_t tq1, tq2; + + tq1 = neigh1->bat_iv.tq_avg; + tq2 = neigh2->bat_iv.tq_avg; + + return tq1 - tq2; +} + +/** + * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than + * neigh2 from the metric prospective + * @neigh1: the first neighbor object of the comparison + * @neigh2: the second neighbor object of the comparison + * + * Returns true if the metric via neigh1 is equally good or better than the + * metric via neigh2, false otherwise. + */ +static bool batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1, + struct batadv_neigh_node *neigh2) +{ + int diff = batadv_iv_ogm_neigh_cmp(neigh1, neigh2); + + return diff > -BATADV_TQ_SIMILARITY_THRESHOLD; +} + static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", .bat_iface_enable = batadv_iv_ogm_iface_enable, @@ -1359,6 +1642,12 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .bat_primary_iface_set = batadv_iv_ogm_primary_iface_set, .bat_ogm_schedule = batadv_iv_ogm_schedule, .bat_ogm_emit = batadv_iv_ogm_emit, + .bat_neigh_cmp = batadv_iv_ogm_neigh_cmp, + .bat_neigh_is_equiv_or_better = batadv_iv_ogm_neigh_is_eob, + .bat_orig_print = batadv_iv_ogm_orig_print, + .bat_orig_free = batadv_iv_ogm_orig_free, + .bat_orig_add_if = batadv_iv_ogm_orig_add_if, + .bat_orig_del_if = batadv_iv_ogm_orig_del_if, }; int __init batadv_iv_init(void) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 5bb58d7bdd56..28eb5e6d0a02 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -411,10 +411,10 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, return NULL; } - /* this is a gateway now, remove any tt entries */ + /* this is a gateway now, remove any TT entry on this VLAN */ orig_node = batadv_orig_hash_find(bat_priv, orig); if (orig_node) { - batadv_tt_global_del_orig(bat_priv, orig_node, + batadv_tt_global_del_orig(bat_priv, orig_node, vid, "became a backbone gateway"); batadv_orig_node_free_ref(orig_node); } @@ -858,27 +858,25 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, struct sk_buff *skb) { - struct ethhdr *ethhdr; + struct batadv_bla_claim_dst *bla_dst; + uint8_t *hw_src, *hw_dst; struct vlan_ethhdr *vhdr; + struct ethhdr *ethhdr; struct arphdr *arphdr; - uint8_t *hw_src, *hw_dst; - struct batadv_bla_claim_dst *bla_dst; + unsigned short vid; __be16 proto; int headlen; - unsigned short vid = BATADV_NO_FLAGS; int ret; + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); - if (ethhdr->h_proto == htons(ETH_P_8021Q)) { + proto = ethhdr->h_proto; + headlen = ETH_HLEN; + if (vid & BATADV_VLAN_HAS_TAG) { vhdr = (struct vlan_ethhdr *)ethhdr; - vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; - vid |= BATADV_VLAN_HAS_TAG; proto = vhdr->h_vlan_encapsulated_proto; - headlen = sizeof(*vhdr); - } else { - proto = ethhdr->h_proto; - headlen = ETH_HLEN; + headlen += VLAN_HLEN; } if (proto != htons(ETH_P_ARP)) @@ -1317,12 +1315,14 @@ out: /* @bat_priv: the bat priv with all the soft interface information * @orig: originator mac address + * @vid: VLAN identifier * - * check if the originator is a gateway for any VLAN ID. + * Check if the originator is a gateway for the VLAN identified by vid. * - * returns 1 if it is found, 0 otherwise + * Returns true if orig is a backbone for this vid, false otherwise. */ -int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) +bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig, + unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; @@ -1330,25 +1330,26 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) int i; if (!atomic_read(&bat_priv->bridge_loop_avoidance)) - return 0; + return false; if (!hash) - return 0; + return false; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { - if (batadv_compare_eth(backbone_gw->orig, orig)) { + if (batadv_compare_eth(backbone_gw->orig, orig) && + backbone_gw->vid == vid) { rcu_read_unlock(); - return 1; + return true; } } rcu_read_unlock(); } - return 0; + return false; } @@ -1365,10 +1366,8 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size) { - struct ethhdr *ethhdr; - struct vlan_ethhdr *vhdr; struct batadv_bla_backbone_gw *backbone_gw; - unsigned short vid = BATADV_NO_FLAGS; + unsigned short vid; if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) return 0; @@ -1377,16 +1376,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, if (!pskb_may_pull(skb, hdr_size + ETH_HLEN)) return 0; - ethhdr = (struct ethhdr *)(((uint8_t *)skb->data) + hdr_size); - - if (ethhdr->h_proto == htons(ETH_P_8021Q)) { - if (!pskb_may_pull(skb, hdr_size + VLAN_ETH_HLEN)) - return 0; - - vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size); - vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; - vid |= BATADV_VLAN_HAS_TAG; - } + vid = batadv_get_vid(skb, hdr_size); /* see if this originator is a backbone gw for this VLAN */ backbone_gw = batadv_backbone_hash_find(orig_node->bat_priv, diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 4b102e71e5bd..da173e760e77 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -30,7 +30,8 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset); -int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig); +bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig, + unsigned short vid); int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, struct sk_buff *skb); void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, @@ -74,10 +75,11 @@ static inline int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, return 0; } -static inline int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, - uint8_t *orig) +static inline bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, + uint8_t *orig, + unsigned short vid) { - return 0; + return false; } static inline int diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 99da41290f82..6c8c3934bd7b 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -19,6 +19,7 @@ #include <linux/if_ether.h> #include <linux/if_arp.h> +#include <linux/if_vlan.h> #include <net/arp.h> #include "main.h" @@ -205,15 +206,11 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) */ static uint32_t batadv_hash_dat(const void *data, uint32_t size) { - const unsigned char *key = data; uint32_t hash = 0; - size_t i; + const struct batadv_dat_entry *dat = data; - for (i = 0; i < 4; i++) { - hash += key[i]; - hash += (hash << 10); - hash ^= (hash >> 6); - } + hash = batadv_hash_bytes(hash, &dat->ip, sizeof(dat->ip)); + hash = batadv_hash_bytes(hash, &dat->vid, sizeof(dat->vid)); hash += (hash << 3); hash ^= (hash >> 11); @@ -227,21 +224,26 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size) * table * @bat_priv: the bat priv with all the soft interface information * @ip: search key + * @vid: VLAN identifier * * Returns the dat_entry if found, NULL otherwise. */ static struct batadv_dat_entry * -batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) +batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, + unsigned short vid) { struct hlist_head *head; - struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL; + struct batadv_dat_entry to_find, *dat_entry, *dat_entry_tmp = NULL; struct batadv_hashtable *hash = bat_priv->dat.hash; uint32_t index; if (!hash) return NULL; - index = batadv_hash_dat(&ip, hash->size); + to_find.ip = ip; + to_find.vid = vid; + + index = batadv_hash_dat(&to_find, hash->size); head = &hash->table[index]; rcu_read_lock(); @@ -265,22 +267,24 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) * @bat_priv: the bat priv with all the soft interface information * @ip: ipv4 to add/edit * @mac_addr: mac address to assign to the given ipv4 + * @vid: VLAN identifier */ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, - uint8_t *mac_addr) + uint8_t *mac_addr, unsigned short vid) { struct batadv_dat_entry *dat_entry; int hash_added; - dat_entry = batadv_dat_entry_hash_find(bat_priv, ip); + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip, vid); /* if this entry is already known, just update it */ if (dat_entry) { if (!batadv_compare_eth(dat_entry->mac_addr, mac_addr)) memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN); dat_entry->last_update = jiffies; batadv_dbg(BATADV_DBG_DAT, bat_priv, - "Entry updated: %pI4 %pM\n", &dat_entry->ip, - dat_entry->mac_addr); + "Entry updated: %pI4 %pM (vid: %d)\n", + &dat_entry->ip, dat_entry->mac_addr, + BATADV_PRINT_VID(vid)); goto out; } @@ -289,12 +293,13 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, goto out; dat_entry->ip = ip; + dat_entry->vid = vid; memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN); dat_entry->last_update = jiffies; atomic_set(&dat_entry->refcount, 2); hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat, - batadv_hash_dat, &dat_entry->ip, + batadv_hash_dat, dat_entry, &dat_entry->hash_entry); if (unlikely(hash_added != 0)) { @@ -303,8 +308,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, goto out; } - batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM\n", - &dat_entry->ip, dat_entry->mac_addr); + batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM (vid: %d)\n", + &dat_entry->ip, dat_entry->mac_addr, BATADV_PRINT_VID(vid)); out: if (dat_entry) @@ -756,8 +761,8 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) goto out; seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name); - seq_printf(seq, " %-7s %-13s %5s\n", "IPv4", "MAC", - "last-seen"); + seq_printf(seq, " %-7s %-9s %4s %11s\n", "IPv4", + "MAC", "VID", "last-seen"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -770,8 +775,9 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) last_seen_msecs = last_seen_msecs % 60000; last_seen_secs = last_seen_msecs / 1000; - seq_printf(seq, " * %15pI4 %14pM %6i:%02i\n", + seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n", &dat_entry->ip, dat_entry->mac_addr, + BATADV_PRINT_VID(dat_entry->vid), last_seen_mins, last_seen_secs); } rcu_read_unlock(); @@ -858,6 +864,31 @@ out: } /** + * batadv_dat_get_vid - extract the VLAN identifier from skb if any + * @skb: the buffer containing the packet to extract the VID from + * @hdr_size: the size of the batman-adv header encapsulating the packet + * + * If the packet embedded in the skb is vlan tagged this function returns the + * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned. + */ +static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) +{ + unsigned short vid; + + vid = batadv_get_vid(skb, *hdr_size); + + /* ARP parsing functions jump forward of hdr_size + ETH_HLEN. + * If the header contained in the packet is a VLAN one (which is longer) + * hdr_size is updated so that the functions will still skip the + * correct amount of bytes. + */ + if (vid & BATADV_VLAN_HAS_TAG) + *hdr_size += VLAN_HLEN; + + return vid; +} + +/** * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to * answer using DAT * @bat_priv: the bat priv with all the soft interface information @@ -876,26 +907,31 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, bool ret = false; struct batadv_dat_entry *dat_entry = NULL; struct sk_buff *skb_new; + int hdr_size = 0; + unsigned short vid; if (!atomic_read(&bat_priv->distributed_arp_table)) goto out; - type = batadv_arp_get_type(bat_priv, skb, 0); + vid = batadv_dat_get_vid(skb, &hdr_size); + + type = batadv_arp_get_type(bat_priv, skb, hdr_size); /* If the node gets an ARP_REQUEST it has to send a DHT_GET unicast * message to the selected DHT candidates */ if (type != ARPOP_REQUEST) goto out; - batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REQUEST"); + batadv_dbg_arp(bat_priv, skb, type, hdr_size, + "Parsing outgoing ARP REQUEST"); - ip_src = batadv_arp_ip_src(skb, 0); - hw_src = batadv_arp_hw_src(skb, 0); - ip_dst = batadv_arp_ip_dst(skb, 0); + ip_src = batadv_arp_ip_src(skb, hdr_size); + hw_src = batadv_arp_hw_src(skb, hdr_size); + ip_dst = batadv_arp_ip_dst(skb, hdr_size); - batadv_dat_entry_add(bat_priv, ip_src, hw_src); + batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); - dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst, vid); if (dat_entry) { /* If the ARP request is destined for a local client the local * client will answer itself. DAT would only generate a @@ -905,7 +941,8 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, * additional DAT answer may trigger kernel warnings about * a packet coming from the wrong port. */ - if (batadv_is_my_client(bat_priv, dat_entry->mac_addr)) { + if (batadv_is_my_client(bat_priv, dat_entry->mac_addr, + BATADV_NO_FLAGS)) { ret = true; goto out; } @@ -916,11 +953,15 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, if (!skb_new) goto out; + if (vid & BATADV_VLAN_HAS_TAG) + skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q), + vid & VLAN_VID_MASK); + skb_reset_mac_header(skb_new); skb_new->protocol = eth_type_trans(skb_new, bat_priv->soft_iface); bat_priv->stats.rx_packets++; - bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; + bat_priv->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size; bat_priv->soft_iface->last_rx = jiffies; netif_rx(skb_new); @@ -955,11 +996,14 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb_new; struct batadv_dat_entry *dat_entry = NULL; bool ret = false; + unsigned short vid; int err; if (!atomic_read(&bat_priv->distributed_arp_table)) goto out; + vid = batadv_dat_get_vid(skb, &hdr_size); + type = batadv_arp_get_type(bat_priv, skb, hdr_size); if (type != ARPOP_REQUEST) goto out; @@ -971,9 +1015,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, batadv_dbg_arp(bat_priv, skb, type, hdr_size, "Parsing incoming ARP REQUEST"); - batadv_dat_entry_add(bat_priv, ip_src, hw_src); + batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); - dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst, vid); if (!dat_entry) goto out; @@ -984,17 +1028,22 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!skb_new) goto out; + if (vid & BATADV_VLAN_HAS_TAG) + skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q), + vid & VLAN_VID_MASK); + /* To preserve backwards compatibility, the node has choose the outgoing * format based on the incoming request packet type. The assumption is * that a node not using the 4addr packet format doesn't support it. */ if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) - err = batadv_send_skb_unicast_4addr(bat_priv, skb_new, - BATADV_P_DAT_CACHE_REPLY); + err = batadv_send_skb_via_tt_4addr(bat_priv, skb_new, + BATADV_P_DAT_CACHE_REPLY, + vid); else - err = batadv_send_skb_unicast(bat_priv, skb_new); + err = batadv_send_skb_via_tt(bat_priv, skb_new, vid); - if (!err) { + if (err != NET_XMIT_DROP) { batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX); ret = true; } @@ -1017,23 +1066,28 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, uint16_t type; __be32 ip_src, ip_dst; uint8_t *hw_src, *hw_dst; + int hdr_size = 0; + unsigned short vid; if (!atomic_read(&bat_priv->distributed_arp_table)) return; - type = batadv_arp_get_type(bat_priv, skb, 0); + vid = batadv_dat_get_vid(skb, &hdr_size); + + type = batadv_arp_get_type(bat_priv, skb, hdr_size); if (type != ARPOP_REPLY) return; - batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REPLY"); + batadv_dbg_arp(bat_priv, skb, type, hdr_size, + "Parsing outgoing ARP REPLY"); - hw_src = batadv_arp_hw_src(skb, 0); - ip_src = batadv_arp_ip_src(skb, 0); - hw_dst = batadv_arp_hw_dst(skb, 0); - ip_dst = batadv_arp_ip_dst(skb, 0); + hw_src = batadv_arp_hw_src(skb, hdr_size); + ip_src = batadv_arp_ip_src(skb, hdr_size); + hw_dst = batadv_arp_hw_dst(skb, hdr_size); + ip_dst = batadv_arp_ip_dst(skb, hdr_size); - batadv_dat_entry_add(bat_priv, ip_src, hw_src); - batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); + batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); + batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid); /* Send the ARP reply to the candidates for both the IP addresses that * the node obtained from the ARP reply @@ -1055,10 +1109,13 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, __be32 ip_src, ip_dst; uint8_t *hw_src, *hw_dst; bool ret = false; + unsigned short vid; if (!atomic_read(&bat_priv->distributed_arp_table)) goto out; + vid = batadv_dat_get_vid(skb, &hdr_size); + type = batadv_arp_get_type(bat_priv, skb, hdr_size); if (type != ARPOP_REPLY) goto out; @@ -1074,13 +1131,13 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, /* Update our internal cache with both the IP addresses the node got * within the ARP reply */ - batadv_dat_entry_add(bat_priv, ip_src, hw_src); - batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); + batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); + batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid); /* if this REPLY is directed to a client of mine, let's deliver the * packet to the interface */ - ret = !batadv_is_my_client(bat_priv, hw_dst); + ret = !batadv_is_my_client(bat_priv, hw_dst, vid); out: if (ret) kfree_skb(skb); @@ -1103,7 +1160,8 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, __be32 ip_dst; struct batadv_dat_entry *dat_entry = NULL; bool ret = false; - const size_t bcast_len = sizeof(struct batadv_bcast_packet); + int hdr_size = sizeof(struct batadv_bcast_packet); + unsigned short vid; if (!atomic_read(&bat_priv->distributed_arp_table)) goto out; @@ -1114,12 +1172,14 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, if (forw_packet->num_packets) goto out; - type = batadv_arp_get_type(bat_priv, forw_packet->skb, bcast_len); + vid = batadv_dat_get_vid(forw_packet->skb, &hdr_size); + + type = batadv_arp_get_type(bat_priv, forw_packet->skb, hdr_size); if (type != ARPOP_REQUEST) goto out; - ip_dst = batadv_arp_ip_dst(forw_packet->skb, bcast_len); - dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); + ip_dst = batadv_arp_ip_dst(forw_packet->skb, hdr_size); + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst, vid); /* check if the node already got this entry */ if (!dat_entry) { batadv_dbg(BATADV_DBG_DAT, bat_priv, diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 053bb318c7a7..2449afaa7638 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -137,7 +137,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (!atomic_inc_not_zero(&gw_node->refcount)) goto next; - tq_avg = router->tq_avg; + tq_avg = router->bat_iv.tq_avg; switch (atomic_read(&bat_priv->gw_sel_class)) { case 1: /* fast connection */ @@ -221,11 +221,6 @@ void batadv_gw_election(struct batadv_priv *bat_priv) struct batadv_neigh_node *router = NULL; char gw_addr[18] = { '\0' }; - /* The batman daemon checks here if we already passed a full originator - * cycle in order to make sure we don't choose the first gateway we - * hear about. This check is based on the daemon's uptime which we - * don't have. - */ if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) goto out; @@ -261,7 +256,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv) next_gw->bandwidth_down / 10, next_gw->bandwidth_down % 10, next_gw->bandwidth_up / 10, - next_gw->bandwidth_up % 10, router->tq_avg); + next_gw->bandwidth_up % 10, router->bat_iv.tq_avg); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_ADD, gw_addr); } else { @@ -271,7 +266,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv) next_gw->bandwidth_down / 10, next_gw->bandwidth_down % 10, next_gw->bandwidth_up / 10, - next_gw->bandwidth_up % 10, router->tq_avg); + next_gw->bandwidth_up % 10, router->bat_iv.tq_avg); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_CHANGE, gw_addr); } @@ -310,8 +305,8 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv, if (!router_orig) goto out; - gw_tq_avg = router_gw->tq_avg; - orig_tq_avg = router_orig->tq_avg; + gw_tq_avg = router_gw->bat_iv.tq_avg; + orig_tq_avg = router_orig->bat_iv.tq_avg; /* the TQ value has to be better */ if (orig_tq_avg < gw_tq_avg) @@ -533,7 +528,7 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv, ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", (curr_gw == gw_node ? "=>" : " "), gw_node->orig_node->orig, - router->tq_avg, router->addr, + router->bat_iv.tq_avg, router->addr, router->if_incoming->net_dev->name, gw_node->bandwidth_down / 10, gw_node->bandwidth_down % 10, @@ -726,7 +721,20 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) return true; } -/* this call might reallocate skb data */ +/** + * batadv_gw_out_of_range - check if the dhcp request destination is the best gw + * @bat_priv: the bat priv with all the soft interface information + * @skb: the outgoing packet + * + * Check if the skb is a DHCP request and if it is sent to the current best GW + * server. Due to topology changes it may be the case that the GW server + * previously selected is not the best one anymore. + * + * Returns true if the packet destination is unicast and it is not the best gw, + * false otherwise. + * + * This call might reallocate skb data. + */ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb) { @@ -737,6 +745,9 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, bool ret, out_of_range = false; unsigned int header_len = 0; uint8_t curr_tq_avg; + unsigned short vid; + + vid = batadv_get_vid(skb, 0); ret = batadv_gw_is_dhcp_target(skb, &header_len); if (!ret) @@ -744,7 +755,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, ethhdr = (struct ethhdr *)skb->data; orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source, - ethhdr->h_dest); + ethhdr->h_dest, vid); if (!orig_dst_node) goto out; @@ -781,7 +792,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!neigh_curr) goto out; - curr_tq_avg = neigh_curr->tq_avg; + curr_tq_avg = neigh_curr->bat_iv.tq_avg; break; case BATADV_GW_MODE_OFF: default: @@ -792,7 +803,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!neigh_old) goto out; - if (curr_tq_avg - neigh_old->tq_avg > BATADV_GW_THRESHOLD) + if (curr_tq_avg - neigh_old->bat_iv.tq_avg > BATADV_GW_THRESHOLD) out_of_range = true; out: diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index d564af295db4..57c2a19dcb5c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -28,6 +28,7 @@ #include "originator.h" #include "hash.h" #include "bridge_loop_avoidance.h" +#include "gateway_client.h" #include <linux/if_arp.h> #include <linux/if_ether.h> @@ -124,8 +125,11 @@ static int batadv_is_valid_iface(const struct net_device *net_dev) * * Returns true if the net device is a 802.11 wireless device, false otherwise. */ -static bool batadv_is_wifi_netdev(struct net_device *net_device) +bool batadv_is_wifi_netdev(struct net_device *net_device) { + if (!net_device) + return false; + #ifdef CONFIG_WIRELESS_EXT /* pre-cfg80211 drivers have to implement WEXT, so it is possible to * check for wireless_handlers != NULL @@ -141,34 +145,6 @@ static bool batadv_is_wifi_netdev(struct net_device *net_device) return false; } -/** - * batadv_is_wifi_iface - check if the given interface represented by ifindex - * is a wifi interface - * @ifindex: interface index to check - * - * Returns true if the interface represented by ifindex is a 802.11 wireless - * device, false otherwise. - */ -bool batadv_is_wifi_iface(int ifindex) -{ - struct net_device *net_device = NULL; - bool ret = false; - - if (ifindex == BATADV_NULL_IFINDEX) - goto out; - - net_device = dev_get_by_index(&init_net, ifindex); - if (!net_device) - goto out; - - ret = batadv_is_wifi_netdev(net_device); - -out: - if (net_device) - dev_put(net_device); - return ret; -} - static struct batadv_hard_iface * batadv_hardif_get_active(const struct net_device *soft_iface) { @@ -266,16 +242,9 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev) int batadv_hardif_min_mtu(struct net_device *soft_iface) { - const struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(soft_iface); const struct batadv_hard_iface *hard_iface; - /* allow big frames if all devices are capable to do so - * (have MTU > 1500 + batadv_max_header_len()) - */ int min_mtu = ETH_DATA_LEN; - int max_header_len = batadv_max_header_len(); - - if (atomic_read(&bat_priv->fragmentation)) - goto out; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { @@ -286,22 +255,40 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) if (hard_iface->soft_iface != soft_iface) continue; - min_mtu = min_t(int, hard_iface->net_dev->mtu - max_header_len, - min_mtu); + min_mtu = min_t(int, hard_iface->net_dev->mtu, min_mtu); } rcu_read_unlock(); + + atomic_set(&bat_priv->packet_size_max, min_mtu); + + if (atomic_read(&bat_priv->fragmentation) == 0) + goto out; + + /* with fragmentation enabled the maximum size of internally generated + * packets such as translation table exchanges or tvlv containers, etc + * has to be calculated + */ + min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE); + min_mtu -= sizeof(struct batadv_frag_packet); + min_mtu *= BATADV_FRAG_MAX_FRAGMENTS; + atomic_set(&bat_priv->packet_size_max, min_mtu); + + /* with fragmentation enabled we can fragment external packets easily */ + min_mtu = min_t(int, min_mtu, ETH_DATA_LEN); + out: - return min_mtu; + return min_mtu - batadv_max_header_len(); } /* adjusts the MTU if a new interface with a smaller MTU appeared. */ void batadv_update_min_mtu(struct net_device *soft_iface) { - int min_mtu; + soft_iface->mtu = batadv_hardif_min_mtu(soft_iface); - min_mtu = batadv_hardif_min_mtu(soft_iface); - if (soft_iface->mtu != min_mtu) - soft_iface->mtu = min_mtu; + /* Check if the local translate table should be cleaned up to match a + * new (and smaller) MTU. + */ + batadv_tt_local_resize_to_mtu(soft_iface); } static void @@ -524,8 +511,12 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, dev_put(hard_iface->soft_iface); /* nobody uses this interface anymore */ - if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO) - batadv_softif_destroy_sysfs(hard_iface->soft_iface); + if (!bat_priv->num_ifaces) { + batadv_gw_check_client_stop(bat_priv); + + if (autodel == BATADV_IF_CLEANUP_AUTO) + batadv_softif_destroy_sysfs(hard_iface->soft_iface); + } netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface); hard_iface->soft_iface = NULL; @@ -643,6 +634,8 @@ static int batadv_hard_if_event(struct notifier_block *this, if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) { batadv_sysfs_add_meshif(net_dev); + bat_priv = netdev_priv(net_dev); + batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); return NOTIFY_DONE; } diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 49892881a7c5..df4c8bd45c40 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -41,6 +41,7 @@ enum batadv_hard_if_cleanup { extern struct notifier_block batadv_hard_if_notifier; +bool batadv_is_wifi_netdev(struct net_device *net_device); struct batadv_hard_iface* batadv_hardif_get_by_netdev(const struct net_device *net_dev); int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, @@ -51,7 +52,6 @@ void batadv_hardif_remove_interfaces(void); int batadv_hardif_min_mtu(struct net_device *soft_iface); void batadv_update_min_mtu(struct net_device *soft_iface); void batadv_hardif_free_rcu(struct rcu_head *rcu); -bool batadv_is_wifi_iface(int ifindex); static inline void batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface) diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 82ac6472fa6f..29ae4efe3543 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -29,7 +29,7 @@ static struct batadv_socket_client *batadv_socket_client_hash[256]; static void batadv_socket_add_packet(struct batadv_socket_client *socket_client, - struct batadv_icmp_packet_rr *icmp_packet, + struct batadv_icmp_header *icmph, size_t icmp_len); void batadv_socket_init(void) @@ -155,13 +155,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, struct batadv_priv *bat_priv = socket_client->bat_priv; struct batadv_hard_iface *primary_if = NULL; struct sk_buff *skb; - struct batadv_icmp_packet_rr *icmp_packet; - + struct batadv_icmp_packet_rr *icmp_packet_rr; + struct batadv_icmp_header *icmp_header; struct batadv_orig_node *orig_node = NULL; struct batadv_neigh_node *neigh_node = NULL; size_t packet_len = sizeof(struct batadv_icmp_packet); - if (len < sizeof(struct batadv_icmp_packet)) { + if (len < sizeof(struct batadv_icmp_header)) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Error - can't send packet from char device: invalid packet size\n"); return -EINVAL; @@ -174,8 +174,10 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, goto out; } - if (len >= sizeof(struct batadv_icmp_packet_rr)) - packet_len = sizeof(struct batadv_icmp_packet_rr); + if (len >= BATADV_ICMP_MAX_PACKET_SIZE) + packet_len = BATADV_ICMP_MAX_PACKET_SIZE; + else + packet_len = len; skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN); if (!skb) { @@ -185,67 +187,78 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, ETH_HLEN); - icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); + icmp_header = (struct batadv_icmp_header *)skb_put(skb, packet_len); - if (copy_from_user(icmp_packet, buff, packet_len)) { + if (copy_from_user(icmp_header, buff, packet_len)) { len = -EFAULT; goto free_skb; } - if (icmp_packet->icmph.header.packet_type != BATADV_ICMP) { + if (icmp_header->header.packet_type != BATADV_ICMP) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n"); len = -EINVAL; goto free_skb; } - if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) { + switch (icmp_header->msg_type) { + case BATADV_ECHO_REQUEST: + if (len < sizeof(struct batadv_icmp_packet)) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Error - can't send packet from char device: invalid packet size\n"); + len = -EINVAL; + goto free_skb; + } + + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + goto dst_unreach; + + orig_node = batadv_orig_hash_find(bat_priv, icmp_header->dst); + if (!orig_node) + goto dst_unreach; + + neigh_node = batadv_orig_node_get_router(orig_node); + if (!neigh_node) + goto dst_unreach; + + if (!neigh_node->if_incoming) + goto dst_unreach; + + if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE) + goto dst_unreach; + + icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmp_header; + if (packet_len == sizeof(*icmp_packet_rr)) + memcpy(icmp_packet_rr->rr, + neigh_node->if_incoming->net_dev->dev_addr, + ETH_ALEN); + + break; + default: batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Error - can't send packet from char device: got bogus message type (expected: ECHO_REQUEST)\n"); + "Error - can't send packet from char device: got unknown message type\n"); len = -EINVAL; goto free_skb; } - icmp_packet->icmph.uid = socket_client->index; + icmp_header->uid = socket_client->index; - if (icmp_packet->icmph.header.version != BATADV_COMPAT_VERSION) { - icmp_packet->icmph.msg_type = BATADV_PARAMETER_PROBLEM; - icmp_packet->icmph.header.version = BATADV_COMPAT_VERSION; - batadv_socket_add_packet(socket_client, icmp_packet, + if (icmp_header->header.version != BATADV_COMPAT_VERSION) { + icmp_header->msg_type = BATADV_PARAMETER_PROBLEM; + icmp_header->header.version = BATADV_COMPAT_VERSION; + batadv_socket_add_packet(socket_client, icmp_header, packet_len); goto free_skb; } - if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) - goto dst_unreach; - - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst); - if (!orig_node) - goto dst_unreach; - - neigh_node = batadv_orig_node_get_router(orig_node); - if (!neigh_node) - goto dst_unreach; - - if (!neigh_node->if_incoming) - goto dst_unreach; - - if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE) - goto dst_unreach; - - memcpy(icmp_packet->icmph.orig, - primary_if->net_dev->dev_addr, ETH_ALEN); - - if (packet_len == sizeof(struct batadv_icmp_packet_rr)) - memcpy(icmp_packet->rr, - neigh_node->if_incoming->net_dev->dev_addr, ETH_ALEN); + memcpy(icmp_header->orig, primary_if->net_dev->dev_addr, ETH_ALEN); batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); goto out; dst_unreach: - icmp_packet->icmph.msg_type = BATADV_DESTINATION_UNREACHABLE; - batadv_socket_add_packet(socket_client, icmp_packet, packet_len); + icmp_header->msg_type = BATADV_DESTINATION_UNREACHABLE; + batadv_socket_add_packet(socket_client, icmp_header, packet_len); free_skb: kfree_skb(skb); out: @@ -298,27 +311,40 @@ err: return -ENOMEM; } +/** + * batadv_socket_receive_packet - schedule an icmp packet to be sent to userspace + * on an icmp socket. + * @socket_client: the socket this packet belongs to + * @icmph: pointer to the header of the icmp packet + * @icmp_len: total length of the icmp packet + */ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client, - struct batadv_icmp_packet_rr *icmp_packet, + struct batadv_icmp_header *icmph, size_t icmp_len) { struct batadv_socket_packet *socket_packet; + size_t len; socket_packet = kmalloc(sizeof(*socket_packet), GFP_ATOMIC); if (!socket_packet) return; + len = icmp_len; + /* check the maximum length before filling the buffer */ + if (len > sizeof(socket_packet->icmp_packet)) + len = sizeof(socket_packet->icmp_packet); + INIT_LIST_HEAD(&socket_packet->list); - memcpy(&socket_packet->icmp_packet, icmp_packet, icmp_len); - socket_packet->icmp_len = icmp_len; + memcpy(&socket_packet->icmp_packet, icmph, len); + socket_packet->icmp_len = len; spin_lock_bh(&socket_client->lock); /* while waiting for the lock the socket_client could have been * deleted */ - if (!batadv_socket_client_hash[icmp_packet->icmph.uid]) { + if (!batadv_socket_client_hash[icmph->uid]) { spin_unlock_bh(&socket_client->lock); kfree(socket_packet); return; @@ -342,12 +368,18 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client, wake_up(&socket_client->queue_wait); } -void batadv_socket_receive_packet(struct batadv_icmp_packet_rr *icmp_packet, +/** + * batadv_socket_receive_packet - schedule an icmp packet to be received + * locally and sent to userspace. + * @icmph: pointer to the header of the icmp packet + * @icmp_len: total length of the icmp packet + */ +void batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len) { struct batadv_socket_client *hash; - hash = batadv_socket_client_hash[icmp_packet->icmph.uid]; + hash = batadv_socket_client_hash[icmph->uid]; if (hash) - batadv_socket_add_packet(hash, icmp_packet, icmp_len); + batadv_socket_add_packet(hash, icmph, icmp_len); } diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 1fcca37b6223..6665080dff79 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -24,7 +24,7 @@ void batadv_socket_init(void); int batadv_socket_setup(struct batadv_priv *bat_priv); -void batadv_socket_receive_packet(struct batadv_icmp_packet_rr *icmp_packet, +void batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len); #endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 7f3a5c426615..c51a5e568f0a 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -110,9 +110,11 @@ int batadv_mesh_init(struct net_device *soft_iface) spin_lock_init(&bat_priv->tt.req_list_lock); spin_lock_init(&bat_priv->tt.roam_list_lock); spin_lock_init(&bat_priv->tt.last_changeset_lock); + spin_lock_init(&bat_priv->tt.commit_lock); spin_lock_init(&bat_priv->gw.list_lock); spin_lock_init(&bat_priv->tvlv.container_list_lock); spin_lock_init(&bat_priv->tvlv.handler_list_lock); + spin_lock_init(&bat_priv->softif_vlan_list_lock); INIT_HLIST_HEAD(&bat_priv->forw_bat_list); INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); @@ -122,6 +124,7 @@ int batadv_mesh_init(struct net_device *soft_iface) INIT_LIST_HEAD(&bat_priv->tt.roam_list); INIT_HLIST_HEAD(&bat_priv->tvlv.container_list); INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list); + INIT_HLIST_HEAD(&bat_priv->softif_vlan_list); ret = batadv_originator_init(bat_priv); if (ret < 0) @@ -131,9 +134,6 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; - batadv_tt_local_add(soft_iface, soft_iface->dev_addr, - BATADV_NULL_IFINDEX); - ret = batadv_bla_init(bat_priv); if (ret < 0) goto err; @@ -501,7 +501,9 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) !bat_algo_ops->bat_iface_update_mac || !bat_algo_ops->bat_primary_iface_set || !bat_algo_ops->bat_ogm_schedule || - !bat_algo_ops->bat_ogm_emit) { + !bat_algo_ops->bat_ogm_emit || + !bat_algo_ops->bat_neigh_cmp || + !bat_algo_ops->bat_neigh_is_equiv_or_better) { pr_info("Routing algo '%s' does not implement required ops\n", bat_algo_ops->name); ret = -EINVAL; @@ -1144,6 +1146,33 @@ out: batadv_orig_node_free_ref(orig_node); } +/** + * batadv_get_vid - extract the VLAN identifier from skb if any + * @skb: the buffer containing the packet + * @header_len: length of the batman header preceding the ethernet header + * + * If the packet embedded in the skb is vlan tagged this function returns the + * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned. + */ +unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) +{ + struct ethhdr *ethhdr = (struct ethhdr *)(skb->data + header_len); + struct vlan_ethhdr *vhdr; + unsigned short vid; + + if (ethhdr->h_proto != htons(ETH_P_8021Q)) + return BATADV_NO_FLAGS; + + if (!pskb_may_pull(skb, header_len + VLAN_ETH_HLEN)) + return BATADV_NO_FLAGS; + + vhdr = (struct vlan_ethhdr *)(skb->data + header_len); + vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; + + return vid; +} + static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) { struct batadv_algo_ops *bat_algo_ops; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 54c13d51edbe..f94f287b8670 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -20,13 +20,13 @@ #ifndef _NET_BATMAN_ADV_MAIN_H_ #define _NET_BATMAN_ADV_MAIN_H_ -#define BATADV_DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \ - "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>" +#define BATADV_DRIVER_AUTHOR "Marek Lindner <mareklindner@neomailbox.ch>, " \ + "Simon Wunderlich <sw@simonwunderlich.de>" #define BATADV_DRIVER_DESC "B.A.T.M.A.N. advanced" #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2013.4.0" +#define BATADV_SOURCE_VERSION "2013.5.0" #endif /* B.A.T.M.A.N. parameters */ @@ -86,6 +86,12 @@ /* numbers of originator to contact for any PUT/GET DHT operation */ #define BATADV_DAT_CANDIDATES_NUM 3 +/** + * BATADV_TQ_SIMILARITY_THRESHOLD - TQ points that a secondary metric can differ + * at most from the primary one in order to be still considered acceptable + */ +#define BATADV_TQ_SIMILARITY_THRESHOLD 50 + /* how much worse secondary interfaces may be to be considered as bonding * candidates */ @@ -167,15 +173,9 @@ enum batadv_uev_type { #include <net/rtnetlink.h> #include <linux/jiffies.h> #include <linux/seq_file.h> -#include "types.h" +#include <linux/if_vlan.h> -/** - * batadv_vlan_flags - flags for the four MSB of any vlan ID field - * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not - */ -enum batadv_vlan_flags { - BATADV_VLAN_HAS_TAG = BIT(15), -}; +#include "types.h" #define BATADV_PRINT_VID(vid) (vid & BATADV_VLAN_HAS_TAG ? \ (int)(vid & VLAN_VID_MASK) : -1) @@ -368,5 +368,6 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, uint8_t *dst, uint8_t type, uint8_t version, void *tvlv_value, uint16_t tvlv_value_len); +unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len); #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 23f611bedb0f..351e199bc0af 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1003,7 +1003,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, struct batadv_nc_packet *nc_packet, struct batadv_neigh_node *neigh_node) { - uint8_t tq_weighted_neigh, tq_weighted_coding; + uint8_t tq_weighted_neigh, tq_weighted_coding, tq_tmp; struct sk_buff *skb_dest, *skb_src; struct batadv_unicast_packet *packet1; struct batadv_unicast_packet *packet2; @@ -1028,8 +1028,10 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, if (!router_coding) goto out; - tq_weighted_neigh = batadv_nc_random_weight_tq(router_neigh->tq_avg); - tq_weighted_coding = batadv_nc_random_weight_tq(router_coding->tq_avg); + tq_tmp = batadv_nc_random_weight_tq(router_neigh->bat_iv.tq_avg); + tq_weighted_neigh = tq_tmp; + tq_tmp = batadv_nc_random_weight_tq(router_coding->bat_iv.tq_avg); + tq_weighted_coding = tq_tmp; /* Select one destination for the MAC-header dst-field based on * weighted TQ-values. diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a591dc5c321e..8ab14340d10f 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -36,7 +36,7 @@ static struct lock_class_key batadv_orig_hash_lock_class_key; static void batadv_purge_orig(struct work_struct *work); /* returns 1 if they are the same originator */ -static int batadv_compare_orig(const struct hlist_node *node, const void *data2) +int batadv_compare_orig(const struct hlist_node *node, const void *data2) { const void *data1 = container_of(node, struct batadv_orig_node, hash_entry); @@ -44,6 +44,88 @@ static int batadv_compare_orig(const struct hlist_node *node, const void *data2) return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } +/** + * batadv_orig_node_vlan_get - get an orig_node_vlan object + * @orig_node: the originator serving the VLAN + * @vid: the VLAN identifier + * + * Returns the vlan object identified by vid and belonging to orig_node or NULL + * if it does not exist. + */ +struct batadv_orig_node_vlan * +batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, + unsigned short vid) +{ + struct batadv_orig_node_vlan *vlan = NULL, *tmp; + + rcu_read_lock(); + list_for_each_entry_rcu(tmp, &orig_node->vlan_list, list) { + if (tmp->vid != vid) + continue; + + if (!atomic_inc_not_zero(&tmp->refcount)) + continue; + + vlan = tmp; + + break; + } + rcu_read_unlock(); + + return vlan; +} + +/** + * batadv_orig_node_vlan_new - search and possibly create an orig_node_vlan + * object + * @orig_node: the originator serving the VLAN + * @vid: the VLAN identifier + * + * Returns NULL in case of failure or the vlan object identified by vid and + * belonging to orig_node otherwise. The object is created and added to the list + * if it does not exist. + * + * The object is returned with refcounter increased by 1. + */ +struct batadv_orig_node_vlan * +batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, + unsigned short vid) +{ + struct batadv_orig_node_vlan *vlan; + + spin_lock_bh(&orig_node->vlan_list_lock); + + /* first look if an object for this vid already exists */ + vlan = batadv_orig_node_vlan_get(orig_node, vid); + if (vlan) + goto out; + + vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); + if (!vlan) + goto out; + + atomic_set(&vlan->refcount, 2); + vlan->vid = vid; + + list_add_rcu(&vlan->list, &orig_node->vlan_list); + +out: + spin_unlock_bh(&orig_node->vlan_list_lock); + + return vlan; +} + +/** + * batadv_orig_node_vlan_free_ref - decrement the refcounter and possibly free + * the originator-vlan object + * @orig_vlan: the originator-vlan object to release + */ +void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan) +{ + if (atomic_dec_and_test(&orig_vlan->refcount)) + kfree_rcu(orig_vlan, rcu); +} + int batadv_originator_init(struct batadv_priv *bat_priv) { if (bat_priv->orig_hash) @@ -90,11 +172,20 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node) return router; } +/** + * batadv_neigh_node_new - create and init a new neigh_node object + * @hard_iface: the interface where the neighbour is connected to + * @neigh_addr: the mac address of the neighbour interface + * @orig_node: originator object representing the neighbour + * + * Allocates a new neigh_node object and initialises all the generic fields. + * Returns the new object or NULL on failure. + */ struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr) + const uint8_t *neigh_addr, + struct batadv_orig_node *orig_node) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_neigh_node *neigh_node; neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC); @@ -104,15 +195,14 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, INIT_HLIST_NODE(&neigh_node->list); memcpy(neigh_node->addr, neigh_addr, ETH_ALEN); - spin_lock_init(&neigh_node->lq_update_lock); + neigh_node->if_incoming = hard_iface; + neigh_node->orig_node = orig_node; + + INIT_LIST_HEAD(&neigh_node->bonding_list); /* extra reference for return */ atomic_set(&neigh_node->refcount, 2); - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM on interface %s\n", neigh_addr, - hard_iface->net_dev->name); - out: return neigh_node; } @@ -148,12 +238,13 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) batadv_frag_purge_orig(orig_node, NULL); - batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, + batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, -1, "originator timed out"); + if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) + orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); + kfree(orig_node->tt_buff); - kfree(orig_node->bcast_own); - kfree(orig_node->bcast_own_sum); kfree(orig_node); } @@ -211,20 +302,22 @@ void batadv_originator_free(struct batadv_priv *bat_priv) batadv_hash_destroy(hash); } -/* this function finds or creates an originator entry for the given - * address if it does not exits +/** + * batadv_orig_node_new - creates a new orig_node + * @bat_priv: the bat priv with all the soft interface information + * @addr: the mac address of the originator + * + * Creates a new originator object and initialise all the generic fields. + * The new object is not added to the originator list. + * Returns the newly created object or NULL on failure. */ -struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, +struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const uint8_t *addr) { struct batadv_orig_node *orig_node; - int size, i; - int hash_added; + struct batadv_orig_node_vlan *vlan; unsigned long reset_time; - - orig_node = batadv_orig_hash_find(bat_priv, addr); - if (orig_node) - return orig_node; + int i; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Creating new originator: %pM\n", addr); @@ -235,10 +328,12 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, INIT_HLIST_HEAD(&orig_node->neigh_list); INIT_LIST_HEAD(&orig_node->bond_list); - spin_lock_init(&orig_node->ogm_cnt_lock); + INIT_LIST_HEAD(&orig_node->vlan_list); spin_lock_init(&orig_node->bcast_seqno_lock); spin_lock_init(&orig_node->neigh_list_lock); spin_lock_init(&orig_node->tt_buff_lock); + spin_lock_init(&orig_node->tt_lock); + spin_lock_init(&orig_node->vlan_list_lock); batadv_nc_init_orig(orig_node); @@ -250,25 +345,24 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, memcpy(orig_node->orig, addr, ETH_ALEN); batadv_dat_init_orig_node_addr(orig_node); orig_node->router = NULL; - orig_node->tt_crc = 0; atomic_set(&orig_node->last_ttvn, 0); orig_node->tt_buff = NULL; orig_node->tt_buff_len = 0; - atomic_set(&orig_node->tt_size, 0); reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_node->bcast_seqno_reset = reset_time; orig_node->batman_seqno_reset = reset_time; atomic_set(&orig_node->bond_candidates, 0); - size = bat_priv->num_ifaces * sizeof(unsigned long) * BATADV_NUM_WORDS; - - orig_node->bcast_own = kzalloc(size, GFP_ATOMIC); - if (!orig_node->bcast_own) + /* create a vlan object for the "untagged" LAN */ + vlan = batadv_orig_node_vlan_new(orig_node, BATADV_NO_FLAGS); + if (!vlan) goto free_orig_node; - - size = bat_priv->num_ifaces * sizeof(uint8_t); - orig_node->bcast_own_sum = kzalloc(size, GFP_ATOMIC); + /* batadv_orig_node_vlan_new() increases the refcounter. + * Immediately release vlan since it is not needed anymore in this + * context + */ + batadv_orig_node_vlan_free_ref(vlan); for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { INIT_HLIST_HEAD(&orig_node->fragments[i].head); @@ -276,20 +370,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, orig_node->fragments[i].size = 0; } - if (!orig_node->bcast_own_sum) - goto free_bcast_own; - - hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, - batadv_choose_orig, orig_node, - &orig_node->hash_entry); - if (hash_added != 0) - goto free_bcast_own_sum; - return orig_node; -free_bcast_own_sum: - kfree(orig_node->bcast_own_sum); -free_bcast_own: - kfree(orig_node->bcast_own); free_orig_node: kfree(orig_node); return NULL; @@ -298,15 +379,16 @@ free_orig_node: static bool batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - struct batadv_neigh_node **best_neigh_node) + struct batadv_neigh_node **best_neigh) { + struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; bool neigh_purged = false; unsigned long last_seen; struct batadv_hard_iface *if_incoming; - *best_neigh_node = NULL; + *best_neigh = NULL; spin_lock_bh(&orig_node->neigh_list_lock); @@ -339,9 +421,12 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, batadv_bonding_candidate_del(orig_node, neigh_node); batadv_neigh_node_free_ref(neigh_node); } else { - if ((!*best_neigh_node) || - (neigh_node->tq_avg > (*best_neigh_node)->tq_avg)) - *best_neigh_node = neigh_node; + /* store the best_neighbour if this is the first + * iteration or if a better neighbor has been found + */ + if (!*best_neigh || + bao->bat_neigh_cmp(neigh_node, *best_neigh) > 0) + *best_neigh = neigh_node; } } @@ -430,100 +515,26 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_head *head; struct batadv_hard_iface *primary_if; - struct batadv_orig_node *orig_node; - struct batadv_neigh_node *neigh_node, *neigh_node_tmp; - int batman_count = 0; - int last_seen_secs; - int last_seen_msecs; - unsigned long last_seen_jiffies; - uint32_t i; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) - goto out; + return 0; - seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", + seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, - primary_if->net_dev->dev_addr, net_dev->name); - seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n", - "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE, - "Nexthop", "outgoingIF", "Potential nexthops"); + primary_if->net_dev->dev_addr, net_dev->name, + bat_priv->bat_algo_ops->name); - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; + batadv_hardif_free_ref(primary_if); - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - neigh_node = batadv_orig_node_get_router(orig_node); - if (!neigh_node) - continue; - - if (neigh_node->tq_avg == 0) - goto next; - - last_seen_jiffies = jiffies - orig_node->last_seen; - last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); - last_seen_secs = last_seen_msecs / 1000; - last_seen_msecs = last_seen_msecs % 1000; - - seq_printf(seq, "%pM %4i.%03is (%3i) %pM [%10s]:", - orig_node->orig, last_seen_secs, - last_seen_msecs, neigh_node->tq_avg, - neigh_node->addr, - neigh_node->if_incoming->net_dev->name); - - hlist_for_each_entry_rcu(neigh_node_tmp, - &orig_node->neigh_list, list) { - seq_printf(seq, " %pM (%3i)", - neigh_node_tmp->addr, - neigh_node_tmp->tq_avg); - } - - seq_puts(seq, "\n"); - batman_count++; - -next: - batadv_neigh_node_free_ref(neigh_node); - } - rcu_read_unlock(); + if (!bat_priv->bat_algo_ops->bat_orig_print) { + seq_puts(seq, + "No printing function for this routing protocol\n"); + return 0; } - if (batman_count == 0) - seq_puts(seq, "No batman nodes in range ...\n"); - -out: - if (primary_if) - batadv_hardif_free_ref(primary_if); - return 0; -} - -static int batadv_orig_node_add_if(struct batadv_orig_node *orig_node, - int max_if_num) -{ - void *data_ptr; - size_t data_size, old_size; - - data_size = max_if_num * sizeof(unsigned long) * BATADV_NUM_WORDS; - old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS; - data_ptr = kmalloc(data_size, GFP_ATOMIC); - if (!data_ptr) - return -ENOMEM; - - memcpy(data_ptr, orig_node->bcast_own, old_size); - kfree(orig_node->bcast_own); - orig_node->bcast_own = data_ptr; - - data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); - if (!data_ptr) - return -ENOMEM; - - memcpy(data_ptr, orig_node->bcast_own_sum, - (max_if_num - 1) * sizeof(uint8_t)); - kfree(orig_node->bcast_own_sum); - orig_node->bcast_own_sum = data_ptr; + bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq); return 0; } @@ -532,6 +543,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; struct batadv_orig_node *orig_node; @@ -546,10 +558,10 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - spin_lock_bh(&orig_node->ogm_cnt_lock); - ret = batadv_orig_node_add_if(orig_node, max_if_num); - spin_unlock_bh(&orig_node->ogm_cnt_lock); - + ret = 0; + if (bao->bat_orig_add_if) + ret = bao->bat_orig_add_if(orig_node, + max_if_num); if (ret == -ENOMEM) goto err; } @@ -563,54 +575,6 @@ err: return -ENOMEM; } -static int batadv_orig_node_del_if(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) -{ - void *data_ptr = NULL; - int chunk_size; - - /* last interface was removed */ - if (max_if_num == 0) - goto free_bcast_own; - - chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS; - data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC); - if (!data_ptr) - return -ENOMEM; - - /* copy first part */ - memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size); - - /* copy second part */ - memcpy((char *)data_ptr + del_if_num * chunk_size, - orig_node->bcast_own + ((del_if_num + 1) * chunk_size), - (max_if_num - del_if_num) * chunk_size); - -free_bcast_own: - kfree(orig_node->bcast_own); - orig_node->bcast_own = data_ptr; - - if (max_if_num == 0) - goto free_own_sum; - - data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); - if (!data_ptr) - return -ENOMEM; - - memcpy(data_ptr, orig_node->bcast_own_sum, - del_if_num * sizeof(uint8_t)); - - memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t), - orig_node->bcast_own_sum + ((del_if_num + 1) * sizeof(uint8_t)), - (max_if_num - del_if_num) * sizeof(uint8_t)); - -free_own_sum: - kfree(orig_node->bcast_own_sum); - orig_node->bcast_own_sum = data_ptr; - - return 0; -} - int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, int max_if_num) { @@ -619,6 +583,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, struct hlist_head *head; struct batadv_hard_iface *hard_iface_tmp; struct batadv_orig_node *orig_node; + struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; uint32_t i; int ret; @@ -630,11 +595,11 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - spin_lock_bh(&orig_node->ogm_cnt_lock); - ret = batadv_orig_node_del_if(orig_node, max_if_num, - hard_iface->if_num); - spin_unlock_bh(&orig_node->ogm_cnt_lock); - + ret = 0; + if (bao->bat_orig_del_if) + ret = bao->bat_orig_del_if(orig_node, + max_if_num, + hard_iface->if_num); if (ret == -ENOMEM) goto err; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 7887b84a9af4..6f77d808a916 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -22,16 +22,18 @@ #include "hash.h" +int batadv_compare_orig(const struct hlist_node *node, const void *data2); int batadv_originator_init(struct batadv_priv *bat_priv); void batadv_originator_free(struct batadv_priv *bat_priv); void batadv_purge_orig_ref(struct batadv_priv *bat_priv); void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node); void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); -struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, +struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr); + const uint8_t *neigh_addr, + struct batadv_orig_node *orig_node); void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); struct batadv_neigh_node * batadv_orig_node_get_router(struct batadv_orig_node *orig_node); @@ -40,6 +42,13 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num); int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, int max_if_num); +struct batadv_orig_node_vlan * +batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, + unsigned short vid); +struct batadv_orig_node_vlan * +batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, + unsigned short vid); +void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan); /* hashfunction to choose an entry in a hash table of given size diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 65e723ed030b..207459b62966 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -110,18 +110,27 @@ enum batadv_tt_data_flags { /* BATADV_TT_CLIENT flags. * Flags from BIT(0) to BIT(7) are sent on the wire, while flags from BIT(8) to - * BIT(15) are used for local computation only + * BIT(15) are used for local computation only. + * Flags from BIT(4) to BIT(7) are kept in sync with the rest of the network. */ enum batadv_tt_client_flags { BATADV_TT_CLIENT_DEL = BIT(0), BATADV_TT_CLIENT_ROAM = BIT(1), - BATADV_TT_CLIENT_WIFI = BIT(2), + BATADV_TT_CLIENT_WIFI = BIT(4), BATADV_TT_CLIENT_NOPURGE = BIT(8), BATADV_TT_CLIENT_NEW = BIT(9), BATADV_TT_CLIENT_PENDING = BIT(10), BATADV_TT_CLIENT_TEMP = BIT(11), }; +/** + * batadv_vlan_flags - flags for the four MSB of any vlan ID field + * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not + */ +enum batadv_vlan_flags { + BATADV_VLAN_HAS_TAG = BIT(15), +}; + /* claim frame types for the bridge loop avoidance */ enum batadv_bla_claimframe { BATADV_CLAIM_TYPE_CLAIM = 0x00, @@ -230,6 +239,8 @@ struct batadv_icmp_packet_rr { uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; }; +#define BATADV_ICMP_MAX_PACKET_SIZE sizeof(struct batadv_icmp_packet_rr) + /* All packet headers in front of an ethernet header have to be completely * divisible by 2 but not by 4 to make the payload after the ethernet * header again 4 bytes boundary aligned. @@ -383,14 +394,26 @@ struct batadv_tvlv_gateway_data { * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container * @flags: translation table flags (see batadv_tt_data_flags) * @ttvn: translation table version number - * @reserved: field reserved for future use - * @crc: crc32 checksum of the local translation table + * @vlan_num: number of announced VLANs. In the TVLV this struct is followed by + * one batadv_tvlv_tt_vlan_data object per announced vlan */ struct batadv_tvlv_tt_data { uint8_t flags; uint8_t ttvn; + __be16 num_vlan; +}; + +/** + * struct batadv_tvlv_tt_vlan_data - vlan specific tt data propagated through + * the tt tvlv container + * @crc: crc32 checksum of the entries belonging to this vlan + * @vid: vlan identifier + * @reserved: unused, useful for alignment purposes + */ +struct batadv_tvlv_tt_vlan_data { + __be32 crc; + __be16 vid; uint16_t reserved; - __be32 crc; }; /** @@ -399,21 +422,23 @@ struct batadv_tvlv_tt_data { * batadv_tt_client_flags) * @reserved: reserved field * @addr: mac address of non-mesh client that triggered this tt change + * @vid: VLAN identifier */ struct batadv_tvlv_tt_change { uint8_t flags; uint8_t reserved; uint8_t addr[ETH_ALEN]; + __be16 vid; }; /** * struct batadv_tvlv_roam_adv - roaming advertisement * @client: mac address of roaming client - * @reserved: field reserved for future use + * @vid: VLAN identifier */ struct batadv_tvlv_roam_adv { uint8_t client[ETH_ALEN]; - uint16_t reserved; + __be16 vid; }; #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 3281a504c20a..d4114d775ad6 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -30,6 +30,8 @@ #include "network-coding.h" #include "fragmentation.h" +#include <linux/if_vlan.h> + static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -45,7 +47,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, if ((curr_router) && (!neigh_node)) { batadv_dbg(BATADV_DBG_ROUTES, bat_priv, "Deleting route towards: %pM\n", orig_node->orig); - batadv_tt_global_del_orig(bat_priv, orig_node, + batadv_tt_global_del_orig(bat_priv, orig_node, -1, "Deleted route towards originator"); /* route added */ @@ -113,9 +115,19 @@ out: return; } -void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, +/** + * batadv_bonding_candidate_add - consider a new link for bonding mode towards + * the given originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the target node + * @neigh_node: the neighbor representing the new link to consider for bonding + * mode + */ +void batadv_bonding_candidate_add(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) { + struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; struct batadv_neigh_node *tmp_neigh_node, *router = NULL; uint8_t interference_candidate = 0; @@ -130,8 +142,9 @@ void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, if (!router) goto candidate_del; + /* ... and is good enough to be considered */ - if (neigh_node->tq_avg < router->tq_avg - BATADV_BONDING_TQ_THRESHOLD) + if (bao->bat_neigh_is_equiv_or_better(neigh_node, router)) goto candidate_del; /* check if we have another candidate with the same mac address or @@ -247,47 +260,65 @@ bool batadv_check_management_packet(struct sk_buff *skb, return true; } +/** + * batadv_recv_my_icmp_packet - receive an icmp packet locally + * @bat_priv: the bat priv with all the soft interface information + * @skb: icmp packet to process + * + * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP + * otherwise. + */ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, - struct sk_buff *skb, size_t icmp_len) + struct sk_buff *skb) { struct batadv_hard_iface *primary_if = NULL; struct batadv_orig_node *orig_node = NULL; - struct batadv_icmp_packet_rr *icmp_packet; - int ret = NET_RX_DROP; + struct batadv_icmp_header *icmph; + int res, ret = NET_RX_DROP; - icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; + icmph = (struct batadv_icmp_header *)skb->data; - /* add data to device queue */ - if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) { - batadv_socket_receive_packet(icmp_packet, icmp_len); - goto out; - } + switch (icmph->msg_type) { + case BATADV_ECHO_REPLY: + case BATADV_DESTINATION_UNREACHABLE: + case BATADV_TTL_EXCEEDED: + /* receive the packet */ + if (skb_linearize(skb) < 0) + break; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; + batadv_socket_receive_packet(icmph, skb->len); + break; + case BATADV_ECHO_REQUEST: + /* answer echo request (ping) */ + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; - /* answer echo request (ping) */ - /* get routing information */ - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig); - if (!orig_node) - goto out; + /* get routing information */ + orig_node = batadv_orig_hash_find(bat_priv, icmph->orig); + if (!orig_node) + goto out; - /* create a copy of the skb, if needed, to modify it. */ - if (skb_cow(skb, ETH_HLEN) < 0) - goto out; + /* create a copy of the skb, if needed, to modify it. */ + if (skb_cow(skb, ETH_HLEN) < 0) + goto out; - icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; + icmph = (struct batadv_icmp_header *)skb->data; - memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN); - memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr, - ETH_ALEN); - icmp_packet->icmph.msg_type = BATADV_ECHO_REPLY; - icmp_packet->icmph.header.ttl = BATADV_TTL; + memcpy(icmph->dst, icmph->orig, ETH_ALEN); + memcpy(icmph->orig, primary_if->net_dev->dev_addr, ETH_ALEN); + icmph->msg_type = BATADV_ECHO_REPLY; + icmph->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) - ret = NET_RX_SUCCESS; + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res != NET_XMIT_DROP) + ret = NET_RX_SUCCESS; + break; + default: + /* drop unknown type */ + goto out; + } out: if (primary_if) batadv_hardif_free_ref(primary_if); @@ -350,16 +381,13 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct batadv_icmp_packet_rr *icmp_packet; + struct batadv_icmp_header *icmph; + struct batadv_icmp_packet_rr *icmp_packet_rr; struct ethhdr *ethhdr; struct batadv_orig_node *orig_node = NULL; - int hdr_size = sizeof(struct batadv_icmp_packet); + int hdr_size = sizeof(struct batadv_icmp_header); int ret = NET_RX_DROP; - /* we truncate all incoming icmp packets if they don't match our size */ - if (skb->len >= sizeof(struct batadv_icmp_packet_rr)) - hdr_size = sizeof(struct batadv_icmp_packet_rr); - /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; @@ -378,28 +406,39 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) goto out; - icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; + icmph = (struct batadv_icmp_header *)skb->data; /* add record route information if not full */ - if ((icmp_packet->icmph.msg_type == BATADV_ECHO_REPLY || - icmp_packet->icmph.msg_type == BATADV_ECHO_REQUEST) && - (hdr_size == sizeof(struct batadv_icmp_packet_rr)) && - (icmp_packet->rr_cur < BATADV_RR_LEN)) { - memcpy(&(icmp_packet->rr[icmp_packet->rr_cur]), + if ((icmph->msg_type == BATADV_ECHO_REPLY || + icmph->msg_type == BATADV_ECHO_REQUEST) && + (skb->len >= sizeof(struct batadv_icmp_packet_rr))) { + if (skb_linearize(skb) < 0) + goto out; + + /* create a copy of the skb, if needed, to modify it. */ + if (skb_cow(skb, ETH_HLEN) < 0) + goto out; + + icmph = (struct batadv_icmp_header *)skb->data; + icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph; + if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN) + goto out; + + memcpy(&(icmp_packet_rr->rr[icmp_packet_rr->rr_cur]), ethhdr->h_dest, ETH_ALEN); - icmp_packet->rr_cur++; + icmp_packet_rr->rr_cur++; } /* packet for me */ - if (batadv_is_my_mac(bat_priv, icmp_packet->icmph.dst)) - return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size); + if (batadv_is_my_mac(bat_priv, icmph->dst)) + return batadv_recv_my_icmp_packet(bat_priv, skb); /* TTL exceeded */ - if (icmp_packet->icmph.header.ttl < 2) + if (icmph->header.ttl < 2) return batadv_recv_icmp_ttl_exceeded(bat_priv, skb); /* get routing information */ - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst); + orig_node = batadv_orig_hash_find(bat_priv, icmph->dst); if (!orig_node) goto out; @@ -407,10 +446,10 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (skb_cow(skb, ETH_HLEN) < 0) goto out; - icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; + icmph = (struct batadv_icmp_header *)skb->data; /* decrement ttl */ - icmp_packet->icmph.header.ttl--; + icmph->header.ttl--; /* route it */ if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) @@ -477,18 +516,25 @@ out: return router; } -/* Interface Alternating: Use the best of the - * remaining candidates which are not using - * this interface. +/** + * batadv_find_ifalter_router - find the best of the remaining candidates which + * are not using this interface + * @bat_priv: the bat priv with all the soft interface information + * @primary_orig: the destination + * @recv_if: the interface that the router returned by this function has to not + * use * - * Increases the returned router's refcount + * Returns the best candidate towards primary_orig that is not using recv_if. + * Increases the returned neighbor's refcount */ static struct batadv_neigh_node * -batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, +batadv_find_ifalter_router(struct batadv_priv *bat_priv, + struct batadv_orig_node *primary_orig, const struct batadv_hard_iface *recv_if) { - struct batadv_neigh_node *tmp_neigh_node; struct batadv_neigh_node *router = NULL, *first_candidate = NULL; + struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_neigh_node *tmp_neigh_node; rcu_read_lock(); list_for_each_entry_rcu(tmp_neigh_node, &primary_orig->bond_list, @@ -500,7 +546,7 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, if (tmp_neigh_node->if_incoming == recv_if) continue; - if (router && tmp_neigh_node->tq_avg <= router->tq_avg) + if (router && bao->bat_neigh_cmp(tmp_neigh_node, router)) continue; if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) @@ -634,7 +680,8 @@ batadv_find_router(struct batadv_priv *bat_priv, if (bonding_enabled) router = batadv_find_bond_router(primary_orig_node, recv_if); else - router = batadv_find_ifalter_router(primary_orig_node, recv_if); + router = batadv_find_ifalter_router(bat_priv, primary_orig_node, + recv_if); return_router: if (router && router->if_incoming->if_status != BATADV_IF_ACTIVE) @@ -724,6 +771,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @unicast_packet: the unicast header to be updated * @dst_addr: the payload destination + * @vid: VLAN identifier * * Search the translation table for dst_addr and update the unicast header with * the new corresponding information (originator address where the destination @@ -734,21 +782,22 @@ out: static bool batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct batadv_unicast_packet *unicast_packet, - uint8_t *dst_addr) + uint8_t *dst_addr, unsigned short vid) { struct batadv_orig_node *orig_node = NULL; struct batadv_hard_iface *primary_if = NULL; bool ret = false; uint8_t *orig_addr, orig_ttvn; - if (batadv_is_my_client(bat_priv, dst_addr)) { + if (batadv_is_my_client(bat_priv, dst_addr, vid)) { primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; orig_addr = primary_if->net_dev->dev_addr; orig_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); } else { - orig_node = batadv_transtable_search(bat_priv, NULL, dst_addr); + orig_node = batadv_transtable_search(bat_priv, NULL, dst_addr, + vid); if (!orig_node) goto out; @@ -775,11 +824,12 @@ out: static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_len) { - uint8_t curr_ttvn, old_ttvn; + struct batadv_unicast_packet *unicast_packet; + struct batadv_hard_iface *primary_if; struct batadv_orig_node *orig_node; + uint8_t curr_ttvn, old_ttvn; struct ethhdr *ethhdr; - struct batadv_hard_iface *primary_if; - struct batadv_unicast_packet *unicast_packet; + unsigned short vid; int is_old_ttvn; /* check if there is enough data before accessing it */ @@ -791,6 +841,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, return 0; unicast_packet = (struct batadv_unicast_packet *)skb->data; + vid = batadv_get_vid(skb, hdr_len); ethhdr = (struct ethhdr *)(skb->data + hdr_len); /* check if the destination client was served by this node and it is now @@ -798,9 +849,9 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * message and that it knows the new destination in the mesh to re-route * the packet to */ - if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest)) { + if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) { if (batadv_reroute_unicast_packet(bat_priv, unicast_packet, - ethhdr->h_dest)) + ethhdr->h_dest, vid)) net_ratelimited_function(batadv_dbg, BATADV_DBG_TT, bat_priv, "Rerouting unicast packet to %pM (dst=%pM): Local Roaming\n", @@ -846,7 +897,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * target host */ if (batadv_reroute_unicast_packet(bat_priv, unicast_packet, - ethhdr->h_dest)) { + ethhdr->h_dest, vid)) { net_ratelimited_function(batadv_dbg, BATADV_DBG_TT, bat_priv, "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n", unicast_packet->dest, ethhdr->h_dest, @@ -858,7 +909,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * currently served by this node or there is no destination at all and * it is possible to drop the packet */ - if (!batadv_is_my_client(bat_priv, ethhdr->h_dest)) + if (!batadv_is_my_client(bat_priv, ethhdr->h_dest, vid)) return 0; /* update the header in order to let the packet be delivered to this diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 55d637a90621..19544ddb81b5 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -48,7 +48,8 @@ batadv_find_router(struct batadv_priv *bat_priv, const struct batadv_hard_iface *recv_if); void batadv_bonding_candidate_del(struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node); -void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, +void batadv_bonding_candidate_add(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node); void batadv_bonding_save_primary(const struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 82588e425641..c83be5ebaa28 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -234,44 +234,45 @@ out: } /** - * batadv_send_generic_unicast_skb - send an skb as unicast + * batadv_send_skb_unicast - encapsulate and send an skb via unicast * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast * 4addr packets) + * @orig_node: the originator to send the packet to + * @vid: the vid to be used to search the translation table * - * Returns 1 in case of error or 0 otherwise. + * Wrap the given skb into a batman-adv unicast or unicast-4addr header + * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied + * as packet_type. Then send this frame to the given orig_node and release a + * reference to this orig_node. + * + * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ -int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv, - struct sk_buff *skb, int packet_type, - int packet_subtype) +static int batadv_send_skb_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype, + struct batadv_orig_node *orig_node, + unsigned short vid) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_unicast_packet *unicast_packet; - struct batadv_orig_node *orig_node; - int ret = NET_RX_DROP; - - /* get routing information */ - if (is_multicast_ether_addr(ethhdr->h_dest)) - orig_node = batadv_gw_get_selected_orig(bat_priv); - else - /* check for tt host - increases orig_node refcount. - * returns NULL in case of AP isolation - */ - orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source, - ethhdr->h_dest); + int ret = NET_XMIT_DROP; if (!orig_node) goto out; switch (packet_type) { case BATADV_UNICAST: - batadv_send_skb_prepare_unicast(skb, orig_node); + if (!batadv_send_skb_prepare_unicast(skb, orig_node)) + goto out; break; case BATADV_UNICAST_4ADDR: - batadv_send_skb_prepare_unicast_4addr(bat_priv, skb, orig_node, - packet_subtype); + if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb, + orig_node, + packet_subtype)) + goto out; break; default: /* this function supports UNICAST and UNICAST_4ADDR only. It @@ -287,20 +288,71 @@ int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv, * try to reroute it because the ttvn contained in the header is less * than the current one */ - if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest)) + if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) unicast_packet->ttvn = unicast_packet->ttvn - 1; if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) - ret = 0; + ret = NET_XMIT_SUCCESS; out: if (orig_node) batadv_orig_node_free_ref(orig_node); - if (ret == NET_RX_DROP) + if (ret == NET_XMIT_DROP) kfree_skb(skb); return ret; } +/** + * batadv_send_skb_via_tt_generic - send an skb via TT lookup + * @bat_priv: the bat priv with all the soft interface information + * @skb: payload to send + * @packet_type: the batman unicast packet type to use + * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast + * 4addr packets) + * @vid: the vid to be used to search the translation table + * + * Look up the recipient node for the destination address in the ethernet + * header via the translation table. Wrap the given skb into a batman-adv + * unicast or unicast-4addr header depending on whether BATADV_UNICAST or + * BATADV_UNICAST_4ADDR was supplied as packet_type. Then send this frame + * to the according destination node. + * + * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. + */ +int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype, unsigned short vid) +{ + struct ethhdr *ethhdr = (struct ethhdr *)skb->data; + struct batadv_orig_node *orig_node; + + orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source, + ethhdr->h_dest, vid); + return batadv_send_skb_unicast(bat_priv, skb, packet_type, + packet_subtype, orig_node, vid); +} + +/** + * batadv_send_skb_via_gw - send an skb via gateway lookup + * @bat_priv: the bat priv with all the soft interface information + * @skb: payload to send + * @vid: the vid to be used to search the translation table + * + * Look up the currently selected gateway. Wrap the given skb into a batman-adv + * unicast header and send this frame to this gateway node. + * + * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. + */ +int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid) +{ + struct batadv_orig_node *orig_node; + + orig_node = batadv_gw_get_selected_orig(bat_priv); + return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0, + orig_node, vid); +} + void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index ad63184a4dd9..aa2e2537a739 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -38,41 +38,54 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node *orig_node, int packet_subtype); -int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv, - struct sk_buff *skb, int packet_type, - int packet_subtype); - +int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype, unsigned short vid); +int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid); /** - * batadv_send_unicast_skb - send the skb encapsulated in a unicast packet + * batadv_send_skb_via_tt - send an skb via TT lookup * @bat_priv: the bat priv with all the soft interface information * @skb: the payload to send + * @vid: the vid to be used to search the translation table + * + * Look up the recipient node for the destination address in the ethernet + * header via the translation table. Wrap the given skb into a batman-adv + * unicast header. Then send this frame to the according destination node. * - * Returns 1 in case of error or 0 otherwise. + * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ -static inline int batadv_send_skb_unicast(struct batadv_priv *bat_priv, - struct sk_buff *skb) +static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid) { - return batadv_send_skb_generic_unicast(bat_priv, skb, BATADV_UNICAST, - 0); + return batadv_send_skb_via_tt_generic(bat_priv, skb, BATADV_UNICAST, 0, + vid); } /** - * batadv_send_4addr_unicast_skb - send the skb encapsulated in a unicast 4addr - * packet + * batadv_send_skb_via_tt_4addr - send an skb via TT lookup * @bat_priv: the bat priv with all the soft interface information * @skb: the payload to send * @packet_subtype: the unicast 4addr packet subtype to use + * @vid: the vid to be used to search the translation table + * + * Look up the recipient node for the destination address in the ethernet + * header via the translation table. Wrap the given skb into a batman-adv + * unicast-4addr header. Then send this frame to the according destination + * node. * - * Returns 1 in case of error or 0 otherwise. + * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ -static inline int batadv_send_skb_unicast_4addr(struct batadv_priv *bat_priv, - struct sk_buff *skb, - int packet_subtype) +static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv, + struct sk_buff *skb, + int packet_subtype, + unsigned short vid) { - return batadv_send_skb_generic_unicast(bat_priv, skb, - BATADV_UNICAST_4ADDR, - packet_subtype); + return batadv_send_skb_via_tt_generic(bat_priv, skb, + BATADV_UNICAST_4ADDR, + packet_subtype, vid); } #endif /* _NET_BATMAN_ADV_SEND_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index e8a2bd699d40..36f050876f82 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -118,9 +118,10 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) { - batadv_tt_local_remove(bat_priv, old_addr, + batadv_tt_local_remove(bat_priv, old_addr, BATADV_NO_FLAGS, "mac address changed", false); - batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX); + batadv_tt_local_add(dev, addr->sa_data, BATADV_NO_FLAGS, + BATADV_NULL_IFINDEX); } return 0; @@ -152,33 +153,33 @@ static void batadv_interface_set_rx_mode(struct net_device *dev) static int batadv_interface_tx(struct sk_buff *skb, struct net_device *soft_iface) { - struct ethhdr *ethhdr = (struct ethhdr *)skb->data; + struct ethhdr *ethhdr; struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_hard_iface *primary_if = NULL; struct batadv_bcast_packet *bcast_packet; - struct vlan_ethhdr *vhdr; __be16 ethertype = htons(ETH_P_BATMAN); static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x00}; static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, 0x00, 0x00}; + struct vlan_ethhdr *vhdr; unsigned int header_len = 0; int data_len = skb->len, ret; - unsigned short vid __maybe_unused = BATADV_NO_FLAGS; - bool do_bcast = false; - uint32_t seqno; unsigned long brd_delay = 1; + bool do_bcast = false, client_added; + unsigned short vid; + uint32_t seqno; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; soft_iface->trans_start = jiffies; + vid = batadv_get_vid(skb, 0); + ethhdr = (struct ethhdr *)skb->data; switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: vhdr = (struct vlan_ethhdr *)skb->data; - vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; - vid |= BATADV_VLAN_HAS_TAG; if (vhdr->h_vlan_encapsulated_proto != ethertype) break; @@ -195,8 +196,12 @@ static int batadv_interface_tx(struct sk_buff *skb, ethhdr = (struct ethhdr *)skb->data; /* Register the client MAC in the transtable */ - if (!is_multicast_ether_addr(ethhdr->h_source)) - batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); + if (!is_multicast_ether_addr(ethhdr->h_source)) { + client_added = batadv_tt_local_add(soft_iface, ethhdr->h_source, + vid, skb->skb_iif); + if (!client_added) + goto dropped; + } /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... @@ -296,8 +301,12 @@ static int batadv_interface_tx(struct sk_buff *skb, batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); - ret = batadv_send_skb_unicast(bat_priv, skb); - if (ret != 0) + if (is_multicast_ether_addr(ethhdr->h_dest)) + ret = batadv_send_skb_via_gw(bat_priv, skb, vid); + else + ret = batadv_send_skb_via_tt(bat_priv, skb, vid); + + if (ret == NET_XMIT_DROP) goto dropped_freed; } @@ -319,12 +328,12 @@ void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct batadv_hard_iface *recv_if, int hdr_size, struct batadv_orig_node *orig_node) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); - struct ethhdr *ethhdr; - struct vlan_ethhdr *vhdr; struct batadv_header *batadv_header = (struct batadv_header *)skb->data; - unsigned short vid __maybe_unused = BATADV_NO_FLAGS; + struct batadv_priv *bat_priv = netdev_priv(soft_iface); __be16 ethertype = htons(ETH_P_BATMAN); + struct vlan_ethhdr *vhdr; + struct ethhdr *ethhdr; + unsigned short vid; bool is_bcast; is_bcast = (batadv_header->packet_type == BATADV_BCAST); @@ -336,13 +345,12 @@ void batadv_interface_rx(struct net_device *soft_iface, skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); + vid = batadv_get_vid(skb, hdr_size); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: vhdr = (struct vlan_ethhdr *)skb->data; - vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; - vid |= BATADV_VLAN_HAS_TAG; if (vhdr->h_vlan_encapsulated_proto != ethertype) break; @@ -378,9 +386,10 @@ void batadv_interface_rx(struct net_device *soft_iface, if (orig_node) batadv_tt_add_temporary_global_entry(bat_priv, orig_node, - ethhdr->h_source); + ethhdr->h_source, vid); - if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest)) + if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest, + vid)) goto dropped; netif_rx(skb); @@ -392,6 +401,177 @@ out: return; } +/** + * batadv_softif_vlan_free_ref - decrease the vlan object refcounter and + * possibly free it + * @softif_vlan: the vlan object to release + */ +void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan) +{ + if (atomic_dec_and_test(&softif_vlan->refcount)) + kfree_rcu(softif_vlan, rcu); +} + +/** + * batadv_softif_vlan_get - get the vlan object for a specific vid + * @bat_priv: the bat priv with all the soft interface information + * @vid: the identifier of the vlan object to retrieve + * + * Returns the private data of the vlan matching the vid passed as argument or + * NULL otherwise. The refcounter of the returned object is incremented by 1. + */ +struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, + unsigned short vid) +{ + struct batadv_softif_vlan *vlan_tmp, *vlan = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->softif_vlan_list, list) { + if (vlan_tmp->vid != vid) + continue; + + if (!atomic_inc_not_zero(&vlan_tmp->refcount)) + continue; + + vlan = vlan_tmp; + break; + } + rcu_read_unlock(); + + return vlan; +} + +/** + * batadv_create_vlan - allocate the needed resources for a new vlan + * @bat_priv: the bat priv with all the soft interface information + * @vid: the VLAN identifier + * + * Returns 0 on success, a negative error otherwise. + */ +int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) +{ + struct batadv_softif_vlan *vlan; + int err; + + vlan = batadv_softif_vlan_get(bat_priv, vid); + if (vlan) { + batadv_softif_vlan_free_ref(vlan); + return -EEXIST; + } + + vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); + if (!vlan) + return -ENOMEM; + + vlan->vid = vid; + atomic_set(&vlan->refcount, 1); + + atomic_set(&vlan->ap_isolation, 0); + + err = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); + if (err) { + kfree(vlan); + return err; + } + + /* add a new TT local entry. This one will be marked with the NOPURGE + * flag + */ + batadv_tt_local_add(bat_priv->soft_iface, + bat_priv->soft_iface->dev_addr, vid, + BATADV_NULL_IFINDEX); + + spin_lock_bh(&bat_priv->softif_vlan_list_lock); + hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + + return 0; +} + +/** + * batadv_softif_destroy_vlan - remove and destroy a softif_vlan object + * @bat_priv: the bat priv with all the soft interface information + * @vlan: the object to remove + */ +static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan) +{ + spin_lock_bh(&bat_priv->softif_vlan_list_lock); + hlist_del_rcu(&vlan->list); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + + batadv_sysfs_del_vlan(bat_priv, vlan); + + /* explicitly remove the associated TT local entry because it is marked + * with the NOPURGE flag + */ + batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr, + vlan->vid, "vlan interface destroyed", false); + + batadv_softif_vlan_free_ref(vlan); +} + +/** + * batadv_interface_add_vid - ndo_add_vid API implementation + * @dev: the netdev of the mesh interface + * @vid: identifier of the new vlan + * + * Set up all the internal structures for handling the new vlan on top of the + * mesh interface + * + * Returns 0 on success or a negative error code in case of failure. + */ +static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, + unsigned short vid) +{ + struct batadv_priv *bat_priv = netdev_priv(dev); + + /* only 802.1Q vlans are supported. + * batman-adv does not know how to handle other types + */ + if (proto != htons(ETH_P_8021Q)) + return -EINVAL; + + vid |= BATADV_VLAN_HAS_TAG; + + return batadv_softif_create_vlan(bat_priv, vid); +} + +/** + * batadv_interface_kill_vid - ndo_kill_vid API implementation + * @dev: the netdev of the mesh interface + * @vid: identifier of the deleted vlan + * + * Destroy all the internal structures used to handle the vlan identified by vid + * on top of the mesh interface + * + * Returns 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q + * or -ENOENT if the specified vlan id wasn't registered. + */ +static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, + unsigned short vid) +{ + struct batadv_priv *bat_priv = netdev_priv(dev); + struct batadv_softif_vlan *vlan; + + /* only 802.1Q vlans are supported. batman-adv does not know how to + * handle other types + */ + if (proto != htons(ETH_P_8021Q)) + return -EINVAL; + + vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); + if (!vlan) + return -ENOENT; + + batadv_softif_destroy_vlan(bat_priv, vlan); + + /* finally free the vlan object */ + batadv_softif_vlan_free_ref(vlan); + + return 0; +} + /* batman-adv network devices have devices nesting below it and are a special * "super class" of normal network devices; split their locks off into a * separate class since they always nest. @@ -431,6 +611,7 @@ static void batadv_set_lockdep_class(struct net_device *dev) */ static void batadv_softif_destroy_finish(struct work_struct *work) { + struct batadv_softif_vlan *vlan; struct batadv_priv *bat_priv; struct net_device *soft_iface; @@ -438,6 +619,13 @@ static void batadv_softif_destroy_finish(struct work_struct *work) cleanup_work); soft_iface = bat_priv->soft_iface; + /* destroy the "untagged" VLAN */ + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (vlan) { + batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_softif_vlan_free_ref(vlan); + } + batadv_sysfs_del_meshif(soft_iface); rtnl_lock(); @@ -479,7 +667,6 @@ static int batadv_softif_init_late(struct net_device *dev) #ifdef CONFIG_BATMAN_ADV_DAT atomic_set(&bat_priv->distributed_arp_table, 1); #endif - atomic_set(&bat_priv->ap_isolation, 0); atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF); atomic_set(&bat_priv->gw_sel_class, 20); atomic_set(&bat_priv->gw.bandwidth_down, 100); @@ -490,6 +677,7 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->log_level, 0); #endif atomic_set(&bat_priv->fragmentation, 1); + atomic_set(&bat_priv->packet_size_max, ETH_DATA_LEN); atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN); atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN); @@ -593,6 +781,8 @@ static const struct net_device_ops batadv_netdev_ops = { .ndo_open = batadv_interface_open, .ndo_stop = batadv_interface_release, .ndo_get_stats = batadv_interface_stats, + .ndo_vlan_rx_add_vid = batadv_interface_add_vid, + .ndo_vlan_rx_kill_vid = batadv_interface_kill_vid, .ndo_set_mac_address = batadv_interface_set_mac_addr, .ndo_change_mtu = batadv_interface_change_mtu, .ndo_set_rx_mode = batadv_interface_set_rx_mode, @@ -632,6 +822,7 @@ static void batadv_softif_init_early(struct net_device *dev) dev->netdev_ops = &batadv_netdev_ops; dev->destructor = batadv_softif_free; + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; dev->tx_queue_len = 0; /* can't call min_mtu, because the needed variables diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 2f2472c2ea0d..06fc91ff5a02 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -28,5 +28,9 @@ struct net_device *batadv_softif_create(const char *name); void batadv_softif_destroy_sysfs(struct net_device *soft_iface); int batadv_softif_is_valid(const struct net_device *net_dev); extern struct rtnl_link_ops batadv_link_ops; +int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); +void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan); +struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, + unsigned short vid); #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 869eb46329cb..6335433310af 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -24,6 +24,7 @@ #include "network-coding.h" #include "originator.h" #include "hard-interface.h" +#include "soft-interface.h" #include "gateway_common.h" #include "gateway_client.h" @@ -39,6 +40,53 @@ static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj) return netdev_priv(net_dev); } +/** + * batadv_vlan_kobj_to_batpriv - convert a vlan kobj in the associated batpriv + * @obj: kobject to covert + * + * Returns the associated batadv_priv struct. + */ +static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj) +{ + /* VLAN specific attributes are located in the root sysfs folder if they + * refer to the untagged VLAN.. + */ + if (!strcmp(BATADV_SYSFS_IF_MESH_SUBDIR, obj->name)) + return batadv_kobj_to_batpriv(obj); + + /* ..while the attributes for the tagged vlans are located in + * the in the corresponding "vlan%VID" subfolder + */ + return batadv_kobj_to_batpriv(obj->parent); +} + +/** + * batadv_kobj_to_vlan - convert a kobj in the associated softif_vlan struct + * @obj: kobject to covert + * + * Returns the associated softif_vlan struct if found, NULL otherwise. + */ +static struct batadv_softif_vlan * +batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj) +{ + struct batadv_softif_vlan *vlan_tmp, *vlan = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->softif_vlan_list, list) { + if (vlan_tmp->kobj != obj) + continue; + + if (!atomic_inc_not_zero(&vlan_tmp->refcount)) + continue; + + vlan = vlan_tmp; + break; + } + rcu_read_unlock(); + + return vlan; +} + #define BATADV_UEV_TYPE_VAR "BATTYPE=" #define BATADV_UEV_ACTION_VAR "BATACTION=" #define BATADV_UEV_DATA_VAR "BATDATA=" @@ -53,6 +101,15 @@ static char *batadv_uev_type_str[] = { "gw" }; +/* Use this, if you have customized show and store functions for vlan attrs */ +#define BATADV_ATTR_VLAN(_name, _mode, _show, _store) \ +struct batadv_attribute batadv_attr_vlan_##_name = { \ + .attr = {.name = __stringify(_name), \ + .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + /* Use this, if you have customized show and store functions */ #define BATADV_ATTR(_name, _mode, _show, _store) \ struct batadv_attribute batadv_attr_##_name = { \ @@ -122,6 +179,41 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ static BATADV_ATTR(_name, _mode, batadv_show_##_name, \ batadv_store_##_name) +#define BATADV_ATTR_VLAN_STORE_BOOL(_name, _post_func) \ +ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \ + struct attribute *attr, char *buff, \ + size_t count) \ +{ \ + struct batadv_priv *bat_priv = batadv_vlan_kobj_to_batpriv(kobj);\ + struct batadv_softif_vlan *vlan = batadv_kobj_to_vlan(bat_priv, \ + kobj); \ + size_t res = __batadv_store_bool_attr(buff, count, _post_func, \ + attr, &vlan->_name, \ + bat_priv->soft_iface); \ + batadv_softif_vlan_free_ref(vlan); \ + return res; \ +} + +#define BATADV_ATTR_VLAN_SHOW_BOOL(_name) \ +ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \ + struct attribute *attr, char *buff) \ +{ \ + struct batadv_priv *bat_priv = batadv_vlan_kobj_to_batpriv(kobj);\ + struct batadv_softif_vlan *vlan = batadv_kobj_to_vlan(bat_priv, \ + kobj); \ + size_t res = sprintf(buff, "%s\n", \ + atomic_read(&vlan->_name) == 0 ? \ + "disabled" : "enabled"); \ + batadv_softif_vlan_free_ref(vlan); \ + return res; \ +} + +/* Use this, if you are going to turn a [name] in the vlan struct on or off */ +#define BATADV_ATTR_VLAN_BOOL(_name, _mode, _post_func) \ + static BATADV_ATTR_VLAN_STORE_BOOL(_name, _post_func) \ + static BATADV_ATTR_VLAN_SHOW_BOOL(_name) \ + static BATADV_ATTR_VLAN(_name, _mode, batadv_show_vlan_##_name, \ + batadv_store_vlan_##_name) static int batadv_store_bool_attr(char *buff, size_t count, struct net_device *net_dev, @@ -361,7 +453,6 @@ BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, batadv_dat_status_update); #endif BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu); -BATADV_ATTR_SIF_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); static BATADV_ATTR(routing_algo, S_IRUGO, batadv_show_bat_algo, NULL); static BATADV_ATTR(gw_mode, S_IRUGO | S_IWUSR, batadv_show_gw_mode, batadv_store_gw_mode); @@ -391,7 +482,6 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { &batadv_attr_distributed_arp_table, #endif &batadv_attr_fragmentation, - &batadv_attr_ap_isolation, &batadv_attr_routing_algo, &batadv_attr_gw_mode, &batadv_attr_orig_interval, @@ -407,6 +497,16 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { NULL, }; +BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); + +/** + * batadv_vlan_attrs - array of vlan specific sysfs attributes + */ +static struct batadv_attribute *batadv_vlan_attrs[] = { + &batadv_attr_vlan_ap_isolation, + NULL, +}; + int batadv_sysfs_add_meshif(struct net_device *dev) { struct kobject *batif_kobject = &dev->dev.kobj; @@ -457,6 +557,80 @@ void batadv_sysfs_del_meshif(struct net_device *dev) bat_priv->mesh_obj = NULL; } +/** + * batadv_sysfs_add_vlan - add all the needed sysfs objects for the new vlan + * @dev: netdev of the mesh interface + * @vlan: private data of the newly added VLAN interface + * + * Returns 0 on success and -ENOMEM if any of the structure allocations fails. + */ +int batadv_sysfs_add_vlan(struct net_device *dev, + struct batadv_softif_vlan *vlan) +{ + char vlan_subdir[sizeof(BATADV_SYSFS_VLAN_SUBDIR_PREFIX) + 5]; + struct batadv_priv *bat_priv = netdev_priv(dev); + struct batadv_attribute **bat_attr; + int err; + + if (vlan->vid & BATADV_VLAN_HAS_TAG) { + sprintf(vlan_subdir, BATADV_SYSFS_VLAN_SUBDIR_PREFIX "%hu", + vlan->vid & VLAN_VID_MASK); + + vlan->kobj = kobject_create_and_add(vlan_subdir, + bat_priv->mesh_obj); + if (!vlan->kobj) { + batadv_err(dev, "Can't add sysfs directory: %s/%s\n", + dev->name, vlan_subdir); + goto out; + } + } else { + /* the untagged LAN uses the root folder to store its "VLAN + * specific attributes" + */ + vlan->kobj = bat_priv->mesh_obj; + kobject_get(bat_priv->mesh_obj); + } + + for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) { + err = sysfs_create_file(vlan->kobj, + &((*bat_attr)->attr)); + if (err) { + batadv_err(dev, "Can't add sysfs file: %s/%s/%s\n", + dev->name, vlan_subdir, + ((*bat_attr)->attr).name); + goto rem_attr; + } + } + + return 0; + +rem_attr: + for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) + sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); + + kobject_put(vlan->kobj); + vlan->kobj = NULL; +out: + return -ENOMEM; +} + +/** + * batadv_sysfs_del_vlan - remove all the sysfs objects for a given VLAN + * @bat_priv: the bat priv with all the soft interface information + * @vlan: the private data of the VLAN to destroy + */ +void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan) +{ + struct batadv_attribute **bat_attr; + + for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) + sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); + + kobject_put(vlan->kobj); + vlan->kobj = NULL; +} + static ssize_t batadv_show_mesh_iface(struct kobject *kobj, struct attribute *attr, char *buff) { diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index 479acf4c16f4..c7d725de50ad 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -22,6 +22,12 @@ #define BATADV_SYSFS_IF_MESH_SUBDIR "mesh" #define BATADV_SYSFS_IF_BAT_SUBDIR "batman_adv" +/** + * BATADV_SYSFS_VLAN_SUBDIR_PREFIX - prefix of the subfolder that will be + * created in the sysfs hierarchy for each VLAN interface. The subfolder will + * be named "BATADV_SYSFS_VLAN_SUBDIR_PREFIX%vid". + */ +#define BATADV_SYSFS_VLAN_SUBDIR_PREFIX "vlan" struct batadv_attribute { struct attribute attr; @@ -36,6 +42,10 @@ void batadv_sysfs_del_meshif(struct net_device *dev); int batadv_sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev); void batadv_sysfs_del_hardif(struct kobject **hardif_obj); +int batadv_sysfs_add_vlan(struct net_device *dev, + struct batadv_softif_vlan *vlan); +void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan); int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, enum batadv_uev_action action, const char *data); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b521afb186d4..4add57d4857f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -34,6 +34,7 @@ static struct lock_class_key batadv_tt_local_hash_lock_class_key; static struct lock_class_key batadv_tt_global_hash_lock_class_key; static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, + unsigned short vid, struct batadv_orig_node *orig_node); static void batadv_tt_purge(struct work_struct *work); static void @@ -41,7 +42,8 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry); static void batadv_tt_global_del(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *addr, - const char *message, bool roaming); + unsigned short vid, const char *message, + bool roaming); /* returns 1 if they are the same mac addr */ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) @@ -52,43 +54,93 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } +/** + * batadv_choose_tt - return the index of the tt entry in the hash table + * @data: pointer to the tt_common_entry object to map + * @size: the size of the hash table + * + * Returns the hash index where the object represented by 'data' should be + * stored at. + */ +static inline uint32_t batadv_choose_tt(const void *data, uint32_t size) +{ + struct batadv_tt_common_entry *tt; + uint32_t hash = 0; + + tt = (struct batadv_tt_common_entry *)data; + hash = batadv_hash_bytes(hash, &tt->addr, ETH_ALEN); + hash = batadv_hash_bytes(hash, &tt->vid, sizeof(tt->vid)); + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash % size; +} + +/** + * batadv_tt_hash_find - look for a client in the given hash table + * @hash: the hash table to search + * @addr: the mac address of the client to look for + * @vid: VLAN identifier + * + * Returns a pointer to the tt_common struct belonging to the searched client if + * found, NULL otherwise. + */ static struct batadv_tt_common_entry * -batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data) +batadv_tt_hash_find(struct batadv_hashtable *hash, const uint8_t *addr, + unsigned short vid) { struct hlist_head *head; - struct batadv_tt_common_entry *tt_common_entry; - struct batadv_tt_common_entry *tt_common_entry_tmp = NULL; + struct batadv_tt_common_entry to_search, *tt, *tt_tmp = NULL; uint32_t index; if (!hash) return NULL; - index = batadv_choose_orig(data, hash->size); + memcpy(to_search.addr, addr, ETH_ALEN); + to_search.vid = vid; + + index = batadv_choose_tt(&to_search, hash->size); head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { - if (!batadv_compare_eth(tt_common_entry, data)) + hlist_for_each_entry_rcu(tt, head, hash_entry) { + if (!batadv_compare_eth(tt, addr)) + continue; + + if (tt->vid != vid) continue; - if (!atomic_inc_not_zero(&tt_common_entry->refcount)) + if (!atomic_inc_not_zero(&tt->refcount)) continue; - tt_common_entry_tmp = tt_common_entry; + tt_tmp = tt; break; } rcu_read_unlock(); - return tt_common_entry_tmp; + return tt_tmp; } +/** + * batadv_tt_local_hash_find - search the local table for a given client + * @bat_priv: the bat priv with all the soft interface information + * @addr: the mac address of the client to look for + * @vid: VLAN identifier + * + * Returns a pointer to the corresponding tt_local_entry struct if the client is + * found, NULL otherwise. + */ static struct batadv_tt_local_entry * -batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const void *data) +batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const uint8_t *addr, + unsigned short vid) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local_entry = NULL; - tt_common_entry = batadv_tt_hash_find(bat_priv->tt.local_hash, data); + tt_common_entry = batadv_tt_hash_find(bat_priv->tt.local_hash, addr, + vid); if (tt_common_entry) tt_local_entry = container_of(tt_common_entry, struct batadv_tt_local_entry, @@ -96,13 +148,24 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const void *data) return tt_local_entry; } +/** + * batadv_tt_global_hash_find - search the global table for a given client + * @bat_priv: the bat priv with all the soft interface information + * @addr: the mac address of the client to look for + * @vid: VLAN identifier + * + * Returns a pointer to the corresponding tt_global_entry struct if the client + * is found, NULL otherwise. + */ static struct batadv_tt_global_entry * -batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const void *data) +batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const uint8_t *addr, + unsigned short vid) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global_entry = NULL; - tt_common_entry = batadv_tt_hash_find(bat_priv->tt.global_hash, data); + tt_common_entry = batadv_tt_hash_find(bat_priv->tt.global_hash, addr, + vid); if (tt_common_entry) tt_global_entry = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -145,13 +208,107 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) kfree(orig_entry); } +/** + * batadv_tt_local_size_mod - change the size by v of the local table identified + * by vid + * @bat_priv: the bat priv with all the soft interface information + * @vid: the VLAN identifier of the sub-table to change + * @v: the amount to sum to the local table size + */ +static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv, + unsigned short vid, int v) +{ + struct batadv_softif_vlan *vlan; + + vlan = batadv_softif_vlan_get(bat_priv, vid); + if (!vlan) + return; + + atomic_add(v, &vlan->tt.num_entries); + + batadv_softif_vlan_free_ref(vlan); +} + +/** + * batadv_tt_local_size_inc - increase by one the local table size for the given + * vid + * @bat_priv: the bat priv with all the soft interface information + * @vid: the VLAN identifier + */ +static void batadv_tt_local_size_inc(struct batadv_priv *bat_priv, + unsigned short vid) +{ + batadv_tt_local_size_mod(bat_priv, vid, 1); +} + +/** + * batadv_tt_local_size_dec - decrease by one the local table size for the given + * vid + * @bat_priv: the bat priv with all the soft interface information + * @vid: the VLAN identifier + */ +static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv, + unsigned short vid) +{ + batadv_tt_local_size_mod(bat_priv, vid, -1); +} + +/** + * batadv_tt_global_size_mod - change the size by v of the local table + * identified by vid + * @bat_priv: the bat priv with all the soft interface information + * @vid: the VLAN identifier + * @v: the amount to sum to the global table size + */ +static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node, + unsigned short vid, int v) +{ + struct batadv_orig_node_vlan *vlan; + + vlan = batadv_orig_node_vlan_new(orig_node, vid); + if (!vlan) + return; + + if (atomic_add_return(v, &vlan->tt.num_entries) == 0) { + spin_lock_bh(&orig_node->vlan_list_lock); + list_del_rcu(&vlan->list); + spin_unlock_bh(&orig_node->vlan_list_lock); + batadv_orig_node_vlan_free_ref(vlan); + } + + batadv_orig_node_vlan_free_ref(vlan); +} + +/** + * batadv_tt_global_size_inc - increase by one the global table size for the + * given vid + * @orig_node: the originator which global table size has to be decreased + * @vid: the vlan identifier + */ +static void batadv_tt_global_size_inc(struct batadv_orig_node *orig_node, + unsigned short vid) +{ + batadv_tt_global_size_mod(orig_node, vid, 1); +} + +/** + * batadv_tt_global_size_dec - decrease by one the global table size for the + * given vid + * @orig_node: the originator which global table size has to be decreased + * @vid: the vlan identifier + */ +static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, + unsigned short vid) +{ + batadv_tt_global_size_mod(orig_node, vid, -1); +} + static void batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) { if (!atomic_dec_and_test(&orig_entry->refcount)) return; - /* to avoid race conditions, immediately decrease the tt counter */ - atomic_dec(&orig_entry->orig_node->tt_size); + call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } @@ -178,6 +335,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, tt_change_node->change.flags = flags; tt_change_node->change.reserved = 0; memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); + tt_change_node->change.vid = htons(common->vid); del_op_requested = flags & BATADV_TT_CLIENT_DEL; @@ -200,6 +358,13 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, goto del; if (del_op_requested && !del_op_entry) goto del; + + /* this is a second add in the same originator interval. It + * means that flags have been changed: update them! + */ + if (!del_op_requested && !del_op_entry) + entry->change.flags = flags; + continue; del: list_del(&entry->list); @@ -243,6 +408,35 @@ static uint16_t batadv_tt_entries(uint16_t tt_len) return tt_len / batadv_tt_len(1); } +/** + * batadv_tt_local_table_transmit_size - calculates the local translation table + * size when transmitted over the air + * @bat_priv: the bat priv with all the soft interface information + * + * Returns local translation table size in bytes. + */ +static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv) +{ + uint16_t num_vlan = 0, tt_local_entries = 0; + struct batadv_softif_vlan *vlan; + int hdr_size; + + rcu_read_lock(); + hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + num_vlan++; + tt_local_entries += atomic_read(&vlan->tt.num_entries); + } + rcu_read_unlock(); + + /* header size of tvlv encapsulated tt response payload */ + hdr_size = sizeof(struct batadv_unicast_tvlv_packet); + hdr_size += sizeof(struct batadv_tvlv_hdr); + hdr_size += sizeof(struct batadv_tvlv_tt_data); + hdr_size += num_vlan * sizeof(struct batadv_tvlv_tt_vlan_data); + + return hdr_size + batadv_tt_len(tt_local_entries); +} + static int batadv_tt_local_init(struct batadv_priv *bat_priv) { if (bat_priv->tt.local_hash) @@ -264,33 +458,51 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, const char *message) { batadv_dbg(BATADV_DBG_TT, bat_priv, - "Deleting global tt entry %pM: %s\n", - tt_global->common.addr, message); + "Deleting global tt entry %pM (vid: %d): %s\n", + tt_global->common.addr, + BATADV_PRINT_VID(tt_global->common.vid), message); batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, - batadv_choose_orig, tt_global->common.addr); + batadv_choose_tt, &tt_global->common); batadv_tt_global_entry_free_ref(tt_global); } -void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, - int ifindex) +/** + * batadv_tt_local_add - add a new client to the local table or update an + * existing client + * @soft_iface: netdev struct of the mesh interface + * @addr: the mac address of the client to add + * @vid: VLAN identifier + * @ifindex: index of the interface where the client is connected to (useful to + * identify wireless clients) + * + * Returns true if the client was successfully added, false otherwise. + */ +bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, + unsigned short vid, int ifindex) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global; + struct net_device *in_dev = NULL; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry; - int hash_added; - bool roamed_back = false; + int hash_added, table_size, packet_size_max; + bool ret = false, roamed_back = false; + uint8_t remote_flags; + + if (ifindex != BATADV_NULL_IFINDEX) + in_dev = dev_get_by_index(&init_net, ifindex); - tt_local = batadv_tt_local_hash_find(bat_priv, addr); - tt_global = batadv_tt_global_hash_find(bat_priv, addr); + tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid); + tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid); if (tt_local) { tt_local->last_seen = jiffies; if (tt_local->common.flags & BATADV_TT_CLIENT_PENDING) { batadv_dbg(BATADV_DBG_TT, bat_priv, - "Re-adding pending client %pM\n", addr); + "Re-adding pending client %pM (vid: %d)\n", + addr, BATADV_PRINT_VID(vid)); /* whatever the reason why the PENDING flag was set, * this is a client which was enqueued to be removed in * this orig_interval. Since it popped up again, the @@ -302,8 +514,8 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (tt_local->common.flags & BATADV_TT_CLIENT_ROAM) { batadv_dbg(BATADV_DBG_TT, bat_priv, - "Roaming client %pM came back to its original location\n", - addr); + "Roaming client %pM (vid: %d) came back to its original location\n", + addr, BATADV_PRINT_VID(vid)); /* the ROAM flag is set because this client roamed away * and the node got a roaming_advertisement message. Now * that the client popped up again at its original @@ -315,12 +527,24 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, goto check_roaming; } + /* Ignore the client if we cannot send it in a full table response. */ + table_size = batadv_tt_local_table_transmit_size(bat_priv); + table_size += batadv_tt_len(1); + packet_size_max = atomic_read(&bat_priv->packet_size_max); + if (table_size > packet_size_max) { + net_ratelimited_function(batadv_info, soft_iface, + "Local translation table size (%i) exceeds maximum packet size (%i); Ignoring new local tt entry: %pM\n", + table_size, packet_size_max, addr); + goto out; + } + tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC); if (!tt_local) goto out; batadv_dbg(BATADV_DBG_TT, bat_priv, - "Creating new local tt entry: %pM (ttvn: %d)\n", addr, + "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", + addr, BATADV_PRINT_VID(vid), (uint8_t)atomic_read(&bat_priv->tt.vn)); memcpy(tt_local->common.addr, addr, ETH_ALEN); @@ -329,7 +553,8 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, * (consistency check) */ tt_local->common.flags = BATADV_TT_CLIENT_NEW; - if (batadv_is_wifi_iface(ifindex)) + tt_local->common.vid = vid; + if (batadv_is_wifi_netdev(in_dev)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; atomic_set(&tt_local->common.refcount, 2); tt_local->last_seen = jiffies; @@ -340,7 +565,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, - batadv_choose_orig, &tt_local->common, + batadv_choose_tt, &tt_local->common, &tt_local->common.hash_entry); if (unlikely(hash_added != 0)) { @@ -362,6 +587,7 @@ check_roaming: rcu_read_lock(); hlist_for_each_entry_rcu(orig_entry, head, list) { batadv_send_roam_adv(bat_priv, tt_global->common.addr, + tt_global->common.vid, orig_entry->orig_node); } rcu_read_unlock(); @@ -378,11 +604,174 @@ check_roaming: } } + /* store the current remote flags before altering them. This helps + * understanding is flags are changing or not + */ + remote_flags = tt_local->common.flags & BATADV_TT_REMOTE_MASK; + + if (batadv_is_wifi_netdev(in_dev)) + tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; + else + tt_local->common.flags &= ~BATADV_TT_CLIENT_WIFI; + + /* if any "dynamic" flag has been modified, resend an ADD event for this + * entry so that all the nodes can get the new flags + */ + if (remote_flags ^ (tt_local->common.flags & BATADV_TT_REMOTE_MASK)) + batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS); + + ret = true; out: + if (in_dev) + dev_put(in_dev); if (tt_local) batadv_tt_local_entry_free_ref(tt_local); if (tt_global) batadv_tt_global_entry_free_ref(tt_global); + return ret; +} + +/** + * batadv_tt_prepare_tvlv_global_data - prepare the TVLV TT header to send + * within a TT Response directed to another node + * @orig_node: originator for which the TT data has to be prepared + * @tt_data: uninitialised pointer to the address of the TVLV buffer + * @tt_change: uninitialised pointer to the address of the area where the TT + * changed can be stored + * @tt_len: pointer to the length to reserve to the tt_change. if -1 this + * function reserves the amount of space needed to send the entire global TT + * table. In case of success the value is updated with the real amount of + * reserved bytes + + * Allocate the needed amount of memory for the entire TT TVLV and write its + * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data + * objects, one per active VLAN served by the originator node. + * + * Return the size of the allocated buffer or 0 in case of failure. + */ +static uint16_t +batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, + struct batadv_tvlv_tt_data **tt_data, + struct batadv_tvlv_tt_change **tt_change, + int32_t *tt_len) +{ + uint16_t num_vlan = 0, num_entries = 0, change_offset, tvlv_len; + struct batadv_tvlv_tt_vlan_data *tt_vlan; + struct batadv_orig_node_vlan *vlan; + uint8_t *tt_change_ptr; + + rcu_read_lock(); + list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { + num_vlan++; + num_entries += atomic_read(&vlan->tt.num_entries); + } + + change_offset = sizeof(**tt_data); + change_offset += num_vlan * sizeof(*tt_vlan); + + /* if tt_len is negative, allocate the space needed by the full table */ + if (*tt_len < 0) + *tt_len = batadv_tt_len(num_entries); + + tvlv_len = *tt_len; + tvlv_len += change_offset; + + *tt_data = kmalloc(tvlv_len, GFP_ATOMIC); + if (!*tt_data) { + *tt_len = 0; + goto out; + } + + (*tt_data)->flags = BATADV_NO_FLAGS; + (*tt_data)->ttvn = atomic_read(&orig_node->last_ttvn); + (*tt_data)->num_vlan = htons(num_vlan); + + tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); + list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { + tt_vlan->vid = htons(vlan->vid); + tt_vlan->crc = htonl(vlan->tt.crc); + + tt_vlan++; + } + + tt_change_ptr = (uint8_t *)*tt_data + change_offset; + *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; + +out: + rcu_read_unlock(); + return tvlv_len; +} + +/** + * batadv_tt_prepare_tvlv_local_data - allocate and prepare the TT TVLV for this + * node + * @bat_priv: the bat priv with all the soft interface information + * @tt_data: uninitialised pointer to the address of the TVLV buffer + * @tt_change: uninitialised pointer to the address of the area where the TT + * changes can be stored + * @tt_len: pointer to the length to reserve to the tt_change. if -1 this + * function reserves the amount of space needed to send the entire local TT + * table. In case of success the value is updated with the real amount of + * reserved bytes + * + * Allocate the needed amount of memory for the entire TT TVLV and write its + * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data + * objects, one per active VLAN. + * + * Return the size of the allocated buffer or 0 in case of failure. + */ +static uint16_t +batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, + struct batadv_tvlv_tt_data **tt_data, + struct batadv_tvlv_tt_change **tt_change, + int32_t *tt_len) +{ + struct batadv_tvlv_tt_vlan_data *tt_vlan; + struct batadv_softif_vlan *vlan; + uint16_t num_vlan = 0, num_entries = 0, tvlv_len; + uint8_t *tt_change_ptr; + int change_offset; + + rcu_read_lock(); + hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + num_vlan++; + num_entries += atomic_read(&vlan->tt.num_entries); + } + + change_offset = sizeof(**tt_data); + change_offset += num_vlan * sizeof(*tt_vlan); + + /* if tt_len is negative, allocate the space needed by the full table */ + if (*tt_len < 0) + *tt_len = batadv_tt_len(num_entries); + + tvlv_len = *tt_len; + tvlv_len += change_offset; + + *tt_data = kmalloc(tvlv_len, GFP_ATOMIC); + if (!*tt_data) { + tvlv_len = 0; + goto out; + } + + (*tt_data)->flags = BATADV_NO_FLAGS; + (*tt_data)->ttvn = atomic_read(&bat_priv->tt.vn); + (*tt_data)->num_vlan = htons(num_vlan); + + tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); + hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + tt_vlan->vid = htons(vlan->vid); + tt_vlan->crc = htonl(vlan->tt.crc); + + tt_vlan++; + } + + tt_change_ptr = (uint8_t *)*tt_data + change_offset; + *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; + +out: + rcu_read_unlock(); + return tvlv_len; } /** @@ -395,10 +784,12 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) struct batadv_tt_change_node *entry, *safe; struct batadv_tvlv_tt_data *tt_data; struct batadv_tvlv_tt_change *tt_change; - int tt_diff_len = 0, tt_change_len = 0; + int tt_diff_len, tt_change_len = 0; int tt_diff_entries_num = 0, tt_diff_entries_count = 0; + uint16_t tvlv_len; - tt_diff_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes)); + tt_diff_entries_num = atomic_read(&bat_priv->tt.local_changes); + tt_diff_len = batadv_tt_len(tt_diff_entries_num); /* if we have too many changes for one packet don't send any * and wait for the tt table request which will be fragmented @@ -406,24 +797,19 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) if (tt_diff_len > bat_priv->soft_iface->mtu) tt_diff_len = 0; - tt_data = kzalloc(sizeof(*tt_data) + tt_diff_len, GFP_ATOMIC); - if (!tt_data) + tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv, &tt_data, + &tt_change, &tt_diff_len); + if (!tvlv_len) return; tt_data->flags = BATADV_TT_OGM_DIFF; - tt_data->ttvn = atomic_read(&bat_priv->tt.vn); - tt_data->crc = htonl(bat_priv->tt.local_crc); if (tt_diff_len == 0) goto container_register; - tt_diff_entries_num = batadv_tt_entries(tt_diff_len); - spin_lock_bh(&bat_priv->tt.changes_list_lock); atomic_set(&bat_priv->tt.local_changes, 0); - tt_change = (struct batadv_tvlv_tt_change *)(tt_data + 1); - list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { if (tt_diff_entries_count < tt_diff_entries_num) { @@ -459,7 +845,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) container_register: batadv_tvlv_container_register(bat_priv, BATADV_TVLV_TT, 1, tt_data, - sizeof(*tt_data) + tt_change_len); + tvlv_len); kfree(tt_data); } @@ -471,7 +857,9 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; + struct batadv_softif_vlan *vlan; struct hlist_head *head; + unsigned short vid; uint32_t i; int last_seen_secs; int last_seen_msecs; @@ -484,11 +872,10 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) goto out; seq_printf(seq, - "Locally retrieved addresses (from %s) announced via TT (TTVN: %u CRC: %#.8x):\n", - net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn), - bat_priv->tt.local_crc); - seq_printf(seq, " %-13s %-7s %-10s\n", "Client", "Flags", - "Last seen"); + "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", + net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn)); + seq_printf(seq, " %-13s %s %-7s %-9s (%-10s)\n", "Client", "VID", + "Flags", "Last seen", "CRC"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -499,6 +886,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); + vid = tt_common_entry->vid; last_seen_jiffies = jiffies - tt_local->last_seen; last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); last_seen_secs = last_seen_msecs / 1000; @@ -506,8 +894,17 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) no_purge = tt_common_entry->flags & np_flag; - seq_printf(seq, " * %pM [%c%c%c%c%c] %3u.%03u\n", + vlan = batadv_softif_vlan_get(bat_priv, vid); + if (!vlan) { + seq_printf(seq, "Cannot retrieve VLAN %d\n", + BATADV_PRINT_VID(vid)); + continue; + } + + seq_printf(seq, + " * %pM %4i [%c%c%c%c%c] %3u.%03u (%#.8x)\n", tt_common_entry->addr, + BATADV_PRINT_VID(tt_common_entry->vid), (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), no_purge ? 'P' : '.', @@ -518,7 +915,10 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) (tt_common_entry->flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), no_purge ? 0 : last_seen_secs, - no_purge ? 0 : last_seen_msecs); + no_purge ? 0 : last_seen_msecs, + vlan->tt.crc); + + batadv_softif_vlan_free_ref(vlan); } rcu_read_unlock(); } @@ -542,27 +942,29 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, tt_local_entry->common.flags |= BATADV_TT_CLIENT_PENDING; batadv_dbg(BATADV_DBG_TT, bat_priv, - "Local tt entry (%pM) pending to be removed: %s\n", - tt_local_entry->common.addr, message); + "Local tt entry (%pM, vid: %d) pending to be removed: %s\n", + tt_local_entry->common.addr, + BATADV_PRINT_VID(tt_local_entry->common.vid), message); } /** * batadv_tt_local_remove - logically remove an entry from the local table * @bat_priv: the bat priv with all the soft interface information * @addr: the MAC address of the client to remove + * @vid: VLAN identifier * @message: message to append to the log on deletion * @roaming: true if the deletion is due to a roaming event * * Returns the flags assigned to the local entry before being deleted */ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, - const uint8_t *addr, const char *message, - bool roaming) + const uint8_t *addr, unsigned short vid, + const char *message, bool roaming) { struct batadv_tt_local_entry *tt_local_entry; uint16_t flags, curr_flags = BATADV_NO_FLAGS; - tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr); + tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) goto out; @@ -598,8 +1000,16 @@ out: return curr_flags; } +/** + * batadv_tt_local_purge_list - purge inactive tt local entries + * @bat_priv: the bat priv with all the soft interface information + * @head: pointer to the list containing the local tt entries + * @timeout: parameter deciding whether a given tt local entry is considered + * inactive or not + */ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, - struct hlist_head *head) + struct hlist_head *head, + int timeout) { struct batadv_tt_local_entry *tt_local_entry; struct batadv_tt_common_entry *tt_common_entry; @@ -617,8 +1027,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, if (tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING) continue; - if (!batadv_has_timed_out(tt_local_entry->last_seen, - BATADV_TT_LOCAL_TIMEOUT)) + if (!batadv_has_timed_out(tt_local_entry->last_seen, timeout)) continue; batadv_tt_local_set_pending(bat_priv, tt_local_entry, @@ -626,7 +1035,14 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, } } -static void batadv_tt_local_purge(struct batadv_priv *bat_priv) +/** + * batadv_tt_local_purge - purge inactive tt local entries + * @bat_priv: the bat priv with all the soft interface information + * @timeout: parameter deciding whether a given tt local entry is considered + * inactive or not + */ +static void batadv_tt_local_purge(struct batadv_priv *bat_priv, + int timeout) { struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct hlist_head *head; @@ -638,7 +1054,7 @@ static void batadv_tt_local_purge(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - batadv_tt_local_purge_list(bat_priv, head); + batadv_tt_local_purge_list(bat_priv, head, timeout); spin_unlock_bh(list_lock); } } @@ -779,7 +1195,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, INIT_HLIST_NODE(&orig_entry->list); atomic_inc(&orig_node->refcount); - atomic_inc(&orig_node->tt_size); + batadv_tt_global_size_inc(orig_node, tt_global->common.vid); orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; atomic_set(&orig_entry->refcount, 2); @@ -798,6 +1214,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @orig_node: the originator announcing the client * @tt_addr: the mac address of the non-mesh client + * @vid: VLAN identifier * @flags: TT flags that have to be set for this non-mesh client * @ttvn: the tt version number ever announcing this non-mesh client * @@ -813,7 +1230,8 @@ out: */ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *tt_addr, uint16_t flags, + const unsigned char *tt_addr, + unsigned short vid, uint16_t flags, uint8_t ttvn) { struct batadv_tt_global_entry *tt_global_entry; @@ -823,8 +1241,12 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_tt_common_entry *common; uint16_t local_flags; - tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr); - tt_local_entry = batadv_tt_local_hash_find(bat_priv, tt_addr); + /* ignore global entries from backbone nodes */ + if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) + return true; + + tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr, vid); + tt_local_entry = batadv_tt_local_hash_find(bat_priv, tt_addr, vid); /* if the node already has a local client for this entry, it has to wait * for a roaming advertisement instead of manually messing up the global @@ -841,6 +1263,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, common = &tt_global_entry->common; memcpy(common->addr, tt_addr, ETH_ALEN); + common->vid = vid; common->flags = flags; tt_global_entry->roam_at = 0; @@ -858,7 +1281,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, hash_added = batadv_hash_add(bat_priv->tt.global_hash, batadv_compare_tt, - batadv_choose_orig, common, + batadv_choose_tt, common, &common->hash_entry); if (unlikely(hash_added != 0)) { @@ -917,14 +1340,15 @@ add_orig_entry: batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn); batadv_dbg(BATADV_DBG_TT, bat_priv, - "Creating new global tt entry: %pM (via %pM)\n", - common->addr, orig_node->orig); + "Creating new global tt entry: %pM (vid: %d, via %pM)\n", + common->addr, BATADV_PRINT_VID(common->vid), + orig_node->orig); ret = true; out_remove: /* remove address from local hash if present */ - local_flags = batadv_tt_local_remove(bat_priv, tt_addr, + local_flags = batadv_tt_local_remove(bat_priv, tt_addr, vid, "global tt received", flags & BATADV_TT_CLIENT_ROAM); tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI; @@ -944,18 +1368,20 @@ out: } /* batadv_transtable_best_orig - Get best originator list entry from tt entry + * @bat_priv: the bat priv with all the soft interface information * @tt_global_entry: global translation table entry to be analyzed * * This functon assumes the caller holds rcu_read_lock(). * Returns best originator list entry or NULL on errors. */ static struct batadv_tt_orig_list_entry * -batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry) +batadv_transtable_best_orig(struct batadv_priv *bat_priv, + struct batadv_tt_global_entry *tt_global_entry) { - struct batadv_neigh_node *router = NULL; + struct batadv_neigh_node *router, *best_router = NULL; + struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL; - int best_tq = 0; head = &tt_global_entry->orig_list; hlist_for_each_entry_rcu(orig_entry, head, list) { @@ -963,64 +1389,104 @@ batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry) if (!router) continue; - if (router->tq_avg > best_tq) { - best_entry = orig_entry; - best_tq = router->tq_avg; + if (best_router && + bao->bat_neigh_cmp(router, best_router) <= 0) { + batadv_neigh_node_free_ref(router); + continue; } - batadv_neigh_node_free_ref(router); + /* release the refcount for the "old" best */ + if (best_router) + batadv_neigh_node_free_ref(best_router); + + best_entry = orig_entry; + best_router = router; } + if (best_router) + batadv_neigh_node_free_ref(best_router); + return best_entry; } /* batadv_tt_global_print_entry - print all orig nodes who announce the address * for this global entry + * @bat_priv: the bat priv with all the soft interface information * @tt_global_entry: global translation table entry to be printed * @seq: debugfs table seq_file struct * * This functon assumes the caller holds rcu_read_lock(). */ static void -batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, +batadv_tt_global_print_entry(struct batadv_priv *bat_priv, + struct batadv_tt_global_entry *tt_global_entry, struct seq_file *seq) { - struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry, *best_entry; struct batadv_tt_common_entry *tt_common_entry; - uint16_t flags; + struct batadv_orig_node_vlan *vlan; + struct hlist_head *head; uint8_t last_ttvn; + uint16_t flags; tt_common_entry = &tt_global_entry->common; flags = tt_common_entry->flags; - best_entry = batadv_transtable_best_orig(tt_global_entry); + best_entry = batadv_transtable_best_orig(bat_priv, tt_global_entry); if (best_entry) { + vlan = batadv_orig_node_vlan_get(best_entry->orig_node, + tt_common_entry->vid); + if (!vlan) { + seq_printf(seq, + " * Cannot retrieve VLAN %d for originator %pM\n", + BATADV_PRINT_VID(tt_common_entry->vid), + best_entry->orig_node->orig); + goto print_list; + } + last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn); seq_printf(seq, - " %c %pM (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n", + " %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n", '*', tt_global_entry->common.addr, + BATADV_PRINT_VID(tt_global_entry->common.vid), best_entry->ttvn, best_entry->orig_node->orig, - last_ttvn, best_entry->orig_node->tt_crc, + last_ttvn, vlan->tt.crc, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.')); + + batadv_orig_node_vlan_free_ref(vlan); } +print_list: head = &tt_global_entry->orig_list; hlist_for_each_entry_rcu(orig_entry, head, list) { if (best_entry == orig_entry) continue; + vlan = batadv_orig_node_vlan_get(orig_entry->orig_node, + tt_common_entry->vid); + if (!vlan) { + seq_printf(seq, + " + Cannot retrieve VLAN %d for originator %pM\n", + BATADV_PRINT_VID(tt_common_entry->vid), + orig_entry->orig_node->orig); + continue; + } + last_ttvn = atomic_read(&orig_entry->orig_node->last_ttvn); - seq_printf(seq, " %c %pM (%3u) via %pM (%3u) [%c%c%c]\n", + seq_printf(seq, + " %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n", '+', tt_global_entry->common.addr, + BATADV_PRINT_VID(tt_global_entry->common.vid), orig_entry->ttvn, orig_entry->orig_node->orig, - last_ttvn, + last_ttvn, vlan->tt.crc, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.')); + + batadv_orig_node_vlan_free_ref(vlan); } } @@ -1042,9 +1508,9 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - seq_printf(seq, " %-13s %s %-15s %s (%-10s) %s\n", - "Client", "(TTVN)", "Originator", "(Curr TTVN)", "CRC", - "Flags"); + seq_printf(seq, " %-13s %s %s %-15s %s (%-10s) %s\n", + "Client", "VID", "(TTVN)", "Originator", "(Curr TTVN)", + "CRC", "Flags"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1055,7 +1521,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, common); - batadv_tt_global_print_entry(tt_global, seq); + batadv_tt_global_print_entry(bat_priv, tt_global, seq); } rcu_read_unlock(); } @@ -1077,6 +1543,8 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) head = &tt_global_entry->orig_list; hlist_for_each_entry_safe(orig_entry, safe, head, list) { hlist_del_rcu(&orig_entry->list); + batadv_tt_global_size_dec(orig_entry->orig_node, + tt_global_entry->common.vid); batadv_tt_orig_list_entry_free_ref(orig_entry); } spin_unlock_bh(&tt_global_entry->list_lock); @@ -1091,16 +1559,21 @@ batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv, struct hlist_head *head; struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; + unsigned short vid; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; hlist_for_each_entry_safe(orig_entry, safe, head, list) { if (orig_entry->orig_node == orig_node) { + vid = tt_global_entry->common.vid; batadv_dbg(BATADV_DBG_TT, bat_priv, - "Deleting %pM from global tt entry %pM: %s\n", + "Deleting %pM from global tt entry %pM (vid: %d): %s\n", orig_node->orig, - tt_global_entry->common.addr, message); + tt_global_entry->common.addr, + BATADV_PRINT_VID(vid), message); hlist_del_rcu(&orig_entry->list); + batadv_tt_global_size_dec(orig_node, + tt_global_entry->common.vid); batadv_tt_orig_list_entry_free_ref(orig_entry); } } @@ -1147,17 +1620,25 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, orig_node, message); } - - +/** + * batadv_tt_global_del - remove a client from the global table + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: an originator serving this client + * @addr: the mac address of the client + * @vid: VLAN identifier + * @message: a message explaining the reason for deleting the client to print + * for debugging purpose + * @roaming: true if the deletion has been triggered by a roaming event + */ static void batadv_tt_global_del(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *addr, + const unsigned char *addr, unsigned short vid, const char *message, bool roaming) { struct batadv_tt_global_entry *tt_global_entry; struct batadv_tt_local_entry *local_entry = NULL; - tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr); + tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid); if (!tt_global_entry) goto out; @@ -1186,7 +1667,8 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, * the global entry, since it is useless now. */ local_entry = batadv_tt_local_hash_find(bat_priv, - tt_global_entry->common.addr); + tt_global_entry->common.addr, + vid); if (local_entry) { /* local entry exists, case 2: client roamed to us. */ batadv_tt_global_del_orig_list(tt_global_entry); @@ -1204,8 +1686,18 @@ out: batadv_tt_local_entry_free_ref(local_entry); } +/** + * batadv_tt_global_del_orig - remove all the TT global entries belonging to the + * given originator matching the provided vid + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the originator owning the entries to remove + * @match_vid: the VLAN identifier to match. If negative all the entries will be + * removed + * @message: debug message to print as "reason" + */ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, + int32_t match_vid, const char *message) { struct batadv_tt_global_entry *tt_global; @@ -1215,6 +1707,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct hlist_node *safe; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ + unsigned short vid; if (!hash) return; @@ -1226,6 +1719,10 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, spin_lock_bh(list_lock); hlist_for_each_entry_safe(tt_common_entry, safe, head, hash_entry) { + /* remove only matching entries */ + if (match_vid >= 0 && tt_common_entry->vid != match_vid) + continue; + tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, common); @@ -1234,9 +1731,11 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, orig_node, message); if (hlist_empty(&tt_global->orig_list)) { + vid = tt_global->common.vid; batadv_dbg(BATADV_DBG_TT, bat_priv, - "Deleting global tt entry %pM: %s\n", - tt_global->common.addr, message); + "Deleting global tt entry %pM (vid: %d): %s\n", + tt_global->common.addr, + BATADV_PRINT_VID(vid), message); hlist_del_rcu(&tt_common_entry->hash_entry); batadv_tt_global_entry_free_ref(tt_global); } @@ -1294,8 +1793,10 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) continue; batadv_dbg(BATADV_DBG_TT, bat_priv, - "Deleting global tt entry (%pM): %s\n", - tt_global->common.addr, msg); + "Deleting global tt entry %pM (vid: %d): %s\n", + tt_global->common.addr, + BATADV_PRINT_VID(tt_global->common.vid), + msg); hlist_del_rcu(&tt_common->hash_entry); @@ -1354,23 +1855,49 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry, return ret; } +/** + * batadv_transtable_search - get the mesh destination for a given client + * @bat_priv: the bat priv with all the soft interface information + * @src: mac address of the source client + * @addr: mac address of the destination client + * @vid: VLAN identifier + * + * Returns a pointer to the originator that was selected as destination in the + * mesh for contacting the client 'addr', NULL otherwise. + * In case of multiple originators serving the same client, the function returns + * the best one (best in terms of metric towards the destination node). + * + * If the two clients are AP isolated the function returns NULL. + */ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, const uint8_t *src, - const uint8_t *addr) + const uint8_t *addr, + unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry = NULL; struct batadv_tt_global_entry *tt_global_entry = NULL; struct batadv_orig_node *orig_node = NULL; struct batadv_tt_orig_list_entry *best_entry; + bool ap_isolation_enabled = false; + struct batadv_softif_vlan *vlan; + + /* if the AP isolation is requested on a VLAN, then check for its + * setting in the proper VLAN private data structure + */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + if (vlan) { + ap_isolation_enabled = atomic_read(&vlan->ap_isolation); + batadv_softif_vlan_free_ref(vlan); + } - if (src && atomic_read(&bat_priv->ap_isolation)) { - tt_local_entry = batadv_tt_local_hash_find(bat_priv, src); + if (src && ap_isolation_enabled) { + tt_local_entry = batadv_tt_local_hash_find(bat_priv, src, vid); if (!tt_local_entry || (tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING)) goto out; } - tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr); + tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid); if (!tt_global_entry) goto out; @@ -1382,7 +1909,7 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, goto out; rcu_read_lock(); - best_entry = batadv_transtable_best_orig(tt_global_entry); + best_entry = batadv_transtable_best_orig(bat_priv, tt_global_entry); /* found anything? */ if (best_entry) orig_node = best_entry->orig_node; @@ -1403,15 +1930,36 @@ out: * batadv_tt_global_crc - calculates the checksum of the local table belonging * to the given orig_node * @bat_priv: the bat priv with all the soft interface information + * @orig_node: originator for which the CRC should be computed + * @vid: VLAN identifier for which the CRC32 has to be computed + * + * This function computes the checksum for the global table corresponding to a + * specific originator. In particular, the checksum is computed as follows: For + * each client connected to the originator the CRC32C of the MAC address and the + * VID is computed and then all the CRC32Cs of the various clients are xor'ed + * together. + * + * The idea behind is that CRC32C should be used as much as possible in order to + * produce a unique hash of the table, but since the order which is used to feed + * the CRC32C function affects the result and since every node in the network + * probably sorts the clients differently, the hash function cannot be directly + * computed over the entire table. Hence the CRC32C is used only on + * the single client entry, while all the results are then xor'ed together + * because the XOR operation can combine them all while trying to reduce the + * noise as much as possible. + * + * Returns the checksum of the global table of a given originator. */ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node) + struct batadv_orig_node *orig_node, + unsigned short vid) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; struct hlist_head *head; - uint32_t i, crc = 0; + uint32_t i, crc_tmp, crc = 0; + uint8_t flags; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1421,6 +1969,12 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, tt_global = container_of(tt_common, struct batadv_tt_global_entry, common); + /* compute the CRC only for entries belonging to the + * VLAN identified by the vid passed as parameter + */ + if (tt_common->vid != vid) + continue; + /* Roaming clients are in the global table for * consistency only. They don't have to be * taken into account while computing the @@ -1442,7 +1996,16 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, orig_node)) continue; - crc ^= crc32c(0, tt_common->addr, ETH_ALEN); + crc_tmp = crc32c(0, &tt_common->vid, + sizeof(tt_common->vid)); + + /* compute the CRC on flags that have to be kept in sync + * among nodes + */ + flags = tt_common->flags & BATADV_TT_SYNC_MASK; + crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags)); + + crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN); } rcu_read_unlock(); } @@ -1453,26 +2016,49 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, /** * batadv_tt_local_crc - calculates the checksum of the local table * @bat_priv: the bat priv with all the soft interface information + * @vid: VLAN identifier for which the CRC32 has to be computed + * + * For details about the computation, please refer to the documentation for + * batadv_tt_global_crc(). + * + * Returns the checksum of the local table */ -static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv) +static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv, + unsigned short vid) { struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct hlist_head *head; - uint32_t i, crc = 0; + uint32_t i, crc_tmp, crc = 0; + uint8_t flags; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); hlist_for_each_entry_rcu(tt_common, head, hash_entry) { + /* compute the CRC only for entries belonging to the + * VLAN identified by vid + */ + if (tt_common->vid != vid) + continue; + /* not yet committed clients have not to be taken into * account while computing the CRC */ if (tt_common->flags & BATADV_TT_CLIENT_NEW) continue; - crc ^= crc32c(0, tt_common->addr, ETH_ALEN); + crc_tmp = crc32c(0, &tt_common->vid, + sizeof(tt_common->vid)); + + /* compute the CRC on flags that have to be kept in sync + * among nodes + */ + flags = tt_common->flags & BATADV_TT_SYNC_MASK; + crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags)); + + crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN); } rcu_read_unlock(); } @@ -1595,44 +2181,29 @@ static int batadv_tt_global_valid(const void *entry_ptr, } /** - * batadv_tt_tvlv_generate - creates tvlv tt data buffer to fill it with the - * tt entries from the specified tt hash + * batadv_tt_tvlv_generate - fill the tvlv buff with the tt entries from the + * specified tt hash * @bat_priv: the bat priv with all the soft interface information * @hash: hash table containing the tt entries * @tt_len: expected tvlv tt data buffer length in number of bytes + * @tvlv_buff: pointer to the buffer to fill with the TT data * @valid_cb: function to filter tt change entries * @cb_data: data passed to the filter function as argument - * - * Returns pointer to allocated tvlv tt data buffer if operation was - * successful or NULL otherwise. */ -static struct batadv_tvlv_tt_data * -batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, - struct batadv_hashtable *hash, uint16_t tt_len, - int (*valid_cb)(const void *, const void *), - void *cb_data) +static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + void *tvlv_buff, uint16_t tt_len, + int (*valid_cb)(const void *, const void *), + void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; - struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_tvlv_tt_change *tt_change; struct hlist_head *head; uint16_t tt_tot, tt_num_entries = 0; - ssize_t tvlv_tt_size = sizeof(struct batadv_tvlv_tt_data); uint32_t i; - if (tvlv_tt_size + tt_len > bat_priv->soft_iface->mtu) { - tt_len = bat_priv->soft_iface->mtu - tvlv_tt_size; - tt_len -= tt_len % sizeof(struct batadv_tvlv_tt_change); - } - tt_tot = batadv_tt_entries(tt_len); - - tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len, - GFP_ATOMIC); - if (!tvlv_tt_data) - goto out; - - tt_change = (struct batadv_tvlv_tt_change *)(tvlv_tt_data + 1); + tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff; rcu_read_lock(); for (i = 0; i < hash->size; i++) { @@ -1649,6 +2220,7 @@ batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, memcpy(tt_change->addr, tt_common_entry->addr, ETH_ALEN); tt_change->flags = tt_common_entry->flags; + tt_change->vid = htons(tt_common_entry->vid); tt_change->reserved = 0; tt_num_entries++; @@ -1656,9 +2228,91 @@ batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, } } rcu_read_unlock(); +} -out: - return tvlv_tt_data; +/** + * batadv_tt_global_check_crc - check if all the CRCs are correct + * @orig_node: originator for which the CRCs have to be checked + * @tt_vlan: pointer to the first tvlv VLAN entry + * @num_vlan: number of tvlv VLAN entries + * @create: if true, create VLAN objects if not found + * + * Return true if all the received CRCs match the locally stored ones, false + * otherwise + */ +static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, + struct batadv_tvlv_tt_vlan_data *tt_vlan, + uint16_t num_vlan) +{ + struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp; + struct batadv_orig_node_vlan *vlan; + int i; + + /* check if each received CRC matches the locally stored one */ + for (i = 0; i < num_vlan; i++) { + tt_vlan_tmp = tt_vlan + i; + + /* if orig_node is a backbone node for this VLAN, don't check + * the CRC as we ignore all the global entries over it + */ + if (batadv_bla_is_backbone_gw_orig(orig_node->bat_priv, + orig_node->orig, + ntohs(tt_vlan_tmp->vid))) + continue; + + vlan = batadv_orig_node_vlan_get(orig_node, + ntohs(tt_vlan_tmp->vid)); + if (!vlan) + return false; + + if (vlan->tt.crc != ntohl(tt_vlan_tmp->crc)) + return false; + } + + return true; +} + +/** + * batadv_tt_local_update_crc - update all the local CRCs + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv) +{ + struct batadv_softif_vlan *vlan; + + /* recompute the global CRC for each VLAN */ + rcu_read_lock(); + hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan->tt.crc = batadv_tt_local_crc(bat_priv, vlan->vid); + } + rcu_read_unlock(); +} + +/** + * batadv_tt_global_update_crc - update all the global CRCs for this orig_node + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the orig_node for which the CRCs have to be updated + */ +static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node) +{ + struct batadv_orig_node_vlan *vlan; + uint32_t crc; + + /* recompute the global CRC for each VLAN */ + rcu_read_lock(); + list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { + /* if orig_node is a backbone node for this VLAN, don't compute + * the CRC as we ignore all the global entries over it + */ + if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, + vlan->vid)) + continue; + + crc = batadv_tt_global_crc(bat_priv, orig_node, vlan->vid); + vlan->tt.crc = crc; + } + rcu_read_unlock(); } /** @@ -1666,19 +2320,23 @@ out: * @bat_priv: the bat priv with all the soft interface information * @dst_orig_node: the destination of the message * @ttvn: the version number that the source of the message is looking for - * @tt_crc: the CRC associated with the version number + * @tt_vlan: pointer to the first tvlv VLAN object to request + * @num_vlan: number of tvlv VLAN entries * @full_table: ask for the entire translation table if true, while only for the * last TT diff otherwise */ static int batadv_send_tt_request(struct batadv_priv *bat_priv, struct batadv_orig_node *dst_orig_node, - uint8_t ttvn, uint32_t tt_crc, - bool full_table) + uint8_t ttvn, + struct batadv_tvlv_tt_vlan_data *tt_vlan, + uint16_t num_vlan, bool full_table) { struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; - struct batadv_hard_iface *primary_if; struct batadv_tt_req_node *tt_req_node = NULL; + struct batadv_tvlv_tt_vlan_data *tt_vlan_req; + struct batadv_hard_iface *primary_if; bool ret = false; + int i, size; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) @@ -1691,13 +2349,26 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data), GFP_ATOMIC); + size = sizeof(*tvlv_tt_data) + sizeof(*tt_vlan_req) * num_vlan; + tvlv_tt_data = kzalloc(size, GFP_ATOMIC); if (!tvlv_tt_data) goto out; tvlv_tt_data->flags = BATADV_TT_REQUEST; tvlv_tt_data->ttvn = ttvn; - tvlv_tt_data->crc = htonl(tt_crc); + tvlv_tt_data->num_vlan = htons(num_vlan); + + /* send all the CRCs within the request. This is needed by intermediate + * nodes to ensure they have the correct table before replying + */ + tt_vlan_req = (struct batadv_tvlv_tt_vlan_data *)(tvlv_tt_data + 1); + for (i = 0; i < num_vlan; i++) { + tt_vlan_req->vid = tt_vlan->vid; + tt_vlan_req->crc = tt_vlan->crc; + + tt_vlan_req++; + tt_vlan++; + } if (full_table) tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE; @@ -1708,7 +2379,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX); batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr, dst_orig_node->orig, BATADV_TVLV_TT, 1, - tvlv_tt_data, sizeof(*tvlv_tt_data)); + tvlv_tt_data, size); ret = true; out: @@ -1740,10 +2411,13 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, { struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; + struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; - uint8_t orig_ttvn, req_ttvn; - uint16_t tt_len; + struct batadv_tvlv_tt_vlan_data *tt_vlan; bool ret = false, full_table; + uint8_t orig_ttvn, req_ttvn; + uint16_t tvlv_len; + int32_t tt_len; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_REQUEST from %pM for ttvn: %u (%pM) [%c]\n", @@ -1762,9 +2436,11 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_data->ttvn; + tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); /* this node doesn't have the requested data */ if (orig_ttvn != req_ttvn || - tt_data->crc != htonl(req_dst_orig_node->tt_crc)) + !batadv_tt_global_check_crc(req_dst_orig_node, tt_vlan, + ntohs(tt_data->num_vlan))) goto out; /* If the full table has been explicitly requested */ @@ -1781,26 +2457,43 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, spin_lock_bh(&req_dst_orig_node->tt_buff_lock); tt_len = req_dst_orig_node->tt_buff_len; - tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len, - GFP_ATOMIC); - if (!tvlv_tt_data) + tvlv_len = batadv_tt_prepare_tvlv_global_data(req_dst_orig_node, + &tvlv_tt_data, + &tt_change, + &tt_len); + if (!tt_len) goto unlock; /* Copy the last orig_node's OGM buffer */ - memcpy(tvlv_tt_data + 1, req_dst_orig_node->tt_buff, + memcpy(tt_change, req_dst_orig_node->tt_buff, req_dst_orig_node->tt_buff_len); spin_unlock_bh(&req_dst_orig_node->tt_buff_lock); } else { - tt_len = (uint16_t)atomic_read(&req_dst_orig_node->tt_size); - tt_len = batadv_tt_len(tt_len); - - tvlv_tt_data = batadv_tt_tvlv_generate(bat_priv, - bat_priv->tt.global_hash, - tt_len, - batadv_tt_global_valid, - req_dst_orig_node); - if (!tvlv_tt_data) + /* allocate the tvlv, put the tt_data and all the tt_vlan_data + * in the initial part + */ + tt_len = -1; + tvlv_len = batadv_tt_prepare_tvlv_global_data(req_dst_orig_node, + &tvlv_tt_data, + &tt_change, + &tt_len); + if (!tt_len) goto out; + + /* fill the rest of the tvlv with the real TT entries */ + batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.global_hash, + tt_change, tt_len, + batadv_tt_global_valid, + req_dst_orig_node); + } + + /* Don't send the response, if larger than fragmented packet. */ + tt_len = sizeof(struct batadv_unicast_tvlv_packet) + tvlv_len; + if (tt_len > atomic_read(&bat_priv->packet_size_max)) { + net_ratelimited_function(batadv_info, bat_priv->soft_iface, + "Ignoring TT_REQUEST from %pM; Response size exceeds max packet size.\n", + res_dst_orig_node->orig); + goto out; } tvlv_tt_data->flags = BATADV_TT_RESPONSE; @@ -1817,8 +2510,8 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); batadv_tvlv_unicast_send(bat_priv, req_dst_orig_node->orig, - req_src, BATADV_TVLV_TT, 1, - tvlv_tt_data, sizeof(*tvlv_tt_data) + tt_len); + req_src, BATADV_TVLV_TT, 1, tvlv_tt_data, + tvlv_len); ret = true; goto out; @@ -1849,17 +2542,20 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, uint8_t *req_src) { struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; - struct batadv_orig_node *orig_node; struct batadv_hard_iface *primary_if = NULL; + struct batadv_tvlv_tt_change *tt_change; + struct batadv_orig_node *orig_node; uint8_t my_ttvn, req_ttvn; + uint16_t tvlv_len; bool full_table; - uint16_t tt_len; + int32_t tt_len; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_REQUEST from %pM for ttvn: %u (me) [%c]\n", req_src, tt_data->ttvn, (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); + spin_lock_bh(&bat_priv->tt.commit_lock); my_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); req_ttvn = tt_data->ttvn; @@ -1886,29 +2582,37 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, */ if (!full_table) { spin_lock_bh(&bat_priv->tt.last_changeset_lock); - tt_len = bat_priv->tt.last_changeset_len; - tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len, - GFP_ATOMIC); - if (!tvlv_tt_data) + tt_len = bat_priv->tt.last_changeset_len; + tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv, + &tvlv_tt_data, + &tt_change, + &tt_len); + if (!tt_len) goto unlock; /* Copy the last orig_node's OGM buffer */ - memcpy(tvlv_tt_data + 1, bat_priv->tt.last_changeset, + memcpy(tt_change, bat_priv->tt.last_changeset, bat_priv->tt.last_changeset_len); spin_unlock_bh(&bat_priv->tt.last_changeset_lock); } else { - tt_len = (uint16_t)atomic_read(&bat_priv->tt.local_entry_num); - tt_len = batadv_tt_len(tt_len); req_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); - tvlv_tt_data = batadv_tt_tvlv_generate(bat_priv, - bat_priv->tt.local_hash, - tt_len, - batadv_tt_local_valid, - NULL); - if (!tvlv_tt_data) + /* allocate the tvlv, put the tt_data and all the tt_vlan_data + * in the initial part + */ + tt_len = -1; + tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv, + &tvlv_tt_data, + &tt_change, + &tt_len); + if (!tt_len) goto out; + + /* fill the rest of the tvlv with the real TT entries */ + batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.local_hash, + tt_change, tt_len, + batadv_tt_local_valid, NULL); } tvlv_tt_data->flags = BATADV_TT_RESPONSE; @@ -1924,14 +2628,15 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr, - req_src, BATADV_TVLV_TT, 1, - tvlv_tt_data, sizeof(*tvlv_tt_data) + tt_len); + req_src, BATADV_TVLV_TT, 1, tvlv_tt_data, + tvlv_len); goto out; unlock: spin_unlock_bh(&bat_priv->tt.last_changeset_lock); out: + spin_unlock_bh(&bat_priv->tt.commit_lock); if (orig_node) batadv_orig_node_free_ref(orig_node); if (primary_if) @@ -1954,16 +2659,11 @@ static bool batadv_send_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, uint8_t *req_src, uint8_t *req_dst) { - if (batadv_is_my_mac(bat_priv, req_dst)) { - /* don't answer backbone gws! */ - if (batadv_bla_is_backbone_gw_orig(bat_priv, req_src)) - return true; - + if (batadv_is_my_mac(bat_priv, req_dst)) return batadv_send_my_tt_response(bat_priv, tt_data, req_src); - } else { + else return batadv_send_other_tt_response(bat_priv, tt_data, req_src, req_dst); - } } static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, @@ -1979,11 +2679,13 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, roams = (tt_change + i)->flags & BATADV_TT_CLIENT_ROAM; batadv_tt_global_del(bat_priv, orig_node, (tt_change + i)->addr, + ntohs((tt_change + i)->vid), "tt removed by changes", roams); } else { if (!batadv_tt_global_add(bat_priv, orig_node, (tt_change + i)->addr, + ntohs((tt_change + i)->vid), (tt_change + i)->flags, ttvn)) /* In case of problem while storing a * global_entry, we stop the updating @@ -1998,8 +2700,9 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, } static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, - struct batadv_tvlv_tt_data *tt_data, - uint8_t *resp_src, uint16_t num_entries) + struct batadv_tvlv_tt_change *tt_change, + uint8_t ttvn, uint8_t *resp_src, + uint16_t num_entries) { struct batadv_orig_node *orig_node; @@ -2008,11 +2711,11 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, goto out; /* Purge the old table first.. */ - batadv_tt_global_del_orig(bat_priv, orig_node, "Received full table"); + batadv_tt_global_del_orig(bat_priv, orig_node, -1, + "Received full table"); - _batadv_tt_update_changes(bat_priv, orig_node, - (struct batadv_tvlv_tt_change *)(tt_data + 1), - num_entries, tt_data->ttvn); + _batadv_tt_update_changes(bat_priv, orig_node, tt_change, num_entries, + ttvn); spin_lock_bh(&orig_node->tt_buff_lock); kfree(orig_node->tt_buff); @@ -2020,7 +2723,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, orig_node->tt_buff = NULL; spin_unlock_bh(&orig_node->tt_buff_lock); - atomic_set(&orig_node->last_ttvn, tt_data->ttvn); + atomic_set(&orig_node->last_ttvn, ttvn); out: if (orig_node) @@ -2040,12 +2743,21 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, atomic_set(&orig_node->last_ttvn, ttvn); } -bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr) +/** + * batadv_is_my_client - check if a client is served by the local node + * @bat_priv: the bat priv with all the soft interface information + * @addr: the mac adress of the client to check + * @vid: VLAN identifier + * + * Returns true if the client is served by this node, false otherwise. + */ +bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr, + unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry; bool ret = false; - tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr); + tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) goto out; /* Check if the client has been logically deleted (but is kept for @@ -2075,28 +2787,39 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, struct batadv_tt_req_node *node, *safe; struct batadv_orig_node *orig_node = NULL; struct batadv_tvlv_tt_change *tt_change; + uint8_t *tvlv_ptr = (uint8_t *)tt_data; + uint16_t change_offset; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n", resp_src, tt_data->ttvn, num_entries, (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); - /* we should have never asked a backbone gw */ - if (batadv_bla_is_backbone_gw_orig(bat_priv, resp_src)) - goto out; - orig_node = batadv_orig_hash_find(bat_priv, resp_src); if (!orig_node) goto out; + spin_lock_bh(&orig_node->tt_lock); + + change_offset = sizeof(struct batadv_tvlv_tt_vlan_data); + change_offset *= ntohs(tt_data->num_vlan); + change_offset += sizeof(*tt_data); + tvlv_ptr += change_offset; + + tt_change = (struct batadv_tvlv_tt_change *)tvlv_ptr; if (tt_data->flags & BATADV_TT_FULL_TABLE) { - batadv_tt_fill_gtable(bat_priv, tt_data, resp_src, num_entries); + batadv_tt_fill_gtable(bat_priv, tt_change, tt_data->ttvn, + resp_src, num_entries); } else { - tt_change = (struct batadv_tvlv_tt_change *)(tt_data + 1); batadv_tt_update_changes(bat_priv, orig_node, num_entries, tt_data->ttvn, tt_change); } + /* Recalculate the CRC for this orig_node and store it */ + batadv_tt_global_update_crc(bat_priv, orig_node); + + spin_unlock_bh(&orig_node->tt_lock); + /* Delete the tt_req_node from pending tt_requests list */ spin_lock_bh(&bat_priv->tt.req_list_lock); list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { @@ -2105,10 +2828,8 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, list_del(&node->list); kfree(node); } - spin_unlock_bh(&bat_priv->tt.req_list_lock); - /* Recalculate the CRC for this orig_node and store it */ - orig_node->tt_crc = batadv_tt_global_crc(bat_priv, orig_node); + spin_unlock_bh(&bat_priv->tt.req_list_lock); out: if (orig_node) batadv_orig_node_free_ref(orig_node); @@ -2194,7 +2915,20 @@ unlock: return ret; } +/** + * batadv_send_roam_adv - send a roaming advertisement message + * @bat_priv: the bat priv with all the soft interface information + * @client: mac address of the roaming client + * @vid: VLAN identifier + * @orig_node: message destination + * + * Send a ROAMING_ADV message to the node which was previously serving this + * client. This is done to inform the node that from now on all traffic destined + * for this particular roamed client has to be forwarded to the sender of the + * roaming message. + */ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, + unsigned short vid, struct batadv_orig_node *orig_node) { struct batadv_hard_iface *primary_if; @@ -2211,13 +2945,13 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, goto out; batadv_dbg(BATADV_DBG_TT, bat_priv, - "Sending ROAMING_ADV to %pM (client %pM)\n", - orig_node->orig, client); + "Sending ROAMING_ADV to %pM (client %pM, vid: %d)\n", + orig_node->orig, client, BATADV_PRINT_VID(vid)); batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); memcpy(tvlv_roam.client, client, sizeof(tvlv_roam.client)); - tvlv_roam.reserved = 0; + tvlv_roam.vid = htons(vid); batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr, orig_node->orig, BATADV_TVLV_ROAM, 1, @@ -2238,7 +2972,7 @@ static void batadv_tt_purge(struct work_struct *work) priv_tt = container_of(delayed_work, struct batadv_priv_tt, work); bat_priv = container_of(priv_tt, struct batadv_priv, tt); - batadv_tt_local_purge(bat_priv); + batadv_tt_local_purge(bat_priv, BATADV_TT_LOCAL_TIMEOUT); batadv_tt_global_purge(bat_priv); batadv_tt_req_purge(bat_priv); batadv_tt_roam_purge(bat_priv); @@ -2263,19 +2997,25 @@ void batadv_tt_free(struct batadv_priv *bat_priv) kfree(bat_priv->tt.last_changeset); } -/* This function will enable or disable the specified flags for all the entries - * in the given hash table and returns the number of modified entries +/** + * batadv_tt_local_set_flags - set or unset the specified flags on the local + * table and possibly count them in the TT size + * @bat_priv: the bat priv with all the soft interface information + * @flags: the flag to switch + * @enable: whether to set or unset the flag + * @count: whether to increase the TT size by the number of changed entries */ -static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash, - uint16_t flags, bool enable) +static void batadv_tt_local_set_flags(struct batadv_priv *bat_priv, + uint16_t flags, bool enable, bool count) { - uint32_t i; + struct batadv_hashtable *hash = bat_priv->tt.local_hash; + struct batadv_tt_common_entry *tt_common_entry; uint16_t changed_num = 0; struct hlist_head *head; - struct batadv_tt_common_entry *tt_common_entry; + uint32_t i; if (!hash) - goto out; + return; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2293,11 +3033,15 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash, tt_common_entry->flags &= ~flags; } changed_num++; + + if (!count) + continue; + + batadv_tt_local_size_inc(bat_priv, + tt_common_entry->vid); } rcu_read_unlock(); } -out: - return changed_num; } /* Purge out all the tt local entries marked with BATADV_TT_CLIENT_PENDING */ @@ -2325,10 +3069,11 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) continue; batadv_dbg(BATADV_DBG_TT, bat_priv, - "Deleting local tt entry (%pM): pending\n", - tt_common->addr); + "Deleting local tt entry (%pM, vid: %d): pending\n", + tt_common->addr, + BATADV_PRINT_VID(tt_common->vid)); - atomic_dec(&bat_priv->tt.local_entry_num); + batadv_tt_local_size_dec(bat_priv, tt_common->vid); hlist_del_rcu(&tt_common->hash_entry); tt_local = container_of(tt_common, struct batadv_tt_local_entry, @@ -2340,27 +3085,24 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) } /** - * batadv_tt_local_commit_changes - commit all pending local tt changes which - * have been queued in the time since the last commit + * batadv_tt_local_commit_changes_nolock - commit all pending local tt changes + * which have been queued in the time since the last commit * @bat_priv: the bat priv with all the soft interface information + * + * Caller must hold tt->commit_lock. */ -void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) +static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) { - uint16_t changed_num = 0; - if (atomic_read(&bat_priv->tt.local_changes) < 1) { if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt)) batadv_tt_tvlv_container_update(bat_priv); return; } - changed_num = batadv_tt_set_flags(bat_priv->tt.local_hash, - BATADV_TT_CLIENT_NEW, false); + batadv_tt_local_set_flags(bat_priv, BATADV_TT_CLIENT_NEW, false, true); - /* all reset entries have to be counted as local entries */ - atomic_add(changed_num, &bat_priv->tt.local_entry_num); batadv_tt_local_purge_pending_clients(bat_priv); - bat_priv->tt.local_crc = batadv_tt_local_crc(bat_priv); + batadv_tt_local_update_crc(bat_priv); /* Increment the TTVN only once per OGM interval */ atomic_inc(&bat_priv->tt.vn); @@ -2373,21 +3115,35 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) batadv_tt_tvlv_container_update(bat_priv); } +/** + * batadv_tt_local_commit_changes - commit all pending local tt changes which + * have been queued in the time since the last commit + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) +{ + spin_lock_bh(&bat_priv->tt.commit_lock); + batadv_tt_local_commit_changes_nolock(bat_priv); + spin_unlock_bh(&bat_priv->tt.commit_lock); +} + bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src, - uint8_t *dst) + uint8_t *dst, unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry = NULL; struct batadv_tt_global_entry *tt_global_entry = NULL; + struct batadv_softif_vlan *vlan; bool ret = false; - if (!atomic_read(&bat_priv->ap_isolation)) + vlan = batadv_softif_vlan_get(bat_priv, vid); + if (!vlan || !atomic_read(&vlan->ap_isolation)) goto out; - tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst); + tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid); if (!tt_local_entry) goto out; - tt_global_entry = batadv_tt_global_hash_find(bat_priv, src); + tt_global_entry = batadv_tt_global_hash_find(bat_priv, src, vid); if (!tt_global_entry) goto out; @@ -2397,6 +3153,8 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src, ret = true; out: + if (vlan) + batadv_softif_vlan_free_ref(vlan); if (tt_global_entry) batadv_tt_global_entry_free_ref(tt_global_entry); if (tt_local_entry) @@ -2409,25 +3167,24 @@ out: * information received via ogms * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm - * @tt_buff: buffer holding the tt information + * @tt_vlan: pointer to the first tvlv VLAN entry + * @tt_num_vlan: number of tvlv VLAN entries + * @tt_change: pointer to the first entry in the TT buffer * @tt_num_changes: number of tt changes inside the tt buffer * @ttvn: translation table version number of this changeset * @tt_crc: crc32 checksum of orig node's translation table */ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *tt_buff, - uint16_t tt_num_changes, uint8_t ttvn, - uint32_t tt_crc) + const void *tt_buff, uint16_t tt_num_vlan, + struct batadv_tvlv_tt_change *tt_change, + uint16_t tt_num_changes, uint8_t ttvn) { uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + struct batadv_tvlv_tt_vlan_data *tt_vlan; bool full_table = true; - struct batadv_tvlv_tt_change *tt_change; - - /* don't care about a backbone gateways updates. */ - if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig)) - return; + tt_vlan = (struct batadv_tvlv_tt_vlan_data *)tt_buff; /* orig table not initialised AND first diff is in the OGM OR the ttvn * increased by one -> we can apply the attached changes */ @@ -2443,6 +3200,8 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, goto request_table; } + spin_lock_bh(&orig_node->tt_lock); + tt_change = (struct batadv_tvlv_tt_change *)tt_buff; batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, tt_change); @@ -2451,7 +3210,9 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, * prefer to recompute it to spot any possible inconsistency * in the global table */ - orig_node->tt_crc = batadv_tt_global_crc(bat_priv, orig_node); + batadv_tt_global_update_crc(bat_priv, orig_node); + + spin_unlock_bh(&orig_node->tt_lock); /* The ttvn alone is not enough to guarantee consistency * because a single value could represent different states @@ -2462,37 +3223,46 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, * checking the CRC value is mandatory to detect the * inconsistency */ - if (orig_node->tt_crc != tt_crc) + if (!batadv_tt_global_check_crc(orig_node, tt_vlan, + tt_num_vlan)) goto request_table; } else { /* if we missed more than one change or our tables are not * in sync anymore -> request fresh tt data */ if (!orig_node->tt_initialised || ttvn != orig_ttvn || - orig_node->tt_crc != tt_crc) { + !batadv_tt_global_check_crc(orig_node, tt_vlan, + tt_num_vlan)) { request_table: batadv_dbg(BATADV_DBG_TT, bat_priv, - "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %#.8x last_crc: %#.8x num_changes: %u)\n", - orig_node->orig, ttvn, orig_ttvn, tt_crc, - orig_node->tt_crc, tt_num_changes); + "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u num_changes: %u)\n", + orig_node->orig, ttvn, orig_ttvn, + tt_num_changes); batadv_send_tt_request(bat_priv, orig_node, ttvn, - tt_crc, full_table); + tt_vlan, tt_num_vlan, + full_table); return; } } } -/* returns true whether we know that the client has moved from its old - * originator to another one. This entry is kept is still kept for consistency - * purposes +/** + * batadv_tt_global_client_is_roaming - check if a client is marked as roaming + * @bat_priv: the bat priv with all the soft interface information + * @addr: the mac address of the client to check + * @vid: VLAN identifier + * + * Returns true if we know that the client has moved from its old originator + * to another one. This entry is still kept for consistency purposes and will be + * deleted later by a DEL or because of timeout */ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, - uint8_t *addr) + uint8_t *addr, unsigned short vid) { struct batadv_tt_global_entry *tt_global_entry; bool ret = false; - tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr); + tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid); if (!tt_global_entry) goto out; @@ -2505,19 +3275,20 @@ out: /** * batadv_tt_local_client_is_roaming - tells whether the client is roaming * @bat_priv: the bat priv with all the soft interface information - * @addr: the MAC address of the local client to query + * @addr: the mac address of the local client to query + * @vid: VLAN identifier * * Returns true if the local client is known to be roaming (it is not served by * this node anymore) or not. If yes, the client is still present in the table * to keep the latter consistent with the node TTVN */ bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, - uint8_t *addr) + uint8_t *addr, unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry; bool ret = false; - tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr); + tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) goto out; @@ -2529,31 +3300,66 @@ out: bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *addr) + const unsigned char *addr, + unsigned short vid) { bool ret = false; - /* if the originator is a backbone node (meaning it belongs to the same - * LAN of this node) the temporary client must not be added because to - * reach such destination the node must use the LAN instead of the mesh - */ - if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig)) - goto out; - - if (!batadv_tt_global_add(bat_priv, orig_node, addr, + if (!batadv_tt_global_add(bat_priv, orig_node, addr, vid, BATADV_TT_CLIENT_TEMP, atomic_read(&orig_node->last_ttvn))) goto out; batadv_dbg(BATADV_DBG_TT, bat_priv, - "Added temporary global client (addr: %pM orig: %pM)\n", - addr, orig_node->orig); + "Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n", + addr, BATADV_PRINT_VID(vid), orig_node->orig); ret = true; out: return ret; } /** + * batadv_tt_local_resize_to_mtu - resize the local translation table fit the + * maximum packet size that can be transported through the mesh + * @soft_iface: netdev struct of the mesh interface + * + * Remove entries older than 'timeout' and half timeout if more entries need + * to be removed. + */ +void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(soft_iface); + int packet_size_max = atomic_read(&bat_priv->packet_size_max); + int table_size, timeout = BATADV_TT_LOCAL_TIMEOUT / 2; + bool reduced = false; + + spin_lock_bh(&bat_priv->tt.commit_lock); + + while (true) { + table_size = batadv_tt_local_table_transmit_size(bat_priv); + if (packet_size_max >= table_size) + break; + + batadv_tt_local_purge(bat_priv, timeout); + batadv_tt_local_purge_pending_clients(bat_priv); + + timeout /= 2; + reduced = true; + net_ratelimited_function(batadv_info, soft_iface, + "Forced to purge local tt entries to fit new maximum fragment MTU (%i)\n", + packet_size_max); + } + + /* commit these changes immediately, to avoid synchronization problem + * with the TTVN + */ + if (reduced) + batadv_tt_local_commit_changes_nolock(bat_priv); + + spin_unlock_bh(&bat_priv->tt.commit_lock); +} + +/** * batadv_tt_tvlv_ogm_handler_v1 - process incoming tt tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm @@ -2563,12 +3369,13 @@ out: */ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t flags, - void *tvlv_value, + uint8_t flags, void *tvlv_value, uint16_t tvlv_value_len) { + struct batadv_tvlv_tt_vlan_data *tt_vlan; + struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tt_data; - uint16_t num_entries; + uint16_t num_entries, num_vlan; if (tvlv_value_len < sizeof(*tt_data)) return; @@ -2576,11 +3383,19 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, tt_data = (struct batadv_tvlv_tt_data *)tvlv_value; tvlv_value_len -= sizeof(*tt_data); + num_vlan = ntohs(tt_data->num_vlan); + + if (tvlv_value_len < sizeof(*tt_vlan) * num_vlan) + return; + + tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); + tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan); + tvlv_value_len -= sizeof(*tt_vlan) * num_vlan; + num_entries = batadv_tt_entries(tvlv_value_len); - batadv_tt_update_orig(bat_priv, orig, - (unsigned char *)(tt_data + 1), - num_entries, tt_data->ttvn, ntohl(tt_data->crc)); + batadv_tt_update_orig(bat_priv, orig, tt_vlan, num_vlan, tt_change, + num_entries, tt_data->ttvn); } /** @@ -2601,7 +3416,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, uint16_t tvlv_value_len) { struct batadv_tvlv_tt_data *tt_data; - uint16_t num_entries; + uint16_t tt_vlan_len, tt_num_entries; char tt_flag; bool ret; @@ -2611,7 +3426,14 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, tt_data = (struct batadv_tvlv_tt_data *)tvlv_value; tvlv_value_len -= sizeof(*tt_data); - num_entries = batadv_tt_entries(tvlv_value_len); + tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data); + tt_vlan_len *= ntohs(tt_data->num_vlan); + + if (tvlv_value_len < tt_vlan_len) + return NET_RX_SUCCESS; + + tvlv_value_len -= tt_vlan_len; + tt_num_entries = batadv_tt_entries(tvlv_value_len); switch (tt_data->flags & BATADV_TT_DATA_TYPE_MASK) { case BATADV_TT_REQUEST: @@ -2639,7 +3461,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, if (batadv_is_my_mac(bat_priv, dst)) { batadv_handle_tt_response(bat_priv, tt_data, - src, num_entries); + src, tt_num_entries); return NET_RX_SUCCESS; } @@ -2684,13 +3506,6 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, if (!batadv_is_my_mac(bat_priv, dst)) return NET_RX_DROP; - /* check if it is a backbone gateway. we don't accept - * roaming advertisement from it, as it has the same - * entries as we have. - */ - if (batadv_bla_is_backbone_gw_orig(bat_priv, src)) - goto out; - if (tvlv_value_len < sizeof(*roaming_adv)) goto out; @@ -2706,7 +3521,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, src, roaming_adv->client); batadv_tt_global_add(bat_priv, orig_node, roaming_adv->client, - BATADV_TT_CLIENT_ROAM, + ntohs(roaming_adv->vid), BATADV_TT_CLIENT_ROAM, atomic_read(&orig_node->last_ttvn) + 1); out: @@ -2725,6 +3540,9 @@ int batadv_tt_init(struct batadv_priv *bat_priv) { int ret; + /* synchronized flags must be remote */ + BUILD_BUG_ON(!(BATADV_TT_SYNC_MASK & BATADV_TT_REMOTE_MASK)); + ret = batadv_tt_local_init(bat_priv); if (ret < 0) return ret; diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 015d8b9e63b9..026b1ffa6746 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -21,30 +21,34 @@ #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ int batadv_tt_init(struct batadv_priv *bat_priv); -void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, - int ifindex); +bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, + unsigned short vid, int ifindex); uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, - const uint8_t *addr, const char *message, - bool roaming); + const uint8_t *addr, unsigned short vid, + const char *message, bool roaming); int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const char *message); + int32_t match_vid, const char *message); struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, const uint8_t *src, - const uint8_t *addr); + const uint8_t *addr, + unsigned short vid); void batadv_tt_free(struct batadv_priv *bat_priv); -bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr); +bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr, + unsigned short vid); bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src, - uint8_t *dst); + uint8_t *dst, unsigned short vid); void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv); bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, - uint8_t *addr); + uint8_t *addr, unsigned short vid); bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, - uint8_t *addr); + uint8_t *addr, unsigned short vid); +void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface); bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *addr); + const unsigned char *addr, + unsigned short vid); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 5cbb0d09a9b5..91dd369b0ff2 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -36,6 +36,18 @@ #endif /* CONFIG_BATMAN_ADV_DAT */ /** + * BATADV_TT_REMOTE_MASK - bitmask selecting the flags that are sent over the + * wire only + */ +#define BATADV_TT_REMOTE_MASK 0x00FF + +/** + * BATADV_TT_SYNC_MASK - bitmask of the flags that need to be kept in sync + * among the nodes. These flags are used to compute the global/local CRC + */ +#define BATADV_TT_SYNC_MASK 0x00F0 + +/** * struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data * @ogm_buff: buffer holding the OGM packet * @ogm_buff_len: length of the OGM packet buffer @@ -107,27 +119,69 @@ struct batadv_frag_list_entry { }; /** + * struct batadv_vlan_tt - VLAN specific TT attributes + * @crc: CRC32 checksum of the entries belonging to this vlan + * @num_entries: number of TT entries for this VLAN + */ +struct batadv_vlan_tt { + uint32_t crc; + atomic_t num_entries; +}; + +/** + * batadv_orig_node_vlan - VLAN specific data per orig_node + * @vid: the VLAN identifier + * @tt: VLAN specific TT attributes + * @list: list node for orig_node::vlan_list + * @refcount: number of context where this object is currently in use + * @rcu: struct used for freeing in a RCU-safe manner + */ +struct batadv_orig_node_vlan { + unsigned short vid; + struct batadv_vlan_tt tt; + struct list_head list; + atomic_t refcount; + struct rcu_head rcu; +}; + +/** + * struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members + * @bcast_own: bitfield containing the number of our OGMs this orig_node + * rebroadcasted "back" to us (relative to last_real_seqno) + * @bcast_own_sum: counted result of bcast_own + * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum, + * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count + */ +struct batadv_orig_bat_iv { + unsigned long *bcast_own; + uint8_t *bcast_own_sum; + /* ogm_cnt_lock protects: bcast_own, bcast_own_sum, + * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count + */ + spinlock_t ogm_cnt_lock; +}; + +/** * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh * @orig: originator ethernet address * @primary_addr: hosts primary interface address * @router: router that should be used to reach this originator * @batadv_dat_addr_t: address of the orig node in the distributed hash - * @bcast_own: bitfield containing the number of our OGMs this orig_node - * rebroadcasted "back" to us (relative to last_real_seqno) - * @bcast_own_sum: counted result of bcast_own * @last_seen: time when last packet from this node was received * @bcast_seqno_reset: time when the broadcast seqno window was reset * @batman_seqno_reset: time when the batman seqno window was reset * @capabilities: announced capabilities of this originator * @last_ttvn: last seen translation table version number - * @tt_crc: CRC of the translation table * @tt_buff: last tt changeset this node received from the orig node * @tt_buff_len: length of the last tt changeset this node received from the * orig node * @tt_buff_lock: lock that protects tt_buff and tt_buff_len - * @tt_size: number of global TT entries announced by the orig node * @tt_initialised: bool keeping track of whether or not this node have received * any translation table information from the orig node yet + * @tt_lock: prevents from updating the table while reading it. Table update is + * made up by two operations (data structure update and metdata -CRC/TTVN- + * recalculation) and they have to be executed atomically in order to avoid + * another thread to read the table/metadata between those. * @last_real_seqno: last and best known sequence number * @last_ttl: ttl of last received packet * @bcast_bits: bitfield containing the info which payload broadcast originated @@ -138,8 +192,6 @@ struct batadv_frag_list_entry { * @neigh_list_lock: lock protecting neigh_list, router and bonding_list * @hash_entry: hlist node for batadv_priv::orig_hash * @bat_priv: pointer to soft_iface this orig node belongs to - * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum, - * neigh_node->real_bits & neigh_node->real_packet_count * @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno * @bond_candidates: how many candidates are available * @bond_list: list of bonding candidates @@ -150,6 +202,10 @@ struct batadv_frag_list_entry { * @in_coding_list_lock: protects in_coding_list * @out_coding_list_lock: protects out_coding_list * @fragments: array with heads for fragment chains + * @vlan_list: a list of orig_node_vlan structs, one per VLAN served by the + * originator represented by this object + * @vlan_list_lock: lock protecting vlan_list + * @bat_iv: B.A.T.M.A.N. IV private structure */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; @@ -158,19 +214,17 @@ struct batadv_orig_node { #ifdef CONFIG_BATMAN_ADV_DAT batadv_dat_addr_t dat_addr; #endif - unsigned long *bcast_own; - uint8_t *bcast_own_sum; unsigned long last_seen; unsigned long bcast_seqno_reset; unsigned long batman_seqno_reset; uint8_t capabilities; atomic_t last_ttvn; - uint32_t tt_crc; unsigned char *tt_buff; int16_t tt_buff_len; spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */ - atomic_t tt_size; bool tt_initialised; + /* prevents from changing the table while reading it */ + spinlock_t tt_lock; uint32_t last_real_seqno; uint8_t last_ttl; DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); @@ -180,10 +234,6 @@ struct batadv_orig_node { spinlock_t neigh_list_lock; struct hlist_node hash_entry; struct batadv_priv *bat_priv; - /* ogm_cnt_lock protects: bcast_own, bcast_own_sum, - * neigh_node->real_bits & neigh_node->real_packet_count - */ - spinlock_t ogm_cnt_lock; /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ spinlock_t bcast_seqno_lock; atomic_t bond_candidates; @@ -197,6 +247,9 @@ struct batadv_orig_node { spinlock_t out_coding_list_lock; /* Protects out_coding_list */ #endif struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT]; + struct list_head vlan_list; + spinlock_t vlan_list_lock; /* protects vlan_list */ + struct batadv_orig_bat_iv bat_iv; }; /** @@ -230,40 +283,49 @@ struct batadv_gw_node { }; /** - * struct batadv_neigh_node - structure for single hop neighbors - * @list: list node for batadv_orig_node::neigh_list - * @addr: mac address of neigh node + * struct batadv_neigh_bat_iv - B.A.T.M.A.N. IV specific structure for single + * hop neighbors * @tq_recv: ring buffer of received TQ values from this neigh node * @tq_index: ring buffer index * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) - * @last_ttl: last received ttl from this neigh node - * @bonding_list: list node for batadv_orig_node::bond_list - * @last_seen: when last packet via this neighbor was received * @real_bits: bitfield containing the number of OGMs received from this neigh * node (relative to orig_node->last_real_seqno) * @real_packet_count: counted result of real_bits + * @lq_update_lock: lock protecting tq_recv & tq_index + */ +struct batadv_neigh_bat_iv { + uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; + uint8_t tq_index; + uint8_t tq_avg; + DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); + uint8_t real_packet_count; + spinlock_t lq_update_lock; /* protects tq_recv & tq_index */ +}; + +/** + * struct batadv_neigh_node - structure for single hops neighbors + * @list: list node for batadv_orig_node::neigh_list * @orig_node: pointer to corresponding orig_node + * @addr: the MAC address of the neighboring interface * @if_incoming: pointer to incoming hard interface - * @lq_update_lock: lock protecting tq_recv & tq_index + * @last_seen: when last packet via this neighbor was received + * @last_ttl: last received ttl from this neigh node + * @bonding_list: list node for batadv_orig_node::bond_list * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner + * @bat_iv: B.A.T.M.A.N. IV private structure */ struct batadv_neigh_node { struct hlist_node list; + struct batadv_orig_node *orig_node; uint8_t addr[ETH_ALEN]; - uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; - uint8_t tq_index; - uint8_t tq_avg; + struct batadv_hard_iface *if_incoming; + unsigned long last_seen; uint8_t last_ttl; struct list_head bonding_list; - unsigned long last_seen; - DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); - uint8_t real_packet_count; - struct batadv_orig_node *orig_node; - struct batadv_hard_iface *if_incoming; - spinlock_t lq_update_lock; /* protects tq_recv & tq_index */ atomic_t refcount; struct rcu_head rcu; + struct batadv_neigh_bat_iv bat_iv; }; /** @@ -383,11 +445,14 @@ enum batadv_counters { * @changes_list_lock: lock protecting changes_list * @req_list_lock: lock protecting req_list * @roam_list_lock: lock protecting roam_list - * @local_entry_num: number of entries in the local hash table - * @local_crc: Checksum of the local table, recomputed before sending a new OGM * @last_changeset: last tt changeset this host has generated * @last_changeset_len: length of last tt changeset this host has generated * @last_changeset_lock: lock protecting last_changeset & last_changeset_len + * @commit_lock: prevents from executing a local TT commit while reading the + * local table. The local TT commit is made up by two operations (data + * structure update and metdata -CRC/TTVN- recalculation) and they have to be + * executed atomically in order to avoid another thread to read the + * table/metadata between those. * @work: work queue callback item for translation table purging */ struct batadv_priv_tt { @@ -402,12 +467,12 @@ struct batadv_priv_tt { spinlock_t changes_list_lock; /* protects changes */ spinlock_t req_list_lock; /* protects req_list */ spinlock_t roam_list_lock; /* protects roam_list */ - atomic_t local_entry_num; - uint32_t local_crc; unsigned char *last_changeset; int16_t last_changeset_len; /* protects last_changeset & last_changeset_len */ spinlock_t last_changeset_lock; + /* prevents from executing a commit while reading the table */ + spinlock_t commit_lock; struct delayed_work work; }; @@ -531,6 +596,26 @@ struct batadv_priv_nc { }; /** + * struct batadv_softif_vlan - per VLAN attributes set + * @vid: VLAN identifier + * @kobj: kobject for sysfs vlan subdirectory + * @ap_isolation: AP isolation state + * @tt: TT private attributes (VLAN specific) + * @list: list node for bat_priv::softif_vlan_list + * @refcount: number of context where this object is currently in use + * @rcu: struct used for freeing in a RCU-safe manner + */ +struct batadv_softif_vlan { + unsigned short vid; + struct kobject *kobj; + atomic_t ap_isolation; /* boolean */ + struct batadv_vlan_tt tt; + struct hlist_node list; + atomic_t refcount; + struct rcu_head rcu; +}; + +/** * struct batadv_priv - per mesh interface data * @mesh_state: current status of the mesh (inactive/active/deactivating) * @soft_iface: net device which holds this struct as private data @@ -539,8 +624,9 @@ struct batadv_priv_nc { * @aggregated_ogms: bool indicating whether OGM aggregation is enabled * @bonding: bool indicating whether traffic bonding is enabled * @fragmentation: bool indicating whether traffic fragmentation is enabled + * @packet_size_max: max packet size that can be transmitted via + * multiple fragmented skbs or a single frame if fragmentation is disabled * @frag_seqno: incremental counter to identify chains of egress fragments - * @ap_isolation: bool indicating whether ap isolation is enabled * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is * enabled * @distributed_arp_table: bool indicating whether distributed ARP table is @@ -566,6 +652,9 @@ struct batadv_priv_nc { * @primary_if: one of the hard interfaces assigned to this mesh interface * becomes the primary interface * @bat_algo_ops: routing algorithm used by this mesh interface + * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top + * of the mesh interface represented by this object + * @softif_vlan_list_lock: lock protecting softif_vlan_list * @bla: bridge loope avoidance data * @debug_log: holding debug logging relevant data * @gw: gateway data @@ -583,8 +672,8 @@ struct batadv_priv { atomic_t aggregated_ogms; atomic_t bonding; atomic_t fragmentation; + atomic_t packet_size_max; atomic_t frag_seqno; - atomic_t ap_isolation; #ifdef CONFIG_BATMAN_ADV_BLA atomic_t bridge_loop_avoidance; #endif @@ -613,6 +702,8 @@ struct batadv_priv { struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ struct batadv_algo_ops *bat_algo_ops; + struct hlist_head softif_vlan_list; + spinlock_t softif_vlan_list_lock; /* protects softif_vlan_list */ #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_priv_bla bla; #endif @@ -658,7 +749,7 @@ struct batadv_socket_client { struct batadv_socket_packet { struct list_head list; size_t icmp_len; - struct batadv_icmp_packet_rr icmp_packet; + uint8_t icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE]; }; /** @@ -715,6 +806,7 @@ struct batadv_bla_claim { /** * struct batadv_tt_common_entry - tt local & tt global common data * @addr: mac address of non-mesh client + * @vid: VLAN identifier * @hash_entry: hlist node for batadv_priv_tt::local_hash or for * batadv_priv_tt::global_hash * @flags: various state handling flags (see batadv_tt_client_flags) @@ -724,6 +816,7 @@ struct batadv_bla_claim { */ struct batadv_tt_common_entry { uint8_t addr[ETH_ALEN]; + unsigned short vid; struct hlist_node hash_entry; uint16_t flags; unsigned long added_at; @@ -914,6 +1007,16 @@ struct batadv_forw_packet { * @bat_primary_iface_set: called when primary interface is selected / changed * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue * @bat_ogm_emit: send scheduled OGM + * @bat_neigh_cmp: compare the metrics of two neighbors + * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or + * better than neigh2 from the metric prospective + * @bat_orig_print: print the originator table (optional) + * @bat_orig_free: free the resources allocated by the routing algorithm for an + * orig_node object + * @bat_orig_add_if: ask the routing algorithm to apply the needed changes to + * the orig_node due to a new hard-interface being added into the mesh + * @bat_orig_del_if: ask the routing algorithm to apply the needed changes to + * the orig_node due to an hard-interface being removed from the mesh */ struct batadv_algo_ops { struct hlist_node list; @@ -924,6 +1027,17 @@ struct batadv_algo_ops { void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface); void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface); void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); + int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1, + struct batadv_neigh_node *neigh2); + bool (*bat_neigh_is_equiv_or_better)(struct batadv_neigh_node *neigh1, + struct batadv_neigh_node *neigh2); + /* orig_node handling API */ + void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq); + void (*bat_orig_free)(struct batadv_orig_node *orig_node); + int (*bat_orig_add_if)(struct batadv_orig_node *orig_node, + int max_if_num); + int (*bat_orig_del_if)(struct batadv_orig_node *orig_node, + int max_if_num, int del_if_num); }; /** @@ -931,6 +1045,7 @@ struct batadv_algo_ops { * is used to stored ARP entries needed for the global DAT cache * @ip: the IPv4 corresponding to this DAT/ARP entry * @mac_addr: the MAC address associated to the stored IPv4 + * @vid: the vlan ID associated to this entry * @last_update: time in jiffies when this entry was refreshed last time * @hash_entry: hlist node for batadv_priv_dat::hash * @refcount: number of contexts the object is used @@ -939,6 +1054,7 @@ struct batadv_algo_ops { struct batadv_dat_entry { __be32 ip; uint8_t mac_addr[ETH_ALEN]; + unsigned short vid; unsigned long last_update; struct hlist_node hash_entry; atomic_t refcount; diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index 9e6cc3553105..ab5241400cf7 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -182,7 +182,7 @@ struct hidp_session { }; /* HIDP init defines */ -extern int __init hidp_init_sockets(void); -extern void __exit hidp_cleanup_sockets(void); +int __init hidp_init_sockets(void); +void __exit hidp_cleanup_sockets(void); #endif /* __HIDP_H */ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ca04163635da..e6b7fecb3af1 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -64,7 +64,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_flood_deliver(br, skb, false); goto out; } - if (br_multicast_rcv(br, NULL, skb)) { + if (br_multicast_rcv(br, NULL, skb, vid)) { kfree_skb(skb); goto out; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ffd5874f2592..33e8f23acddd 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -700,7 +700,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], vid = nla_get_u16(tb[NDA_VLAN]); - if (vid >= VLAN_N_VID) { + if (!vid || vid >= VLAN_VID_MASK) { pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", vid); return -EINVAL; @@ -794,7 +794,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], vid = nla_get_u16(tb[NDA_VLAN]); - if (vid >= VLAN_N_VID) { + if (!vid || vid >= VLAN_VID_MASK) { pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", vid); return -EINVAL; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index a2fd37ec35f7..7e73c32e205d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -80,7 +80,7 @@ int br_handle_frame_finish(struct sk_buff *skb) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && - br_multicast_rcv(br, p, skb)) + br_multicast_rcv(br, p, skb, vid)) goto drop; if (p->state == BR_STATE_LEARNING) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 85a09bb5ca51..b7b1914dfa25 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -453,7 +453,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) call_rcu_bh(&p->rcu, br_multicast_free_pg); err = 0; - if (!mp->ports && !mp->mglist && mp->timer_armed && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); break; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 005d876dd86c..4c214b2b88ef 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -272,7 +272,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && !mp->mglist && mp->timer_armed && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); @@ -620,7 +620,6 @@ rehash: mp->br = br; mp->addr = *group; - setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); @@ -660,6 +659,7 @@ static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; + unsigned long now = jiffies; int err; spin_lock(&br->multicast_lock); @@ -674,6 +674,7 @@ static int br_multicast_add_group(struct net_bridge *br, if (!port) { mp->mglist = true; + mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -681,7 +682,7 @@ static int br_multicast_add_group(struct net_bridge *br, (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->port == port) - goto out; + goto found; if ((unsigned long)p->port < (unsigned long)port) break; } @@ -692,6 +693,8 @@ static int br_multicast_add_group(struct net_bridge *br, rcu_assign_pointer(*pp, p); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); +found: + mod_timer(&p->timer, now + br->multicast_membership_interval); out: err = 0; @@ -944,7 +947,8 @@ void br_multicast_disable_port(struct net_bridge_port *port) static int br_ip4_multicast_igmp3_report(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct igmpv3_report *ih; struct igmpv3_grec *grec; @@ -954,12 +958,10 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int type; int err = 0; __be32 group; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ih))) return -EINVAL; - br_vlan_get_tag(skb, &vid); ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -1002,7 +1004,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_mld2_report(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct icmp6hdr *icmp6h; struct mld2_grec *grec; @@ -1010,12 +1013,10 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, int len; int num; int err = 0; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; - br_vlan_get_tag(skb, &vid); icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); len = sizeof(*icmp6h); @@ -1138,7 +1139,8 @@ static void br_multicast_query_received(struct net_bridge *br, static int br_ip4_multicast_query(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { const struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); @@ -1150,7 +1152,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, unsigned long now = jiffies; __be32 group; int err = 0; - u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1186,14 +1187,10 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!group) goto out; - br_vlan_get_tag(skb, &vid); mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; - mod_timer(&mp->timer, now + br->multicast_membership_interval); - mp->timer_armed = true; - max_delay *= br->multicast_last_member_count; if (mp->mglist && @@ -1219,7 +1216,8 @@ out: #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_query(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct mld_msg *mld; @@ -1231,7 +1229,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long now = jiffies; const struct in6_addr *group = NULL; int err = 0; - u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1265,14 +1262,10 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!group) goto out; - br_vlan_get_tag(skb, &vid); mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; - mod_timer(&mp->timer, now + br->multicast_membership_interval); - mp->timer_armed = true; - max_delay *= br->multicast_last_member_count; if (mp->mglist && (timer_pending(&mp->timer) ? @@ -1358,7 +1351,7 @@ static void br_multicast_leave_group(struct net_bridge *br, call_rcu_bh(&p->rcu, br_multicast_free_pg); br_mdb_notify(br->dev, port, group, RTM_DELMDB); - if (!mp->ports && !mp->mglist && mp->timer_armed && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } @@ -1370,12 +1363,30 @@ static void br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (mp->mglist && mp->timer_armed && + if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { mod_timer(&mp->timer, time); } + + goto out; + } + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; } out: spin_unlock(&br->multicast_lock); @@ -1424,7 +1435,8 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct sk_buff *skb2 = skb; const struct iphdr *iph; @@ -1432,7 +1444,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned int len; unsigned int offset; int err; - u16 vid = 0; /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) @@ -1493,7 +1504,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = 0; - br_vlan_get_tag(skb2, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb2); @@ -1504,10 +1514,10 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb2); + err = br_ip4_multicast_igmp3_report(br, port, skb2, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_ip4_multicast_query(br, port, skb2); + err = br_ip4_multicast_query(br, port, skb2, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid); @@ -1525,7 +1535,8 @@ err_out: #if IS_ENABLED(CONFIG_IPV6) static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct sk_buff *skb2; const struct ipv6hdr *ip6h; @@ -1535,7 +1546,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, unsigned int len; int offset; int err; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -EINVAL; @@ -1625,7 +1635,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = 0; - br_vlan_get_tag(skb, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; switch (icmp6_type) { @@ -1642,10 +1651,10 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; } case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb2); + err = br_ip6_multicast_mld2_report(br, port, skb2, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb2); + err = br_ip6_multicast_query(br, port, skb2, vid); break; case ICMPV6_MGM_REDUCTION: { @@ -1666,7 +1675,7 @@ out: #endif int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, u16 vid) { BR_INPUT_SKB_CB(skb)->igmp = 0; BR_INPUT_SKB_CB(skb)->mrouters_only = 0; @@ -1676,10 +1685,10 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, switch (skb->protocol) { case htons(ETH_P_IP): - return br_multicast_ipv4_rcv(br, port, skb); + return br_multicast_ipv4_rcv(br, port, skb, vid); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return br_multicast_ipv6_rcv(br, port, skb); + return br_multicast_ipv6_rcv(br, port, skb, vid); #endif } @@ -1798,7 +1807,6 @@ void br_multicast_stop(struct net_bridge *br) hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); - mp->timer_armed = false; call_rcu_bh(&mp->rcu, br_multicast_free_group); } } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 878f008afefa..80cad2cf02a7 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -559,6 +559,8 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) else if (skb->protocol == htons(ETH_P_PPP_SES)) nf_bridge->mask |= BRNF_PPPoE; + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); return skb->dev; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e74ddc1c29a8..f75d92e4f96b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -243,7 +243,7 @@ static int br_afspec(struct net_bridge *br, vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); - if (vinfo->vid >= VLAN_N_VID) + if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) return -EINVAL; switch (cmd) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index efb57d911569..229d820bdf0b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -126,7 +126,6 @@ struct net_bridge_mdb_entry struct timer_list timer; struct br_ip addr; bool mglist; - bool timer_armed; }; struct net_bridge_mdb_htable @@ -344,10 +343,9 @@ static inline int br_is_root_bridge(const struct net_bridge *br) } /* br_device.c */ -extern void br_dev_setup(struct net_device *dev); -extern void br_dev_delete(struct net_device *dev, struct list_head *list); -extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, - struct net_device *dev); +void br_dev_setup(struct net_device *dev); +void br_dev_delete(struct net_device *dev, struct list_head *list); +netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); #ifdef CONFIG_NET_POLL_CONTROLLER static inline void br_netpoll_send_skb(const struct net_bridge_port *p, struct sk_buff *skb) @@ -358,8 +356,8 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, netpoll_send_skb(np, skb); } -extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); -extern void br_netpoll_disable(struct net_bridge_port *p); +int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); +void br_netpoll_disable(struct net_bridge_port *p); #else static inline void br_netpoll_send_skb(const struct net_bridge_port *p, struct sk_buff *skb) @@ -377,116 +375,99 @@ static inline void br_netpoll_disable(struct net_bridge_port *p) #endif /* br_fdb.c */ -extern int br_fdb_init(void); -extern void br_fdb_fini(void); -extern void br_fdb_flush(struct net_bridge *br); -extern void br_fdb_changeaddr(struct net_bridge_port *p, - const unsigned char *newaddr); -extern void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); -extern void br_fdb_cleanup(unsigned long arg); -extern void br_fdb_delete_by_port(struct net_bridge *br, - const struct net_bridge_port *p, int do_all); -extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr, - __u16 vid); -extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); -extern int br_fdb_fillbuf(struct net_bridge *br, void *buf, - unsigned long count, unsigned long off); -extern int br_fdb_insert(struct net_bridge *br, - struct net_bridge_port *source, - const unsigned char *addr, - u16 vid); -extern void br_fdb_update(struct net_bridge *br, - struct net_bridge_port *source, - const unsigned char *addr, - u16 vid); -extern int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid); - -extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr); -extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, - u16 nlh_flags); -extern int br_fdb_dump(struct sk_buff *skb, - struct netlink_callback *cb, - struct net_device *dev, - int idx); +int br_fdb_init(void); +void br_fdb_fini(void); +void br_fdb_flush(struct net_bridge *br); +void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); +void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); +void br_fdb_cleanup(unsigned long arg); +void br_fdb_delete_by_port(struct net_bridge *br, + const struct net_bridge_port *p, int do_all); +struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, + const unsigned char *addr, __u16 vid); +int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); +int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, + unsigned long off); +int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid); +void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid); +int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid); + +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr); +int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, + const unsigned char *addr, u16 nlh_flags); +int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, int idx); /* br_forward.c */ -extern void br_deliver(const struct net_bridge_port *to, - struct sk_buff *skb); -extern int br_dev_queue_push_xmit(struct sk_buff *skb); -extern void br_forward(const struct net_bridge_port *to, +void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); +int br_dev_queue_push_xmit(struct sk_buff *skb); +void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); -extern int br_forward_finish(struct sk_buff *skb); -extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, - bool unicast); -extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct sk_buff *skb2, bool unicast); +int br_forward_finish(struct sk_buff *skb); +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast); +void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, + struct sk_buff *skb2, bool unicast); /* br_if.c */ -extern void br_port_carrier_check(struct net_bridge_port *p); -extern int br_add_bridge(struct net *net, const char *name); -extern int br_del_bridge(struct net *net, const char *name); -extern void br_net_exit(struct net *net); -extern int br_add_if(struct net_bridge *br, - struct net_device *dev); -extern int br_del_if(struct net_bridge *br, - struct net_device *dev); -extern int br_min_mtu(const struct net_bridge *br); -extern netdev_features_t br_features_recompute(struct net_bridge *br, - netdev_features_t features); +void br_port_carrier_check(struct net_bridge_port *p); +int br_add_bridge(struct net *net, const char *name); +int br_del_bridge(struct net *net, const char *name); +void br_net_exit(struct net *net); +int br_add_if(struct net_bridge *br, struct net_device *dev); +int br_del_if(struct net_bridge *br, struct net_device *dev); +int br_min_mtu(const struct net_bridge *br); +netdev_features_t br_features_recompute(struct net_bridge *br, + netdev_features_t features); /* br_input.c */ -extern int br_handle_frame_finish(struct sk_buff *skb); -extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb); +int br_handle_frame_finish(struct sk_buff *skb); +rx_handler_result_t br_handle_frame(struct sk_buff **pskb); /* br_ioctl.c */ -extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg); +int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, + void __user *arg); /* br_multicast.c */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING extern unsigned int br_mdb_rehash_seq; -extern int br_multicast_rcv(struct net_bridge *br, - struct net_bridge_port *port, - struct sk_buff *skb); -extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, - struct sk_buff *skb, u16 vid); -extern void br_multicast_add_port(struct net_bridge_port *port); -extern void br_multicast_del_port(struct net_bridge_port *port); -extern void br_multicast_enable_port(struct net_bridge_port *port); -extern void br_multicast_disable_port(struct net_bridge_port *port); -extern void br_multicast_init(struct net_bridge *br); -extern void br_multicast_open(struct net_bridge *br); -extern void br_multicast_stop(struct net_bridge *br); -extern void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, - struct sk_buff *skb); -extern void br_multicast_forward(struct net_bridge_mdb_entry *mdst, - struct sk_buff *skb, struct sk_buff *skb2); -extern int br_multicast_set_router(struct net_bridge *br, unsigned long val); -extern int br_multicast_set_port_router(struct net_bridge_port *p, - unsigned long val); -extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); -extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val); -extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); -extern struct net_bridge_mdb_entry *br_mdb_ip_get( - struct net_bridge_mdb_htable *mdb, - struct br_ip *dst); -extern struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, - struct net_bridge_port *port, struct br_ip *group); -extern void br_multicast_free_pg(struct rcu_head *head); -extern struct net_bridge_port_group *br_multicast_new_port_group( - struct net_bridge_port *port, - struct br_ip *group, - struct net_bridge_port_group __rcu *next, - unsigned char state); -extern void br_mdb_init(void); -extern void br_mdb_uninit(void); -extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, - struct br_ip *group, int type); +int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, + struct sk_buff *skb, u16 vid); +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb, u16 vid); +void br_multicast_add_port(struct net_bridge_port *port); +void br_multicast_del_port(struct net_bridge_port *port); +void br_multicast_enable_port(struct net_bridge_port *port); +void br_multicast_disable_port(struct net_bridge_port *port); +void br_multicast_init(struct net_bridge *br); +void br_multicast_open(struct net_bridge *br); +void br_multicast_stop(struct net_bridge *br); +void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb); +void br_multicast_forward(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, struct sk_buff *skb2); +int br_multicast_set_router(struct net_bridge *br, unsigned long val); +int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); +int br_multicast_toggle(struct net_bridge *br, unsigned long val); +int br_multicast_set_querier(struct net_bridge *br, unsigned long val); +int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); +struct net_bridge_mdb_entry * +br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst); +struct net_bridge_mdb_entry * +br_multicast_new_group(struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group); +void br_multicast_free_pg(struct rcu_head *head); +struct net_bridge_port_group * +br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, + struct net_bridge_port_group __rcu *next, + unsigned char state); +void br_mdb_init(void); +void br_mdb_uninit(void); +void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type); #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) @@ -523,7 +504,8 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { return 0; } @@ -591,22 +573,21 @@ static inline void br_mdb_uninit(void) /* br_vlan.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING -extern bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, - struct sk_buff *skb, u16 *vid); -extern bool br_allowed_egress(struct net_bridge *br, - const struct net_port_vlans *v, - const struct sk_buff *skb); -extern struct sk_buff *br_handle_vlan(struct net_bridge *br, - const struct net_port_vlans *v, - struct sk_buff *skb); -extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); -extern int br_vlan_delete(struct net_bridge *br, u16 vid); -extern void br_vlan_flush(struct net_bridge *br); -extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); -extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); -extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); -extern void nbp_vlan_flush(struct net_bridge_port *port); -extern bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); +bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid); +bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v, + const struct sk_buff *skb); +struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb); +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); +int br_vlan_delete(struct net_bridge *br, u16 vid); +void br_vlan_flush(struct net_bridge *br); +int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); +int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); +void nbp_vlan_flush(struct net_bridge_port *port); +bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); static inline struct net_port_vlans *br_get_vlan_info( const struct net_bridge *br) @@ -643,9 +624,7 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v) * vid wasn't set */ smp_rmb(); - return (v->pvid & VLAN_TAG_PRESENT) ? - (v->pvid & ~VLAN_TAG_PRESENT) : - VLAN_N_VID; + return v->pvid ?: VLAN_N_VID; } #else @@ -727,9 +706,9 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v) /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER -extern int br_netfilter_init(void); -extern void br_netfilter_fini(void); -extern void br_netfilter_rtable_init(struct net_bridge *); +int br_netfilter_init(void); +void br_netfilter_fini(void); +void br_netfilter_rtable_init(struct net_bridge *); #else #define br_netfilter_init() (0) #define br_netfilter_fini() do { } while(0) @@ -737,43 +716,39 @@ extern void br_netfilter_rtable_init(struct net_bridge *); #endif /* br_stp.c */ -extern void br_log_state(const struct net_bridge_port *p); -extern struct net_bridge_port *br_get_port(struct net_bridge *br, - u16 port_no); -extern void br_init_port(struct net_bridge_port *p); -extern void br_become_designated_port(struct net_bridge_port *p); +void br_log_state(const struct net_bridge_port *p); +struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); +void br_init_port(struct net_bridge_port *p); +void br_become_designated_port(struct net_bridge_port *p); -extern void __br_set_forward_delay(struct net_bridge *br, unsigned long t); -extern int br_set_forward_delay(struct net_bridge *br, unsigned long x); -extern int br_set_hello_time(struct net_bridge *br, unsigned long x); -extern int br_set_max_age(struct net_bridge *br, unsigned long x); +void __br_set_forward_delay(struct net_bridge *br, unsigned long t); +int br_set_forward_delay(struct net_bridge *br, unsigned long x); +int br_set_hello_time(struct net_bridge *br, unsigned long x); +int br_set_max_age(struct net_bridge *br, unsigned long x); /* br_stp_if.c */ -extern void br_stp_enable_bridge(struct net_bridge *br); -extern void br_stp_disable_bridge(struct net_bridge *br); -extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val); -extern void br_stp_enable_port(struct net_bridge_port *p); -extern void br_stp_disable_port(struct net_bridge_port *p); -extern bool br_stp_recalculate_bridge_id(struct net_bridge *br); -extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); -extern void br_stp_set_bridge_priority(struct net_bridge *br, - u16 newprio); -extern int br_stp_set_port_priority(struct net_bridge_port *p, - unsigned long newprio); -extern int br_stp_set_path_cost(struct net_bridge_port *p, - unsigned long path_cost); -extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); +void br_stp_enable_bridge(struct net_bridge *br); +void br_stp_disable_bridge(struct net_bridge *br); +void br_stp_set_enabled(struct net_bridge *br, unsigned long val); +void br_stp_enable_port(struct net_bridge_port *p); +void br_stp_disable_port(struct net_bridge_port *p); +bool br_stp_recalculate_bridge_id(struct net_bridge *br); +void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); +void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio); +int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio); +int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost); +ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); /* br_stp_bpdu.c */ struct stp_proto; -extern void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, - struct net_device *dev); +void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev); /* br_stp_timer.c */ -extern void br_stp_timer_init(struct net_bridge *br); -extern void br_stp_port_timer_init(struct net_bridge_port *p); -extern unsigned long br_timer_value(const struct timer_list *timer); +void br_stp_timer_init(struct net_bridge *br); +void br_stp_port_timer_init(struct net_bridge_port *p); +unsigned long br_timer_value(const struct timer_list *timer); /* br.c */ #if IS_ENABLED(CONFIG_ATM_LANE) @@ -782,23 +757,23 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) /* br_netlink.c */ extern struct rtnl_link_ops br_link_ops; -extern int br_netlink_init(void); -extern void br_netlink_fini(void); -extern void br_ifinfo_notify(int event, struct net_bridge_port *port); -extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); -extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); -extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask); +int br_netlink_init(void); +void br_netlink_fini(void); +void br_ifinfo_notify(int event, struct net_bridge_port *port); +int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); +int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, + u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ extern const struct sysfs_ops brport_sysfs_ops; -extern int br_sysfs_addif(struct net_bridge_port *p); -extern int br_sysfs_renameif(struct net_bridge_port *p); +int br_sysfs_addif(struct net_bridge_port *p); +int br_sysfs_renameif(struct net_bridge_port *p); /* br_sysfs_br.c */ -extern int br_sysfs_addbr(struct net_device *dev); -extern void br_sysfs_delbr(struct net_device *dev); +int br_sysfs_addbr(struct net_device *dev); +void br_sysfs_delbr(struct net_device *dev); #else diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h index 0c0fe36e7aa9..2fe910c4e170 100644 --- a/net/bridge/br_private_stp.h +++ b/net/bridge/br_private_stp.h @@ -51,19 +51,19 @@ static inline int br_is_designated_port(const struct net_bridge_port *p) /* br_stp.c */ -extern void br_become_root_bridge(struct net_bridge *br); -extern void br_config_bpdu_generation(struct net_bridge *); -extern void br_configuration_update(struct net_bridge *); -extern void br_port_state_selection(struct net_bridge *); -extern void br_received_config_bpdu(struct net_bridge_port *p, - const struct br_config_bpdu *bpdu); -extern void br_received_tcn_bpdu(struct net_bridge_port *p); -extern void br_transmit_config(struct net_bridge_port *p); -extern void br_transmit_tcn(struct net_bridge *br); -extern void br_topology_change_detection(struct net_bridge *br); +void br_become_root_bridge(struct net_bridge *br); +void br_config_bpdu_generation(struct net_bridge *); +void br_configuration_update(struct net_bridge *); +void br_port_state_selection(struct net_bridge *); +void br_received_config_bpdu(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu); +void br_received_tcn_bpdu(struct net_bridge_port *p); +void br_transmit_config(struct net_bridge_port *p); +void br_transmit_tcn(struct net_bridge *br); +void br_topology_change_detection(struct net_bridge *br); /* br_stp_bpdu.c */ -extern void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *); -extern void br_send_tcn_bpdu(struct net_bridge_port *); +void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *); +void br_send_tcn_bpdu(struct net_bridge_port *); #endif diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 108084a04671..656a6f3e40de 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -134,7 +134,7 @@ static void br_stp_start(struct net_bridge *br) if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY) __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY); - else if (br->bridge_forward_delay < BR_MAX_FORWARD_DELAY) + else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); if (r == 0) { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 9a9ffe7e4019..53f0990eab58 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -45,37 +45,34 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) return 0; } - if (vid) { - if (v->port_idx) { - p = v->parent.port; - br = p->br; - dev = p->dev; - } else { - br = v->parent.br; - dev = br->dev; - } - ops = dev->netdev_ops; - - if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { - /* Add VLAN to the device filter if it is supported. - * Stricly speaking, this is not necessary now, since - * devices are made promiscuous by the bridge, but if - * that ever changes this code will allow tagged - * traffic to enter the bridge. - */ - err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), - vid); - if (err) - return err; - } - - err = br_fdb_insert(br, p, dev->dev_addr, vid); - if (err) { - br_err(br, "failed insert local address into bridge " - "forwarding table\n"); - goto out_filt; - } + if (v->port_idx) { + p = v->parent.port; + br = p->br; + dev = p->dev; + } else { + br = v->parent.br; + dev = br->dev; + } + ops = dev->netdev_ops; + + if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { + /* Add VLAN to the device filter if it is supported. + * Stricly speaking, this is not necessary now, since + * devices are made promiscuous by the bridge, but if + * that ever changes this code will allow tagged + * traffic to enter the bridge. + */ + err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), + vid); + if (err) + return err; + } + err = br_fdb_insert(br, p, dev->dev_addr, vid); + if (err) { + br_err(br, "failed insert local address into bridge " + "forwarding table\n"); + goto out_filt; } set_bit(vid, v->vlan_bitmap); @@ -98,7 +95,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) __vlan_delete_pvid(v, vid); clear_bit(vid, v->untagged_bitmap); - if (v->port_idx && vid) { + if (v->port_idx) { struct net_device *dev = v->parent.port->dev; const struct net_device_ops *ops = dev->netdev_ops; @@ -192,6 +189,8 @@ out: bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, struct sk_buff *skb, u16 *vid) { + int err; + /* If VLAN filtering is disabled on the bridge, all packets are * permitted. */ @@ -204,20 +203,32 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) return false; - if (br_vlan_get_tag(skb, vid)) { + err = br_vlan_get_tag(skb, vid); + if (!*vid) { u16 pvid = br_get_pvid(v); - /* Frame did not have a tag. See if pvid is set - * on this port. That tells us which vlan untagged - * traffic belongs to. + /* Frame had a tag with VID 0 or did not have a tag. + * See if pvid is set on this port. That tells us which + * vlan untagged or priority-tagged traffic belongs to. */ if (pvid == VLAN_N_VID) return false; - /* PVID is set on this port. Any untagged ingress - * frame is considered to belong to this vlan. + /* PVID is set on this port. Any untagged or priority-tagged + * ingress frame is considered to belong to this vlan. */ - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); + *vid = pvid; + if (likely(err)) + /* Untagged Frame. */ + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); + else + /* Priority-tagged Frame. + * At this point, We know that skb->vlan_tci had + * VLAN_TAG_PRESENT bit and its VID field was 0x000. + * We update only VID field and preserve PCP field. + */ + skb->vlan_tci |= pvid; + return true; } @@ -248,7 +259,9 @@ bool br_allowed_egress(struct net_bridge *br, return false; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; @@ -278,7 +291,9 @@ out: return err; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int br_vlan_delete(struct net_bridge *br, u16 vid) { struct net_port_vlans *pv; @@ -289,14 +304,9 @@ int br_vlan_delete(struct net_bridge *br, u16 vid) if (!pv) return -EINVAL; - if (vid) { - /* If the VID !=0 remove fdb for this vid. VID 0 is special - * in that it's the default and is always there in the fdb. - */ - spin_lock_bh(&br->hash_lock); - fdb_delete_by_addr(br, br->dev->dev_addr, vid); - spin_unlock_bh(&br->hash_lock); - } + spin_lock_bh(&br->hash_lock); + fdb_delete_by_addr(br, br->dev->dev_addr, vid); + spin_unlock_bh(&br->hash_lock); __vlan_del(pv, vid); return 0; @@ -329,7 +339,9 @@ unlock: return 0; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; @@ -363,7 +375,9 @@ clean_up: return err; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) { struct net_port_vlans *pv; @@ -374,14 +388,9 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) if (!pv) return -EINVAL; - if (vid) { - /* If the VID !=0 remove fdb for this vid. VID 0 is special - * in that it's the default and is always there in the fdb. - */ - spin_lock_bh(&port->br->hash_lock); - fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); - spin_unlock_bh(&port->br->hash_lock); - } + spin_lock_bh(&port->br->hash_lock); + fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); + spin_unlock_bh(&port->br->hash_lock); return __vlan_del(pv, vid); } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 518093802d1d..7c470c371e14 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -181,6 +181,7 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, ub->qlen++; pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* Fill in the ulog data */ pm->version = EBT_ULOG_VERSION; @@ -193,8 +194,6 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, pm->hook = hooknr; if (uloginfo->prefix != NULL) strcpy(pm->prefix, uloginfo->prefix); - else - *(pm->prefix) = '\0'; if (in) { strcpy(pm->physindev, in->name); @@ -204,16 +203,14 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name); else strcpy(pm->indev, in->name); - } else - pm->indev[0] = pm->physindev[0] = '\0'; + } if (out) { /* If out exists, then out is a bridge port */ strcpy(pm->physoutdev, out->name); /* rcu_read_lock()ed by nf_hook_slow */ strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name); - } else - pm->outdev[0] = pm->physoutdev[0] = '\0'; + } if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0) BUG(); diff --git a/net/can/af_can.h b/net/can/af_can.h index 1dccb4c33894..6de58b40535c 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -108,9 +108,9 @@ struct s_pstats { extern struct dev_rcv_lists can_rx_alldev_list; /* function prototypes for the CAN networklayer procfs (proc.c) */ -extern void can_init_proc(void); -extern void can_remove_proc(void); -extern void can_stat_update(unsigned long data); +void can_init_proc(void); +void can_remove_proc(void); +void can_stat_update(unsigned long data); /* structures and variables from af_can.c needed in proc.c for reading */ extern struct timer_list can_stattimer; /* timer for statistics update */ diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h index ed7d088b1bc9..059a3ce4b53f 100644 --- a/net/ceph/auth_none.h +++ b/net/ceph/auth_none.h @@ -23,7 +23,7 @@ struct ceph_auth_none_info { struct ceph_none_authorizer au; /* we only need one; it's static */ }; -extern int ceph_auth_none_init(struct ceph_auth_client *ac); +int ceph_auth_none_init(struct ceph_auth_client *ac); #endif diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index c5a058da7ac8..65ee72082d99 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -45,7 +45,7 @@ struct ceph_x_info { struct ceph_x_authorizer auth_authorizer; }; -extern int ceph_x_init(struct ceph_auth_client *ac); +int ceph_x_init(struct ceph_auth_client *ac); #endif diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 3572dc518bc9..d1498224c49d 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -20,34 +20,32 @@ static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) kfree(key->key); } -extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst, - const struct ceph_crypto_key *src); -extern int ceph_crypto_key_encode(struct ceph_crypto_key *key, - void **p, void *end); -extern int ceph_crypto_key_decode(struct ceph_crypto_key *key, - void **p, void *end); -extern int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in); +int ceph_crypto_key_clone(struct ceph_crypto_key *dst, + const struct ceph_crypto_key *src); +int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end); +int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end); +int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in); /* crypto.c */ -extern int ceph_decrypt(struct ceph_crypto_key *secret, - void *dst, size_t *dst_len, - const void *src, size_t src_len); -extern int ceph_encrypt(struct ceph_crypto_key *secret, - void *dst, size_t *dst_len, - const void *src, size_t src_len); -extern int ceph_decrypt2(struct ceph_crypto_key *secret, - void *dst1, size_t *dst1_len, - void *dst2, size_t *dst2_len, - const void *src, size_t src_len); -extern int ceph_encrypt2(struct ceph_crypto_key *secret, - void *dst, size_t *dst_len, - const void *src1, size_t src1_len, - const void *src2, size_t src2_len); -extern int ceph_crypto_init(void); -extern void ceph_crypto_shutdown(void); +int ceph_decrypt(struct ceph_crypto_key *secret, + void *dst, size_t *dst_len, + const void *src, size_t src_len); +int ceph_encrypt(struct ceph_crypto_key *secret, + void *dst, size_t *dst_len, + const void *src, size_t src_len); +int ceph_decrypt2(struct ceph_crypto_key *secret, + void *dst1, size_t *dst1_len, + void *dst2, size_t *dst2_len, + const void *src, size_t src_len); +int ceph_encrypt2(struct ceph_crypto_key *secret, + void *dst, size_t *dst_len, + const void *src1, size_t src1_len, + const void *src2, size_t src2_len); +int ceph_crypto_init(void); +void ceph_crypto_shutdown(void); /* armor.c */ -extern int ceph_armor(char *dst, const char *src, const char *end); -extern int ceph_unarmor(char *dst, const char *src, const char *end); +int ceph_armor(char *dst, const char *src, const char *end); +int ceph_unarmor(char *dst, const char *src, const char *end); #endif diff --git a/net/core/datagram.c b/net/core/datagram.c index af814e764206..a16ed7bbe376 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -577,7 +577,7 @@ EXPORT_SYMBOL(skb_copy_datagram_from_iovec); /** * zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec * @skb: buffer to copy - * @from: io vector to copy to + * @from: io vector to copy from * @offset: offset in the io vector to start copying from * @count: amount of vectors to copy to buffer from * diff --git a/net/core/dev.c b/net/core/dev.c index 1b6eadf69289..0e6136546a8c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1203,7 +1203,7 @@ void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { call_netdevice_notifiers(NETDEV_CHANGE, dev); - rtmsg_ifinfo(RTM_NEWLINK, dev, 0); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); } } EXPORT_SYMBOL(netdev_state_change); @@ -1293,7 +1293,7 @@ int dev_open(struct net_device *dev) if (ret < 0) return ret; - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_UP, dev); return ret; @@ -1371,7 +1371,7 @@ static int dev_close_many(struct list_head *head) __dev_close_many(head); list_for_each_entry_safe(dev, tmp, head, close_list) { - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_DOWN, dev); list_del_init(&dev->close_list); } @@ -2377,6 +2377,8 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, } SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + SKB_GSO_CB(skb)->encap_level = 0; + skb_reset_mac_header(skb); skb_reset_mac_len(skb); @@ -4646,6 +4648,7 @@ remove_symlinks: free_adj: kfree(adj); + dev_put(adj_dev); return ret; } @@ -5255,7 +5258,7 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, unsigned int changes = dev->flags ^ old_flags; if (gchanges) - rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges); + rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC); if (changes & IFF_UP) { if (dev->flags & IFF_UP) @@ -5487,7 +5490,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); /* * Flush the unicast and multicast chains @@ -5886,7 +5889,7 @@ int register_netdevice(struct net_device *dev) */ if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); out: return ret; @@ -6193,6 +6196,16 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); +void netdev_freemem(struct net_device *dev) +{ + char *addr = (char *)dev - dev->padded; + + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -6236,7 +6249,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kzalloc(alloc_size, GFP_KERNEL); + p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!p) + p = vzalloc(alloc_size); if (!p) return NULL; @@ -6245,7 +6260,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) - goto free_p; + goto free_dev; if (dev_addr_init(dev)) goto free_pcpu; @@ -6298,8 +6313,8 @@ free_pcpu: kfree(dev->_rx); #endif -free_p: - kfree(p); +free_dev: + netdev_freemem(dev); return NULL; } EXPORT_SYMBOL(alloc_netdev_mqs); @@ -6336,7 +6351,7 @@ void free_netdev(struct net_device *dev) /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)dev - dev->padded); + netdev_freemem(dev); return; } @@ -6498,7 +6513,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); /* * Flush the unicast and multicast chains @@ -6537,7 +6552,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char * Prevent userspace races by waiting until the network * device is fully setup before sending notifications. */ - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); synchronize_net(); err = 0; diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 6cda4e2c2132..ec40a849fc42 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -752,7 +752,7 @@ int dev_mc_del_global(struct net_device *dev, const unsigned char *addr) EXPORT_SYMBOL(dev_mc_del_global); /** - * dev_mc_sync - Synchronize device's unicast list to another device + * dev_mc_sync - Synchronize device's multicast list to another device * @to: destination device * @from: source device * @@ -780,7 +780,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) EXPORT_SYMBOL(dev_mc_sync); /** - * dev_mc_sync_multiple - Synchronize device's unicast list to another + * dev_mc_sync_multiple - Synchronize device's multicast list to another * device, but allow for multiple calls to sync to multiple devices. * @to: destination device * @from: source device diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 78e9d9223e40..862989898f61 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -81,6 +81,8 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", + [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index f8e25ac41c6c..0242035192f1 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -66,7 +66,7 @@ again: struct iphdr _iph; ip: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); - if (!iph) + if (!iph || iph->ihl < 5) return false; if (ip_is_fragment(iph)) @@ -184,6 +184,22 @@ ipv6: EXPORT_SYMBOL(skb_flow_dissect); static u32 hashrnd __read_mostly; +static __always_inline void __flow_hash_secret_init(void) +{ + net_get_random_once(&hashrnd, sizeof(hashrnd)); +} + +static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) +{ + __flow_hash_secret_init(); + return jhash_3words(a, b, c, hashrnd); +} + +static __always_inline u32 __flow_hash_1word(u32 a) +{ + __flow_hash_secret_init(); + return jhash_1word(a, hashrnd); +} /* * __skb_get_rxhash: calculate a flow hash based on src/dst addresses @@ -210,9 +226,9 @@ void __skb_get_rxhash(struct sk_buff *skb) swap(keys.port16[0], keys.port16[1]); } - hash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports, hashrnd); + hash = __flow_hash_3words((__force u32)keys.dst, + (__force u32)keys.src, + (__force u32)keys.ports); if (!hash) hash = 1; @@ -248,7 +264,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, hash = skb->sk->sk_hash; else hash = (__force u16) skb->protocol; - hash = jhash_1word(hash, hashrnd); + hash = __flow_hash_1word(hash); return (u16) (((u64) hash * qcount) >> 32) + qoffset; } @@ -340,7 +356,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) else hash = (__force u16) skb->protocol ^ skb->rxhash; - hash = jhash_1word(hash, hashrnd); + hash = __flow_hash_1word(hash); queue_index = map->queues[ ((u64)hash * map->len) >> 32]; } @@ -395,11 +411,3 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, skb_set_queue_mapping(skb, queue_index); return netdev_get_tx_queue(dev, queue_index); } - -static int __init initialize_hashrnd(void) -{ - get_random_bytes(&hashrnd, sizeof(hashrnd)); - return 0; -} - -late_initcall_sync(initialize_hashrnd); diff --git a/net/core/iovec.c b/net/core/iovec.c index b77eeecc0011..4cdb7c48dad6 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -100,7 +100,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, EXPORT_SYMBOL(memcpy_toiovecend); /* - * Copy iovec from kernel. Returns -EFAULT on error. + * Copy iovec to kernel. Returns -EFAULT on error. */ int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d954b56b4e47..d03f2c9750fa 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1263,7 +1263,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); kfree(dev->ifalias); - kfree((char *)dev - dev->padded); + netdev_freemem(dev); } static const void *net_namespace(struct device *d) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index fc75c9e461b8..8f971990677c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -636,8 +636,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo netpoll_send_skb(np, send_skb); - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ + /* If there are several rx_skb_hooks for the same + * address we're fine by sending a single reply + */ break; } spin_unlock_irqrestore(&npinfo->rx_lock, flags); @@ -719,8 +720,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo netpoll_send_skb(np, send_skb); - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ + /* If there are several rx_skb_hooks for the same + * address, we're fine by sending a single reply + */ break; } spin_unlock_irqrestore(&npinfo->rx_lock, flags); @@ -756,11 +758,12 @@ static bool pkt_is_ns(struct sk_buff *skb) int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) { - int proto, len, ulen; - int hits = 0; + int proto, len, ulen, data_len; + int hits = 0, offset; const struct iphdr *iph; struct udphdr *uh; struct netpoll *np, *tmp; + uint16_t source; if (list_empty(&npinfo->rx_np)) goto out; @@ -820,7 +823,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) len -= iph->ihl*4; uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); + offset = (unsigned char *)(uh + 1) - skb->data; ulen = ntohs(uh->len); + data_len = skb->len - offset; + source = ntohs(uh->source); if (ulen != len) goto out; @@ -834,9 +840,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (np->local_port && np->local_port != ntohs(uh->dest)) continue; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + np->rx_skb_hook(np, source, skb, offset, data_len); hits++; } } else { @@ -859,7 +863,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; uh = udp_hdr(skb); + offset = (unsigned char *)(uh + 1) - skb->data; ulen = ntohs(uh->len); + data_len = skb->len - offset; + source = ntohs(uh->source); if (ulen != skb->len) goto out; if (udp6_csum_init(skb, uh, IPPROTO_UDP)) @@ -872,9 +879,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (np->local_port && np->local_port != ntohs(uh->dest)) continue; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + np->rx_skb_hook(np, source, skb, offset, data_len); hits++; } #endif @@ -1062,7 +1067,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) npinfo->netpoll = np; - if (np->rx_hook) { + if (np->rx_skb_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_flags |= NETPOLL_RX_ENABLED; list_add_tail(&np->rx, &npinfo->rx_np); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4aedf03da052..cf67144d3e3c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1984,14 +1984,15 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change) +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, + gfp_t flags) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; size_t if_info_size; - skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL); + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags); if (skb == NULL) goto errout; @@ -2002,7 +2003,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change) kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags); return; errout: if (err < 0) @@ -2716,7 +2717,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_JOIN: break; default: - rtmsg_ifinfo(RTM_NEWLINK, dev, 0); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); break; } return NOTIFY_DONE; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 3f1ec1586ae1..897da56f3aff 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -7,28 +7,20 @@ #include <linux/hrtimer.h> #include <linux/ktime.h> #include <linux/string.h> +#include <linux/net.h> #include <net/secure_seq.h> +#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) #define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; -static void net_secret_init(void) +static __always_inline void net_secret_init(void) { - u32 tmp; - int i; - - if (likely(net_secret[0])) - return; - - for (i = NET_SECRET_SIZE; i > 0;) { - do { - get_random_bytes(&tmp, sizeof(tmp)); - } while (!tmp); - cmpxchg(&net_secret[--i], 0, tmp); - } + net_get_random_once(net_secret, sizeof(net_secret)); } +#endif #ifdef CONFIG_INET static u32 seq_scale(u32 seq) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8ead744fcc94..e4115597b38b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -903,6 +903,9 @@ EXPORT_SYMBOL(skb_clone); static void skb_headers_offset_update(struct sk_buff *skb, int off) { + /* Only adjust this if it actually is csum_start rather than csum */ + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb->csum_start += off; /* {transport,network,mac}_header and tail are relative to skb->head */ skb->transport_header += off; skb->network_header += off; @@ -1109,9 +1112,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, #endif skb->tail += off; skb_headers_offset_update(skb, nhead); - /* Only adjust this if it actually is csum_start rather than csum */ - if (skb->ip_summed == CHECKSUM_PARTIAL) - skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; @@ -1176,7 +1176,6 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, NUMA_NO_NODE); int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; - int off; if (!n) return NULL; @@ -1200,11 +1199,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, copy_skb_header(n, skb); - off = newheadroom - oldheadroom; - if (n->ip_summed == CHECKSUM_PARTIAL) - n->csum_start += off; - - skb_headers_offset_update(n, off); + skb_headers_offset_update(n, newheadroom - oldheadroom); return n; } @@ -1933,9 +1928,8 @@ fault: EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. */ - -__wsum skb_checksum(const struct sk_buff *skb, int offset, - int len, __wsum csum) +__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, + __wsum csum, const struct skb_checksum_ops *ops) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -1946,7 +1940,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, if (copy > 0) { if (copy > len) copy = len; - csum = csum_partial(skb->data + offset, copy, csum); + csum = ops->update(skb->data + offset, copy, csum); if ((len -= copy) == 0) return csum; offset += copy; @@ -1967,10 +1961,10 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, if (copy > len) copy = len; vaddr = kmap_atomic(skb_frag_page(frag)); - csum2 = csum_partial(vaddr + frag->page_offset + - offset - start, copy, 0); + csum2 = ops->update(vaddr + frag->page_offset + + offset - start, copy, 0); kunmap_atomic(vaddr); - csum = csum_block_add(csum, csum2, pos); + csum = ops->combine(csum, csum2, pos, copy); if (!(len -= copy)) return csum; offset += copy; @@ -1989,9 +1983,9 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, __wsum csum2; if (copy > len) copy = len; - csum2 = skb_checksum(frag_iter, offset - start, - copy, 0); - csum = csum_block_add(csum, csum2, pos); + csum2 = __skb_checksum(frag_iter, offset - start, + copy, 0, ops); + csum = ops->combine(csum, csum2, pos, copy); if ((len -= copy) == 0) return csum; offset += copy; @@ -2003,6 +1997,18 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, return csum; } +EXPORT_SYMBOL(__skb_checksum); + +__wsum skb_checksum(const struct sk_buff *skb, int offset, + int len, __wsum csum) +{ + const struct skb_checksum_ops ops = { + .update = csum_partial_ext, + .combine = csum_block_add_ext, + }; + + return __skb_checksum(skb, offset, len, csum, &ops); +} EXPORT_SYMBOL(skb_checksum); /* Both of above in one bottle. */ @@ -2837,14 +2843,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) __copy_skb_header(nskb, skb); nskb->mac_len = skb->mac_len; - /* nskb and skb might have different headroom */ - if (nskb->ip_summed == CHECKSUM_PARTIAL) - nskb->csum_start += skb_headroom(nskb) - headroom; - - skb_reset_mac_header(nskb); - skb_set_network_header(nskb, skb->mac_len); - nskb->transport_header = (nskb->network_header + - skb_network_header_len(skb)); + skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, diff --git a/net/core/sock.c b/net/core/sock.c index fd6afa267475..ab20ed9b0f31 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -475,12 +475,6 @@ discard_and_relse: } EXPORT_SYMBOL(sk_receive_skb); -void sk_reset_txq(struct sock *sk) -{ - sk_tx_queue_clear(sk); -} -EXPORT_SYMBOL(sk_reset_txq); - struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) { struct dst_entry *dst = __sk_dst_get(sk); @@ -1847,7 +1841,17 @@ EXPORT_SYMBOL(sock_alloc_send_skb); /* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) -bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) +/** + * skb_page_frag_refill - check that a page_frag contains enough room + * @sz: minimum size of the fragment we want to get + * @pfrag: pointer to page_frag + * @prio: priority for memory allocation + * + * Note: While this allocator tries to use high order pages, there is + * no guarantee that allocations succeed. Therefore, @sz MUST be + * less or equal than PAGE_SIZE. + */ +bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio) { int order; @@ -1856,16 +1860,16 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) pfrag->offset = 0; return true; } - if (pfrag->offset < pfrag->size) + if (pfrag->offset + sz <= pfrag->size) return true; put_page(pfrag->page); } /* We restrict high order allocations to users that can afford to wait */ - order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0; + order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0; do { - gfp_t gfp = sk->sk_allocation; + gfp_t gfp = prio; if (order) gfp |= __GFP_COMP | __GFP_NOWARN; @@ -1877,6 +1881,15 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) } } while (--order >= 0); + return false; +} +EXPORT_SYMBOL(skb_page_frag_refill); + +bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) +{ + if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation))) + return true; + sk_enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); return false; diff --git a/net/core/utils.c b/net/core/utils.c index aa88e23fc87a..2f737bf90b3f 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -338,3 +338,52 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, csum_unfold(*sum))); } EXPORT_SYMBOL(inet_proto_csum_replace16); + +struct __net_random_once_work { + struct work_struct work; + struct static_key *key; +}; + +static void __net_random_once_deferred(struct work_struct *w) +{ + struct __net_random_once_work *work = + container_of(w, struct __net_random_once_work, work); + if (!static_key_enabled(work->key)) + static_key_slow_inc(work->key); + kfree(work); +} + +static void __net_random_once_disable_jump(struct static_key *key) +{ + struct __net_random_once_work *w; + + w = kmalloc(sizeof(*w), GFP_ATOMIC); + if (!w) + return; + + INIT_WORK(&w->work, __net_random_once_deferred); + w->key = key; + schedule_work(&w->work); +} + +bool __net_get_random_once(void *buf, int nbytes, bool *done, + struct static_key *done_key) +{ + static DEFINE_SPINLOCK(lock); + unsigned long flags; + + spin_lock_irqsave(&lock, flags); + if (*done) { + spin_unlock_irqrestore(&lock, flags); + return false; + } + + get_random_bytes(buf, nbytes); + *done = true; + spin_unlock_irqrestore(&lock, flags); + + __net_random_once_disable_jump(done_key); + + return true; +} +EXPORT_SYMBOL(__net_get_random_once); diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index a269aa7f7923..3284bfa988c0 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -101,16 +101,16 @@ struct dccp_ackvec_record { u8 avr_ack_nonce:1; }; -extern int dccp_ackvec_init(void); -extern void dccp_ackvec_exit(void); +int dccp_ackvec_init(void); +void dccp_ackvec_exit(void); -extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); -extern void dccp_ackvec_free(struct dccp_ackvec *av); +struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); +void dccp_ackvec_free(struct dccp_ackvec *av); -extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb); -extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum); -extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno); -extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av); +void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb); +int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum); +void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno); +u16 dccp_ackvec_buflen(const struct dccp_ackvec *av); static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av) { @@ -133,7 +133,6 @@ struct dccp_ackvec_parsed { struct list_head node; }; -extern int dccp_ackvec_parsed_add(struct list_head *head, - u8 *vec, u8 len, u8 nonce); -extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks); +int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce); +void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks); #endif /* _ACKVEC_H */ diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index fb85d371a8de..6eb837a47b5c 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -93,8 +93,8 @@ extern struct ccid_operations ccid2_ops; extern struct ccid_operations ccid3_ops; #endif -extern int ccid_initialize_builtins(void); -extern void ccid_cleanup_builtins(void); +int ccid_initialize_builtins(void); +void ccid_cleanup_builtins(void); struct ccid { struct ccid_operations *ccid_ops; @@ -106,12 +106,12 @@ static inline void *ccid_priv(const struct ccid *ccid) return (void *)ccid->ccid_priv; } -extern bool ccid_support_check(u8 const *ccid_array, u8 array_len); -extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); -extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, - char __user *, int __user *); +bool ccid_support_check(u8 const *ccid_array, u8 array_len); +int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); +int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *, int __user *); -extern struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx); +struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx); static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) { @@ -131,8 +131,8 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp) return ccid->ccid_ops->ccid_id; } -extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); -extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); +void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); +void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); /* * Congestion control of queued data packets via CCID decision. diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index d1d2f5383b7d..57f631a86ccd 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -65,9 +65,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh) struct tfrc_rx_hist; -extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, - u32 (*first_li)(struct sock *), struct sock *); -extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); -extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); +int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, + u32 (*first_li)(struct sock *), struct sock *); +u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); +void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 7ee4a9d9d335..ee362b0b630d 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -60,8 +60,8 @@ static inline struct tfrc_tx_hist_entry * return head; } -extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); -extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); +int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); +void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); /* Subtraction a-b modulo-16, respects circular wrap-around */ #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) @@ -139,20 +139,17 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) return h->loss_count > 0; } -extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, - const struct sk_buff *skb, const u64 ndp); +void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, + const u64 ndp); -extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); +int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); struct tfrc_loss_hist; -extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, - struct tfrc_loss_hist *lh, - struct sk_buff *skb, const u64 ndp, - u32 (*first_li)(struct sock *sk), - struct sock *sk); -extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, - const struct sk_buff *skb); -extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); -extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); +int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, + struct sk_buff *skb, const u64 ndp, + u32 (*first_li)(struct sock *sk), struct sock *sk); +u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb); +int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); +void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); #endif /* _DCCP_PKT_HIST_ */ diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index ed698c42a5fb..40ee7d62b652 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -55,21 +55,21 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval; } -extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); -extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); -extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); +u32 tfrc_calc_x(u16 s, u32 R, u32 p); +u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); -extern int tfrc_tx_packet_history_init(void); -extern void tfrc_tx_packet_history_exit(void); -extern int tfrc_rx_packet_history_init(void); -extern void tfrc_rx_packet_history_exit(void); +int tfrc_tx_packet_history_init(void); +void tfrc_tx_packet_history_exit(void); +int tfrc_rx_packet_history_init(void); +void tfrc_rx_packet_history_exit(void); -extern int tfrc_li_init(void); -extern void tfrc_li_exit(void); +int tfrc_li_init(void); +void tfrc_li_exit(void); #ifdef CONFIG_IP_DCCP_TFRC_LIB -extern int tfrc_lib_init(void); -extern void tfrc_lib_exit(void); +int tfrc_lib_init(void); +void tfrc_lib_exit(void); #else #define tfrc_lib_init() (0) #define tfrc_lib_exit() diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 708e75bf623d..30948784dd58 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -53,7 +53,7 @@ extern struct inet_hashinfo dccp_hashinfo; extern struct percpu_counter dccp_orphan_count; -extern void dccp_time_wait(struct sock *sk, int state, int timeo); +void dccp_time_wait(struct sock *sk, int state, int timeo); /* * Set safe upper bounds for header and option length. Since Data Offset is 8 @@ -224,114 +224,108 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb) skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0); } -extern void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb); +void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb); -extern int dccp_retransmit_skb(struct sock *sk); +int dccp_retransmit_skb(struct sock *sk); -extern void dccp_send_ack(struct sock *sk); -extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, - struct request_sock *rsk); +void dccp_send_ack(struct sock *sk); +void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, + struct request_sock *rsk); -extern void dccp_send_sync(struct sock *sk, const u64 seq, - const enum dccp_pkt_type pkt_type); +void dccp_send_sync(struct sock *sk, const u64 seq, + const enum dccp_pkt_type pkt_type); /* * TX Packet Dequeueing Interface */ -extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); -extern bool dccp_qpolicy_full(struct sock *sk); -extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); -extern struct sk_buff *dccp_qpolicy_top(struct sock *sk); -extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk); -extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); +void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); +bool dccp_qpolicy_full(struct sock *sk); +void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); +struct sk_buff *dccp_qpolicy_top(struct sock *sk); +struct sk_buff *dccp_qpolicy_pop(struct sock *sk); +bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); /* * TX Packet Output and TX Timers */ -extern void dccp_write_xmit(struct sock *sk); -extern void dccp_write_space(struct sock *sk); -extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); +void dccp_write_xmit(struct sock *sk); +void dccp_write_space(struct sock *sk); +void dccp_flush_write_queue(struct sock *sk, long *time_budget); -extern void dccp_init_xmit_timers(struct sock *sk); +void dccp_init_xmit_timers(struct sock *sk); static inline void dccp_clear_xmit_timers(struct sock *sk) { inet_csk_clear_xmit_timers(sk); } -extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); +unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); -extern const char *dccp_packet_name(const int type); +const char *dccp_packet_name(const int type); -extern void dccp_set_state(struct sock *sk, const int state); -extern void dccp_done(struct sock *sk); +void dccp_set_state(struct sock *sk, const int state); +void dccp_done(struct sock *sk); -extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, - struct sk_buff const *skb); +int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, + struct sk_buff const *skb); -extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); +int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); -extern struct sock *dccp_create_openreq_child(struct sock *sk, - const struct request_sock *req, - const struct sk_buff *skb); +struct sock *dccp_create_openreq_child(struct sock *sk, + const struct request_sock *req, + const struct sk_buff *skb); -extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); +int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); -extern struct sock *dccp_v4_request_recv_sock(struct sock *sk, - struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst); -extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct request_sock **prev); +struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); +struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct request_sock **prev); -extern int dccp_child_process(struct sock *parent, struct sock *child, - struct sk_buff *skb); -extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct dccp_hdr *dh, unsigned int len); -extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct dccp_hdr *dh, const unsigned int len); +int dccp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); +int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct dccp_hdr *dh, unsigned int len); +int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct dccp_hdr *dh, const unsigned int len); -extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); -extern void dccp_destroy_sock(struct sock *sk); +int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); +void dccp_destroy_sock(struct sock *sk); -extern void dccp_close(struct sock *sk, long timeout); -extern struct sk_buff *dccp_make_response(struct sock *sk, - struct dst_entry *dst, - struct request_sock *req); +void dccp_close(struct sock *sk, long timeout); +struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, + struct request_sock *req); -extern int dccp_connect(struct sock *sk); -extern int dccp_disconnect(struct sock *sk, int flags); -extern int dccp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -extern int dccp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); +int dccp_connect(struct sock *sk); +int dccp_disconnect(struct sock *sk, int flags); +int dccp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +int dccp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); #ifdef CONFIG_COMPAT -extern int compat_dccp_getsockopt(struct sock *sk, - int level, int optname, - char __user *optval, int __user *optlen); -extern int compat_dccp_setsockopt(struct sock *sk, - int level, int optname, - char __user *optval, unsigned int optlen); +int compat_dccp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +int compat_dccp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); #endif -extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); -extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size); -extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int nonblock, - int flags, int *addr_len); -extern void dccp_shutdown(struct sock *sk, int how); -extern int inet_dccp_listen(struct socket *sock, int backlog); -extern unsigned int dccp_poll(struct file *file, struct socket *sock, - poll_table *wait); -extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len); - -extern struct sk_buff *dccp_ctl_make_reset(struct sock *sk, - struct sk_buff *skb); -extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); -extern void dccp_send_close(struct sock *sk, const int active); -extern int dccp_invalid_packet(struct sk_buff *skb); -extern u32 dccp_sample_rtt(struct sock *sk, long delta); +int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t size); +int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int nonblock, int flags, + int *addr_len); +void dccp_shutdown(struct sock *sk, int how); +int inet_dccp_listen(struct socket *sock, int backlog); +unsigned int dccp_poll(struct file *file, struct socket *sock, + poll_table *wait); +int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); + +struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *skb); +int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); +void dccp_send_close(struct sock *sk, const int active); +int dccp_invalid_packet(struct sk_buff *skb); +u32 dccp_sample_rtt(struct sock *sk, long delta); static inline int dccp_bad_service_code(const struct sock *sk, const __be32 service) @@ -475,25 +469,25 @@ static inline int dccp_ack_pending(const struct sock *sk) return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); } -extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val); -extern int dccp_feat_finalise_settings(struct dccp_sock *dp); -extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); -extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, - struct sk_buff *skb); -extern int dccp_feat_activate_values(struct sock *sk, struct list_head *fn); -extern void dccp_feat_list_purge(struct list_head *fn_list); - -extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); -extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); -extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); -extern u32 dccp_timestamp(void); -extern void dccp_timestamping_init(void); -extern int dccp_insert_option(struct sk_buff *skb, unsigned char option, - const void *value, unsigned char len); +int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val); +int dccp_feat_finalise_settings(struct dccp_sock *dp); +int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); +int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, + struct sk_buff *skb); +int dccp_feat_activate_values(struct sock *sk, struct list_head *fn); +void dccp_feat_list_purge(struct list_head *fn_list); + +int dccp_insert_options(struct sock *sk, struct sk_buff *skb); +int dccp_insert_options_rsk(struct dccp_request_sock *, struct sk_buff *); +int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); +u32 dccp_timestamp(void); +void dccp_timestamping_init(void); +int dccp_insert_option(struct sk_buff *skb, unsigned char option, + const void *value, unsigned char len); #ifdef CONFIG_SYSCTL -extern int dccp_sysctl_init(void); -extern void dccp_sysctl_exit(void); +int dccp_sysctl_init(void); +void dccp_sysctl_exit(void); #else static inline int dccp_sysctl_init(void) { diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 90b957d34d26..0e75cebb2187 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -107,13 +107,13 @@ extern unsigned long sysctl_dccp_sequence_window; extern int sysctl_dccp_rx_ccid; extern int sysctl_dccp_tx_ccid; -extern int dccp_feat_init(struct sock *sk); -extern void dccp_feat_initialise_sysctls(void); -extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, - u8 const *list, u8 len); -extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, - u8 mand, u8 opt, u8 feat, u8 *val, u8 len); -extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); +int dccp_feat_init(struct sock *sk); +void dccp_feat_initialise_sysctls(void); +int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, + u8 const *list, u8 len); +int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, + u8 mand, u8 opt, u8 feat, u8 *val, u8 len); +int dccp_feat_clone_list(struct list_head const *, struct list_head *); /* * Encoding variable-length options and their maximum length. @@ -127,11 +127,11 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); */ #define DCCP_OPTVAL_MAXLEN 6 -extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); -extern u64 dccp_decode_value_var(const u8 *bf, const u8 len); -extern u64 dccp_feat_nn_get(struct sock *sk, u8 feat); +void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); +u64 dccp_decode_value_var(const u8 *bf, const u8 len); +u64 dccp_feat_nn_get(struct sock *sk, u8 feat); -extern int dccp_insert_option_mandatory(struct sk_buff *skb); -extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, - u8 *val, u8 len, bool repeat_first); +int dccp_insert_option_mandatory(struct sk_buff *skb); +int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, u8 *val, u8 len, + bool repeat_first); #endif /* _DCCP_FEAT_H */ diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig new file mode 100644 index 000000000000..0d3d709052ca --- /dev/null +++ b/net/hsr/Kconfig @@ -0,0 +1,27 @@ +# +# IEC 62439-3 High-availability Seamless Redundancy +# + +config HSR + tristate "High-availability Seamless Redundancy (HSR)" + ---help--- + If you say Y here, then your Linux box will be able to act as a + DANH ("Doubly attached node implementing HSR"). For this to work, + your Linux box needs (at least) two physical Ethernet interfaces, + and it must be connected as a node in a ring network together with + other HSR capable nodes. + + All Ethernet frames sent over the hsr device will be sent in both + directions on the ring (over both slave ports), giving a redundant, + instant fail-over network. Each HSR node in the ring acts like a + bridge for HSR frames, but filters frames that have been forwarded + earlier. + + This code is a "best effort" to comply with the HSR standard as + described in IEC 62439-3:2010 (HSRv0), but no compliancy tests have + been made. + + You need to perform any and all necessary tests yourself before + relying on this code in a safety critical system! + + If unsure, say N. diff --git a/net/hsr/Makefile b/net/hsr/Makefile new file mode 100644 index 000000000000..b68359f181cc --- /dev/null +++ b/net/hsr/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for HSR +# + +obj-$(CONFIG_HSR) += hsr.o + +hsr-y := hsr_main.o hsr_framereg.o hsr_device.o hsr_netlink.o diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c new file mode 100644 index 000000000000..cac505f166d5 --- /dev/null +++ b/net/hsr/hsr_device.c @@ -0,0 +1,596 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * + * This file contains device methods for creating, using and destroying + * virtual HSR devices. + */ + +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/if_arp.h> +#include <linux/rtnetlink.h> +#include <linux/pkt_sched.h> +#include "hsr_device.h" +#include "hsr_framereg.h" +#include "hsr_main.h" + + +static bool is_admin_up(struct net_device *dev) +{ + return dev && (dev->flags & IFF_UP); +} + +static bool is_slave_up(struct net_device *dev) +{ + return dev && is_admin_up(dev) && netif_oper_up(dev); +} + +static void __hsr_set_operstate(struct net_device *dev, int transition) +{ + write_lock_bh(&dev_base_lock); + if (dev->operstate != transition) { + dev->operstate = transition; + write_unlock_bh(&dev_base_lock); + netdev_state_change(dev); + } else { + write_unlock_bh(&dev_base_lock); + } +} + +void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1, + struct net_device *slave2) +{ + if (!is_admin_up(hsr_dev)) { + __hsr_set_operstate(hsr_dev, IF_OPER_DOWN); + return; + } + + if (is_slave_up(slave1) || is_slave_up(slave2)) + __hsr_set_operstate(hsr_dev, IF_OPER_UP); + else + __hsr_set_operstate(hsr_dev, IF_OPER_LOWERLAYERDOWN); +} + +void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1, + struct net_device *slave2) +{ + if (is_slave_up(slave1) || is_slave_up(slave2)) + netif_carrier_on(hsr_dev); + else + netif_carrier_off(hsr_dev); +} + + +void hsr_check_announce(struct net_device *hsr_dev, int old_operstate) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(hsr_dev); + + if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) { + /* Went up */ + hsr_priv->announce_count = 0; + hsr_priv->announce_timer.expires = jiffies + + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + add_timer(&hsr_priv->announce_timer); + } + + if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP)) + /* Went down */ + del_timer(&hsr_priv->announce_timer); +} + + +int hsr_get_max_mtu(struct hsr_priv *hsr_priv) +{ + int mtu_max; + + if (hsr_priv->slave[0] && hsr_priv->slave[1]) + mtu_max = min(hsr_priv->slave[0]->mtu, hsr_priv->slave[1]->mtu); + else if (hsr_priv->slave[0]) + mtu_max = hsr_priv->slave[0]->mtu; + else if (hsr_priv->slave[1]) + mtu_max = hsr_priv->slave[1]->mtu; + else + mtu_max = HSR_TAGLEN; + + return mtu_max - HSR_TAGLEN; +} + +static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(dev); + + if (new_mtu > hsr_get_max_mtu(hsr_priv)) { + netdev_info(hsr_priv->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n", + HSR_TAGLEN); + return -EINVAL; + } + + dev->mtu = new_mtu; + + return 0; +} + +static int hsr_dev_open(struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + int i; + char *slave_name; + + hsr_priv = netdev_priv(dev); + + for (i = 0; i < HSR_MAX_SLAVE; i++) { + if (hsr_priv->slave[i]) + slave_name = hsr_priv->slave[i]->name; + else + slave_name = "null"; + + if (!is_slave_up(hsr_priv->slave[i])) + netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a working HSR network\n", + 'A' + i, slave_name); + } + + return 0; +} + +static int hsr_dev_close(struct net_device *dev) +{ + /* Nothing to do here. We could try to restore the state of the slaves + * to what they were before being changed by the hsr master dev's state, + * but they might have been changed manually in the mean time too, so + * taking them up or down here might be confusing and is probably not a + * good idea. + */ + return 0; +} + + +static void hsr_fill_tag(struct hsr_ethhdr *hsr_ethhdr, struct hsr_priv *hsr_priv) +{ + unsigned long irqflags; + + /* IEC 62439-1:2010, p 48, says the 4-bit "path" field can take values + * between 0001-1001 ("ring identifier", for regular HSR frames), + * or 1111 ("HSR management", supervision frames). Unfortunately, the + * spec writers forgot to explain what a "ring identifier" is, or + * how it is used. So we just set this to 0001 for regular frames, + * and 1111 for supervision frames. + */ + set_hsr_tag_path(&hsr_ethhdr->hsr_tag, 0x1); + + /* IEC 62439-1:2010, p 12: "The link service data unit in an Ethernet + * frame is the content of the frame located between the Length/Type + * field and the Frame Check Sequence." + * + * IEC 62439-3, p 48, specifies the "original LPDU" to include the + * original "LT" field (what "LT" means is not explained anywhere as + * far as I can see - perhaps "Length/Type"?). So LSDU_size might + * equal original length + 2. + * Also, the fact that this field is not used anywhere (might be used + * by a RedBox connecting HSR and PRP nets?) means I cannot test its + * correctness. Instead of guessing, I set this to 0 here, to make any + * problems immediately apparent. Anyone using this driver with PRP/HSR + * RedBoxes might need to fix this... + */ + set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, 0); + + spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags); + hsr_ethhdr->hsr_tag.sequence_nr = htons(hsr_priv->sequence_nr); + hsr_priv->sequence_nr++; + spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags); + + hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; + + hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP); +} + +static int slave_xmit(struct sk_buff *skb, struct hsr_priv *hsr_priv, + enum hsr_dev_idx dev_idx) +{ + struct hsr_ethhdr *hsr_ethhdr; + + hsr_ethhdr = (struct hsr_ethhdr *) skb->data; + + skb->dev = hsr_priv->slave[dev_idx]; + + hsr_addr_subst_dest(hsr_priv, &hsr_ethhdr->ethhdr, dev_idx); + + /* Address substitution (IEC62439-3 pp 26, 50): replace mac + * address of outgoing frame with that of the outgoing slave's. + */ + memcpy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr, ETH_ALEN); + + return dev_queue_xmit(skb); +} + + +static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + struct hsr_ethhdr *hsr_ethhdr; + struct sk_buff *skb2; + int res1, res2; + + hsr_priv = netdev_priv(dev); + hsr_ethhdr = (struct hsr_ethhdr *) skb->data; + + if ((skb->protocol != htons(ETH_P_PRP)) || + (hsr_ethhdr->ethhdr.h_proto != htons(ETH_P_PRP))) { + hsr_fill_tag(hsr_ethhdr, hsr_priv); + skb->protocol = htons(ETH_P_PRP); + } + + skb2 = pskb_copy(skb, GFP_ATOMIC); + + res1 = NET_XMIT_DROP; + if (likely(hsr_priv->slave[HSR_DEV_SLAVE_A])) + res1 = slave_xmit(skb, hsr_priv, HSR_DEV_SLAVE_A); + + res2 = NET_XMIT_DROP; + if (likely(skb2 && hsr_priv->slave[HSR_DEV_SLAVE_B])) + res2 = slave_xmit(skb2, hsr_priv, HSR_DEV_SLAVE_B); + + if (likely(res1 == NET_XMIT_SUCCESS || res1 == NET_XMIT_CN || + res2 == NET_XMIT_SUCCESS || res2 == NET_XMIT_CN)) { + hsr_priv->dev->stats.tx_packets++; + hsr_priv->dev->stats.tx_bytes += skb->len; + } else { + hsr_priv->dev->stats.tx_dropped++; + } + + return NETDEV_TX_OK; +} + + +static int hsr_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + int res; + + /* Make room for the HSR tag now. We will fill it in later (in + * hsr_dev_xmit) + */ + if (skb_headroom(skb) < HSR_TAGLEN + ETH_HLEN) + return -ENOBUFS; + skb_push(skb, HSR_TAGLEN); + + /* To allow VLAN/HSR combos we should probably use + * res = dev_hard_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN); + * here instead. It would require other changes too, though - e.g. + * separate headers for each slave etc... + */ + res = eth_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN); + if (res <= 0) + return res; + skb_reset_mac_header(skb); + + return res + HSR_TAGLEN; +} + + +static const struct header_ops hsr_header_ops = { + .create = hsr_header_create, + .parse = eth_header_parse, +}; + + +/* HSR:2010 supervision frames should be padded so that the whole frame, + * including headers and FCS, is 64 bytes (without VLAN). + */ +static int hsr_pad(int size) +{ + const int min_size = ETH_ZLEN - HSR_TAGLEN - ETH_HLEN; + + if (size >= min_size) + return size; + return min_size; +} + +static void send_hsr_supervision_frame(struct net_device *hsr_dev, u8 type) +{ + struct hsr_priv *hsr_priv; + struct sk_buff *skb; + int hlen, tlen; + struct hsr_sup_tag *hsr_stag; + struct hsr_sup_payload *hsr_sp; + unsigned long irqflags; + + hlen = LL_RESERVED_SPACE(hsr_dev); + tlen = hsr_dev->needed_tailroom; + skb = alloc_skb(hsr_pad(sizeof(struct hsr_sup_payload)) + hlen + tlen, + GFP_ATOMIC); + + if (skb == NULL) + return; + + hsr_priv = netdev_priv(hsr_dev); + + skb_reserve(skb, hlen); + + skb->dev = hsr_dev; + skb->protocol = htons(ETH_P_PRP); + skb->priority = TC_PRIO_CONTROL; + + if (dev_hard_header(skb, skb->dev, ETH_P_PRP, + hsr_priv->sup_multicast_addr, + skb->dev->dev_addr, skb->len) < 0) + goto out; + + skb_pull(skb, sizeof(struct ethhdr)); + hsr_stag = (typeof(hsr_stag)) skb->data; + + set_hsr_stag_path(hsr_stag, 0xf); + set_hsr_stag_HSR_Ver(hsr_stag, 0); + + spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags); + hsr_stag->sequence_nr = htons(hsr_priv->sequence_nr); + hsr_priv->sequence_nr++; + spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags); + + hsr_stag->HSR_TLV_Type = type; + hsr_stag->HSR_TLV_Length = 12; + + skb_push(skb, sizeof(struct ethhdr)); + + /* Payload: MacAddressA */ + hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp)); + memcpy(hsr_sp->MacAddressA, hsr_dev->dev_addr, ETH_ALEN); + + dev_queue_xmit(skb); + return; + +out: + kfree_skb(skb); +} + + +/* Announce (supervision frame) timer function + */ +static void hsr_announce(unsigned long data) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = (struct hsr_priv *) data; + + if (hsr_priv->announce_count < 3) { + send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_ANNOUNCE); + hsr_priv->announce_count++; + } else { + send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_LIFE_CHECK); + } + + if (hsr_priv->announce_count < 3) + hsr_priv->announce_timer.expires = jiffies + + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + else + hsr_priv->announce_timer.expires = jiffies + + msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + + if (is_admin_up(hsr_priv->dev)) + add_timer(&hsr_priv->announce_timer); +} + + +static void restore_slaves(struct net_device *hsr_dev) +{ + struct hsr_priv *hsr_priv; + int i; + int res; + + hsr_priv = netdev_priv(hsr_dev); + + rtnl_lock(); + + /* Restore promiscuity */ + for (i = 0; i < HSR_MAX_SLAVE; i++) { + if (!hsr_priv->slave[i]) + continue; + res = dev_set_promiscuity(hsr_priv->slave[i], -1); + if (res) + netdev_info(hsr_dev, + "Cannot restore slave promiscuity (%s, %d)\n", + hsr_priv->slave[i]->name, res); + } + + rtnl_unlock(); +} + +static void reclaim_hsr_dev(struct rcu_head *rh) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = container_of(rh, struct hsr_priv, rcu_head); + free_netdev(hsr_priv->dev); +} + + +/* According to comments in the declaration of struct net_device, this function + * is "Called from unregister, can be used to call free_netdev". Ok then... + */ +static void hsr_dev_destroy(struct net_device *hsr_dev) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(hsr_dev); + + del_timer(&hsr_priv->announce_timer); + unregister_hsr_master(hsr_priv); /* calls list_del_rcu on hsr_priv */ + restore_slaves(hsr_dev); + call_rcu(&hsr_priv->rcu_head, reclaim_hsr_dev); /* reclaim hsr_priv */ +} + +static const struct net_device_ops hsr_device_ops = { + .ndo_change_mtu = hsr_dev_change_mtu, + .ndo_open = hsr_dev_open, + .ndo_stop = hsr_dev_close, + .ndo_start_xmit = hsr_dev_xmit, +}; + + +void hsr_dev_setup(struct net_device *dev) +{ + random_ether_addr(dev->dev_addr); + + ether_setup(dev); + dev->header_ops = &hsr_header_ops; + dev->netdev_ops = &hsr_device_ops; + dev->tx_queue_len = 0; + + dev->destructor = hsr_dev_destroy; +} + + +/* Return true if dev is a HSR master; return false otherwise. + */ +bool is_hsr_master(struct net_device *dev) +{ + return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit); +} + +static int check_slave_ok(struct net_device *dev) +{ + /* Don't allow HSR on non-ethernet like devices */ + if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) || + (dev->addr_len != ETH_ALEN)) { + netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n"); + return -EINVAL; + } + + /* Don't allow enslaving hsr devices */ + if (is_hsr_master(dev)) { + netdev_info(dev, "Cannot create trees of HSR devices.\n"); + return -EINVAL; + } + + if (is_hsr_slave(dev)) { + netdev_info(dev, "This device is already a HSR slave.\n"); + return -EINVAL; + } + + if (dev->priv_flags & IFF_802_1Q_VLAN) { + netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n"); + return -EINVAL; + } + + /* HSR over bonded devices has not been tested, but I'm not sure it + * won't work... + */ + + return 0; +} + + +/* Default multicast address for HSR Supervision frames */ +static const unsigned char def_multicast_addr[ETH_ALEN] = { + 0x01, 0x15, 0x4e, 0x00, 0x01, 0x00 +}; + +int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], + unsigned char multicast_spec) +{ + struct hsr_priv *hsr_priv; + int i; + int res; + + hsr_priv = netdev_priv(hsr_dev); + hsr_priv->dev = hsr_dev; + INIT_LIST_HEAD(&hsr_priv->node_db); + INIT_LIST_HEAD(&hsr_priv->self_node_db); + for (i = 0; i < HSR_MAX_SLAVE; i++) + hsr_priv->slave[i] = slave[i]; + + spin_lock_init(&hsr_priv->seqnr_lock); + /* Overflow soon to find bugs easier: */ + hsr_priv->sequence_nr = USHRT_MAX - 1024; + + init_timer(&hsr_priv->announce_timer); + hsr_priv->announce_timer.function = hsr_announce; + hsr_priv->announce_timer.data = (unsigned long) hsr_priv; + + memcpy(hsr_priv->sup_multicast_addr, def_multicast_addr, ETH_ALEN); + hsr_priv->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; + +/* FIXME: should I modify the value of these? + * + * - hsr_dev->flags - i.e. + * IFF_MASTER/SLAVE? + * - hsr_dev->priv_flags - i.e. + * IFF_EBRIDGE? + * IFF_TX_SKB_SHARING? + * IFF_HSR_MASTER/SLAVE? + */ + + for (i = 0; i < HSR_MAX_SLAVE; i++) { + res = check_slave_ok(slave[i]); + if (res) + return res; + } + + hsr_dev->features = slave[0]->features & slave[1]->features; + /* Prevent recursive tx locking */ + hsr_dev->features |= NETIF_F_LLTX; + /* VLAN on top of HSR needs testing and probably some work on + * hsr_header_create() etc. + */ + hsr_dev->features |= NETIF_F_VLAN_CHALLENGED; + + /* Set hsr_dev's MAC address to that of mac_slave1 */ + memcpy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr, ETH_ALEN); + + /* Set required header length */ + for (i = 0; i < HSR_MAX_SLAVE; i++) { + if (slave[i]->hard_header_len + HSR_TAGLEN > + hsr_dev->hard_header_len) + hsr_dev->hard_header_len = + slave[i]->hard_header_len + HSR_TAGLEN; + } + + /* MTU */ + for (i = 0; i < HSR_MAX_SLAVE; i++) + if (slave[i]->mtu - HSR_TAGLEN < hsr_dev->mtu) + hsr_dev->mtu = slave[i]->mtu - HSR_TAGLEN; + + /* Make sure the 1st call to netif_carrier_on() gets through */ + netif_carrier_off(hsr_dev); + + /* Promiscuity */ + for (i = 0; i < HSR_MAX_SLAVE; i++) { + res = dev_set_promiscuity(slave[i], 1); + if (res) { + netdev_info(hsr_dev, "Cannot set slave promiscuity (%s, %d)\n", + slave[i]->name, res); + goto fail; + } + } + + /* Make sure we recognize frames from ourselves in hsr_rcv() */ + res = hsr_create_self_node(&hsr_priv->self_node_db, + hsr_dev->dev_addr, + hsr_priv->slave[1]->dev_addr); + if (res < 0) + goto fail; + + res = register_netdevice(hsr_dev); + if (res) + goto fail; + + register_hsr_master(hsr_priv); + + return 0; + +fail: + restore_slaves(hsr_dev); + return res; +} diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h new file mode 100644 index 000000000000..2c7148e73914 --- /dev/null +++ b/net/hsr/hsr_device.h @@ -0,0 +1,29 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + */ + +#ifndef __HSR_DEVICE_H +#define __HSR_DEVICE_H + +#include <linux/netdevice.h> +#include "hsr_main.h" + +void hsr_dev_setup(struct net_device *dev); +int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], + unsigned char multicast_spec); +void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1, + struct net_device *slave2); +void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1, + struct net_device *slave2); +void hsr_check_announce(struct net_device *hsr_dev, int old_operstate); +bool is_hsr_master(struct net_device *dev); +int hsr_get_max_mtu(struct hsr_priv *hsr_priv); + +#endif /* __HSR_DEVICE_H */ diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c new file mode 100644 index 000000000000..003f5bb3acd2 --- /dev/null +++ b/net/hsr/hsr_framereg.c @@ -0,0 +1,503 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * + * The HSR spec says never to forward the same frame twice on the same + * interface. A frame is identified by its source MAC address and its HSR + * sequence number. This code keeps track of senders and their sequence numbers + * to allow filtering of duplicate frames, and to detect HSR ring errors. + */ + +#include <linux/if_ether.h> +#include <linux/etherdevice.h> +#include <linux/slab.h> +#include <linux/rculist.h> +#include "hsr_main.h" +#include "hsr_framereg.h" +#include "hsr_netlink.h" + + +struct node_entry { + struct list_head mac_list; + unsigned char MacAddressA[ETH_ALEN]; + unsigned char MacAddressB[ETH_ALEN]; + enum hsr_dev_idx AddrB_if; /* The local slave through which AddrB + * frames are received from this node + */ + unsigned long time_in[HSR_MAX_SLAVE]; + bool time_in_stale[HSR_MAX_SLAVE]; + u16 seq_out[HSR_MAX_DEV]; + struct rcu_head rcu_head; +}; + +/* TODO: use hash lists for mac addresses (linux/jhash.h)? */ + + + +/* Search for mac entry. Caller must hold rcu read lock. + */ +static struct node_entry *find_node_by_AddrA(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]) +{ + struct node_entry *node; + + list_for_each_entry_rcu(node, node_db, mac_list) { + if (ether_addr_equal(node->MacAddressA, addr)) + return node; + } + + return NULL; +} + + +/* Search for mac entry. Caller must hold rcu read lock. + */ +static struct node_entry *find_node_by_AddrB(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]) +{ + struct node_entry *node; + + list_for_each_entry_rcu(node, node_db, mac_list) { + if (ether_addr_equal(node->MacAddressB, addr)) + return node; + } + + return NULL; +} + + +/* Search for mac entry. Caller must hold rcu read lock. + */ +struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb) +{ + struct node_entry *node; + struct ethhdr *ethhdr; + + if (!skb_mac_header_was_set(skb)) + return NULL; + + ethhdr = (struct ethhdr *) skb_mac_header(skb); + + list_for_each_entry_rcu(node, node_db, mac_list) { + if (ether_addr_equal(node->MacAddressA, ethhdr->h_source)) + return node; + if (ether_addr_equal(node->MacAddressB, ethhdr->h_source)) + return node; + } + + return NULL; +} + + +/* Helper for device init; the self_node_db is used in hsr_rcv() to recognize + * frames from self that's been looped over the HSR ring. + */ +int hsr_create_self_node(struct list_head *self_node_db, + unsigned char addr_a[ETH_ALEN], + unsigned char addr_b[ETH_ALEN]) +{ + struct node_entry *node, *oldnode; + + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + memcpy(node->MacAddressA, addr_a, ETH_ALEN); + memcpy(node->MacAddressB, addr_b, ETH_ALEN); + + rcu_read_lock(); + oldnode = list_first_or_null_rcu(self_node_db, + struct node_entry, mac_list); + if (oldnode) { + list_replace_rcu(&oldnode->mac_list, &node->mac_list); + rcu_read_unlock(); + synchronize_rcu(); + kfree(oldnode); + } else { + rcu_read_unlock(); + list_add_tail_rcu(&node->mac_list, self_node_db); + } + + return 0; +} + +static void node_entry_reclaim(struct rcu_head *rh) +{ + kfree(container_of(rh, struct node_entry, rcu_head)); +} + + +/* Add/merge node to the database of nodes. 'skb' must contain an HSR + * supervision frame. + * - If the supervision header's MacAddressA field is not yet in the database, + * this frame is from an hitherto unknown node - add it to the database. + * - If the sender's MAC address is not the same as its MacAddressA address, + * the node is using PICS_SUBS (address substitution). Record the sender's + * address as the node's MacAddressB. + * + * This function needs to work even if the sender node has changed one of its + * slaves' MAC addresses. In this case, there are four different cases described + * by (Addr-changed, received-from) pairs as follows. Note that changing the + * SlaveA address is equal to changing the node's own address: + * + * - (AddrB, SlaveB): The new AddrB will be recorded by PICS_SUBS code since + * node == NULL. + * - (AddrB, SlaveA): Will work as usual (the AddrB change won't be detected + * from this frame). + * + * - (AddrA, SlaveB): The old node will be found. We need to detect this and + * remove the node. + * - (AddrA, SlaveA): A new node will be registered (non-PICS_SUBS at first). + * The old one will be pruned after HSR_NODE_FORGET_TIME. + * + * We also need to detect if the sender's SlaveA and SlaveB cables have been + * swapped. + */ +struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, + struct node_entry *node, + struct sk_buff *skb, + enum hsr_dev_idx dev_idx) +{ + struct hsr_sup_payload *hsr_sp; + struct hsr_ethhdr_sp *hsr_ethsup; + int i; + unsigned long now; + + hsr_ethsup = (struct hsr_ethhdr_sp *) skb_mac_header(skb); + hsr_sp = (struct hsr_sup_payload *) skb->data; + + if (node && !ether_addr_equal(node->MacAddressA, hsr_sp->MacAddressA)) { + /* Node has changed its AddrA, frame was received from SlaveB */ + list_del_rcu(&node->mac_list); + call_rcu(&node->rcu_head, node_entry_reclaim); + node = NULL; + } + + if (node && (dev_idx == node->AddrB_if) && + !ether_addr_equal(node->MacAddressB, hsr_ethsup->ethhdr.h_source)) { + /* Cables have been swapped */ + list_del_rcu(&node->mac_list); + call_rcu(&node->rcu_head, node_entry_reclaim); + node = NULL; + } + + if (node && (dev_idx != node->AddrB_if) && + (node->AddrB_if != HSR_DEV_NONE) && + !ether_addr_equal(node->MacAddressA, hsr_ethsup->ethhdr.h_source)) { + /* Cables have been swapped */ + list_del_rcu(&node->mac_list); + call_rcu(&node->rcu_head, node_entry_reclaim); + node = NULL; + } + + if (node) + return node; + + node = find_node_by_AddrA(&hsr_priv->node_db, hsr_sp->MacAddressA); + if (node) { + /* Node is known, but frame was received from an unknown + * address. Node is PICS_SUBS capable; merge its AddrB. + */ + memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN); + node->AddrB_if = dev_idx; + return node; + } + + node = kzalloc(sizeof(*node), GFP_ATOMIC); + if (!node) + return NULL; + + memcpy(node->MacAddressA, hsr_sp->MacAddressA, ETH_ALEN); + memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN); + if (!ether_addr_equal(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source)) + node->AddrB_if = dev_idx; + else + node->AddrB_if = HSR_DEV_NONE; + + /* We are only interested in time diffs here, so use current jiffies + * as initialization. (0 could trigger an spurious ring error warning). + */ + now = jiffies; + for (i = 0; i < HSR_MAX_SLAVE; i++) + node->time_in[i] = now; + for (i = 0; i < HSR_MAX_DEV; i++) + node->seq_out[i] = ntohs(hsr_ethsup->hsr_sup.sequence_nr) - 1; + + list_add_tail_rcu(&node->mac_list, &hsr_priv->node_db); + + return node; +} + + +/* 'skb' is a frame meant for this host, that is to be passed to upper layers. + * + * If the frame was sent by a node's B interface, replace the sender + * address with that node's "official" address (MacAddressA) so that upper + * layers recognize where it came from. + */ +void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb) +{ + struct ethhdr *ethhdr; + struct node_entry *node; + + if (!skb_mac_header_was_set(skb)) { + WARN_ONCE(1, "%s: Mac header not set\n", __func__); + return; + } + ethhdr = (struct ethhdr *) skb_mac_header(skb); + + rcu_read_lock(); + node = find_node_by_AddrB(&hsr_priv->node_db, ethhdr->h_source); + if (node) + memcpy(ethhdr->h_source, node->MacAddressA, ETH_ALEN); + rcu_read_unlock(); +} + + +/* 'skb' is a frame meant for another host. + * 'hsr_dev_idx' is the HSR index of the outgoing device + * + * Substitute the target (dest) MAC address if necessary, so the it matches the + * recipient interface MAC address, regardless of whether that is the + * recipient's A or B interface. + * This is needed to keep the packets flowing through switches that learn on + * which "side" the different interfaces are. + */ +void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, + enum hsr_dev_idx dev_idx) +{ + struct node_entry *node; + + rcu_read_lock(); + node = find_node_by_AddrA(&hsr_priv->node_db, ethhdr->h_dest); + if (node && (node->AddrB_if == dev_idx)) + memcpy(ethhdr->h_dest, node->MacAddressB, ETH_ALEN); + rcu_read_unlock(); +} + + +/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, + * false otherwise. + */ +static bool seq_nr_after(u16 a, u16 b) +{ + /* Remove inconsistency where + * seq_nr_after(a, b) == seq_nr_before(a, b) */ + if ((int) b - a == 32768) + return false; + + return (((s16) (b - a)) < 0); +} +#define seq_nr_before(a, b) seq_nr_after((b), (a)) +#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b))) +#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) + + +void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx) +{ + if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) { + WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx); + return; + } + node->time_in[dev_idx] = jiffies; + node->time_in_stale[dev_idx] = false; +} + + +/* 'skb' is a HSR Ethernet frame (with a HSR tag inserted), with a valid + * ethhdr->h_source address and skb->mac_header set. + * + * Return: + * 1 if frame can be shown to have been sent recently on this interface, + * 0 otherwise, or + * negative error code on error + */ +int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx, + struct sk_buff *skb) +{ + struct hsr_ethhdr *hsr_ethhdr; + u16 sequence_nr; + + if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) { + WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx); + return -EINVAL; + } + if (!skb_mac_header_was_set(skb)) { + WARN_ONCE(1, "%s: Mac header not set\n", __func__); + return -EINVAL; + } + hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb); + + sequence_nr = ntohs(hsr_ethhdr->hsr_tag.sequence_nr); + if (seq_nr_before_or_eq(sequence_nr, node->seq_out[dev_idx])) + return 1; + + node->seq_out[dev_idx] = sequence_nr; + return 0; +} + + + +static bool is_late(struct node_entry *node, enum hsr_dev_idx dev_idx) +{ + enum hsr_dev_idx other; + + if (node->time_in_stale[dev_idx]) + return true; + + if (dev_idx == HSR_DEV_SLAVE_A) + other = HSR_DEV_SLAVE_B; + else + other = HSR_DEV_SLAVE_A; + + if (node->time_in_stale[other]) + return false; + + if (time_after(node->time_in[other], node->time_in[dev_idx] + + msecs_to_jiffies(MAX_SLAVE_DIFF))) + return true; + + return false; +} + + +/* Remove stale sequence_nr records. Called by timer every + * HSR_LIFE_CHECK_INTERVAL (two seconds or so). + */ +void hsr_prune_nodes(struct hsr_priv *hsr_priv) +{ + struct node_entry *node; + unsigned long timestamp; + unsigned long time_a, time_b; + + rcu_read_lock(); + list_for_each_entry_rcu(node, &hsr_priv->node_db, mac_list) { + /* Shorthand */ + time_a = node->time_in[HSR_DEV_SLAVE_A]; + time_b = node->time_in[HSR_DEV_SLAVE_B]; + + /* Check for timestamps old enough to risk wrap-around */ + if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET/2)) + node->time_in_stale[HSR_DEV_SLAVE_A] = true; + if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET/2)) + node->time_in_stale[HSR_DEV_SLAVE_B] = true; + + /* Get age of newest frame from node. + * At least one time_in is OK here; nodes get pruned long + * before both time_ins can get stale + */ + timestamp = time_a; + if (node->time_in_stale[HSR_DEV_SLAVE_A] || + (!node->time_in_stale[HSR_DEV_SLAVE_B] && + time_after(time_b, time_a))) + timestamp = time_b; + + /* Warn of ring error only as long as we get frames at all */ + if (time_is_after_jiffies(timestamp + + msecs_to_jiffies(1.5*MAX_SLAVE_DIFF))) { + + if (is_late(node, HSR_DEV_SLAVE_A)) + hsr_nl_ringerror(hsr_priv, node->MacAddressA, + HSR_DEV_SLAVE_A); + else if (is_late(node, HSR_DEV_SLAVE_B)) + hsr_nl_ringerror(hsr_priv, node->MacAddressA, + HSR_DEV_SLAVE_B); + } + + /* Prune old entries */ + if (time_is_before_jiffies(timestamp + + msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { + hsr_nl_nodedown(hsr_priv, node->MacAddressA); + list_del_rcu(&node->mac_list); + /* Note that we need to free this entry later: */ + call_rcu(&node->rcu_head, node_entry_reclaim); + } + } + rcu_read_unlock(); +} + + +void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, + unsigned char addr[ETH_ALEN]) +{ + struct node_entry *node; + + if (!_pos) { + node = list_first_or_null_rcu(&hsr_priv->node_db, + struct node_entry, mac_list); + if (node) + memcpy(addr, node->MacAddressA, ETH_ALEN); + return node; + } + + node = _pos; + list_for_each_entry_continue_rcu(node, &hsr_priv->node_db, mac_list) { + memcpy(addr, node->MacAddressA, ETH_ALEN); + return node; + } + + return NULL; +} + + +int hsr_get_node_data(struct hsr_priv *hsr_priv, + const unsigned char *addr, + unsigned char addr_b[ETH_ALEN], + unsigned int *addr_b_ifindex, + int *if1_age, + u16 *if1_seq, + int *if2_age, + u16 *if2_seq) +{ + struct node_entry *node; + unsigned long tdiff; + + + rcu_read_lock(); + node = find_node_by_AddrA(&hsr_priv->node_db, addr); + if (!node) { + rcu_read_unlock(); + return -ENOENT; /* No such entry */ + } + + memcpy(addr_b, node->MacAddressB, ETH_ALEN); + + tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_A]; + if (node->time_in_stale[HSR_DEV_SLAVE_A]) + *if1_age = INT_MAX; +#if HZ <= MSEC_PER_SEC + else if (tdiff > msecs_to_jiffies(INT_MAX)) + *if1_age = INT_MAX; +#endif + else + *if1_age = jiffies_to_msecs(tdiff); + + tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_B]; + if (node->time_in_stale[HSR_DEV_SLAVE_B]) + *if2_age = INT_MAX; +#if HZ <= MSEC_PER_SEC + else if (tdiff > msecs_to_jiffies(INT_MAX)) + *if2_age = INT_MAX; +#endif + else + *if2_age = jiffies_to_msecs(tdiff); + + /* Present sequence numbers as if they were incoming on interface */ + *if1_seq = node->seq_out[HSR_DEV_SLAVE_B]; + *if2_seq = node->seq_out[HSR_DEV_SLAVE_A]; + + if ((node->AddrB_if != HSR_DEV_NONE) && hsr_priv->slave[node->AddrB_if]) + *addr_b_ifindex = hsr_priv->slave[node->AddrB_if]->ifindex; + else + *addr_b_ifindex = -1; + + rcu_read_unlock(); + + return 0; +} diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h new file mode 100644 index 000000000000..e6c4022030ad --- /dev/null +++ b/net/hsr/hsr_framereg.h @@ -0,0 +1,53 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + */ + +#ifndef _HSR_FRAMEREG_H +#define _HSR_FRAMEREG_H + +#include "hsr_main.h" + +struct node_entry; + +struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb); + +struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, + struct node_entry *node, + struct sk_buff *skb, + enum hsr_dev_idx dev_idx); + +void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb); +void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, + enum hsr_dev_idx dev_idx); + +void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx); + +int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx, + struct sk_buff *skb); + +void hsr_prune_nodes(struct hsr_priv *hsr_priv); + +int hsr_create_self_node(struct list_head *self_node_db, + unsigned char addr_a[ETH_ALEN], + unsigned char addr_b[ETH_ALEN]); + +void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, + unsigned char addr[ETH_ALEN]); + +int hsr_get_node_data(struct hsr_priv *hsr_priv, + const unsigned char *addr, + unsigned char addr_b[ETH_ALEN], + unsigned int *addr_b_ifindex, + int *if1_age, + u16 *if1_seq, + int *if2_age, + u16 *if2_seq); + +#endif /* _HSR_FRAMEREG_H */ diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c new file mode 100644 index 000000000000..af68dd83a4e3 --- /dev/null +++ b/net/hsr/hsr_main.c @@ -0,0 +1,469 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * + * In addition to routines for registering and unregistering HSR support, this + * file also contains the receive routine that handles all incoming frames with + * Ethertype (protocol) ETH_P_PRP (HSRv0), and network device event handling. + */ + +#include <linux/netdevice.h> +#include <linux/rculist.h> +#include <linux/timer.h> +#include <linux/etherdevice.h> +#include "hsr_main.h" +#include "hsr_device.h" +#include "hsr_netlink.h" +#include "hsr_framereg.h" + + +/* List of all registered virtual HSR devices */ +static LIST_HEAD(hsr_list); + +void register_hsr_master(struct hsr_priv *hsr_priv) +{ + list_add_tail_rcu(&hsr_priv->hsr_list, &hsr_list); +} + +void unregister_hsr_master(struct hsr_priv *hsr_priv) +{ + struct hsr_priv *hsr_priv_it; + + list_for_each_entry(hsr_priv_it, &hsr_list, hsr_list) + if (hsr_priv_it == hsr_priv) { + list_del_rcu(&hsr_priv_it->hsr_list); + return; + } +} + +bool is_hsr_slave(struct net_device *dev) +{ + struct hsr_priv *hsr_priv_it; + + list_for_each_entry_rcu(hsr_priv_it, &hsr_list, hsr_list) { + if (dev == hsr_priv_it->slave[0]) + return true; + if (dev == hsr_priv_it->slave[1]) + return true; + } + + return false; +} + + +/* If dev is a HSR slave device, return the virtual master device. Return NULL + * otherwise. + */ +static struct hsr_priv *get_hsr_master(struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + + rcu_read_lock(); + list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list) + if ((dev == hsr_priv->slave[0]) || + (dev == hsr_priv->slave[1])) { + rcu_read_unlock(); + return hsr_priv; + } + + rcu_read_unlock(); + return NULL; +} + + +/* If dev is a HSR slave device, return the other slave device. Return NULL + * otherwise. + */ +static struct net_device *get_other_slave(struct hsr_priv *hsr_priv, + struct net_device *dev) +{ + if (dev == hsr_priv->slave[0]) + return hsr_priv->slave[1]; + if (dev == hsr_priv->slave[1]) + return hsr_priv->slave[0]; + + return NULL; +} + + +static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct net_device *slave, *other_slave; + struct hsr_priv *hsr_priv; + int old_operstate; + int mtu_max; + int res; + struct net_device *dev; + + dev = netdev_notifier_info_to_dev(ptr); + + hsr_priv = get_hsr_master(dev); + if (hsr_priv) { + /* dev is a slave device */ + slave = dev; + other_slave = get_other_slave(hsr_priv, slave); + } else { + if (!is_hsr_master(dev)) + return NOTIFY_DONE; + hsr_priv = netdev_priv(dev); + slave = hsr_priv->slave[0]; + other_slave = hsr_priv->slave[1]; + } + + switch (event) { + case NETDEV_UP: /* Administrative state DOWN */ + case NETDEV_DOWN: /* Administrative state UP */ + case NETDEV_CHANGE: /* Link (carrier) state changes */ + old_operstate = hsr_priv->dev->operstate; + hsr_set_carrier(hsr_priv->dev, slave, other_slave); + /* netif_stacked_transfer_operstate() cannot be used here since + * it doesn't set IF_OPER_LOWERLAYERDOWN (?) + */ + hsr_set_operstate(hsr_priv->dev, slave, other_slave); + hsr_check_announce(hsr_priv->dev, old_operstate); + break; + case NETDEV_CHANGEADDR: + + /* This should not happen since there's no ndo_set_mac_address() + * for HSR devices - i.e. not supported. + */ + if (dev == hsr_priv->dev) + break; + + if (dev == hsr_priv->slave[0]) + memcpy(hsr_priv->dev->dev_addr, + hsr_priv->slave[0]->dev_addr, ETH_ALEN); + + /* Make sure we recognize frames from ourselves in hsr_rcv() */ + res = hsr_create_self_node(&hsr_priv->self_node_db, + hsr_priv->dev->dev_addr, + hsr_priv->slave[1] ? + hsr_priv->slave[1]->dev_addr : + hsr_priv->dev->dev_addr); + if (res) + netdev_warn(hsr_priv->dev, + "Could not update HSR node address.\n"); + + if (dev == hsr_priv->slave[0]) + call_netdevice_notifiers(NETDEV_CHANGEADDR, hsr_priv->dev); + break; + case NETDEV_CHANGEMTU: + if (dev == hsr_priv->dev) + break; /* Handled in ndo_change_mtu() */ + mtu_max = hsr_get_max_mtu(hsr_priv); + if (hsr_priv->dev->mtu > mtu_max) + dev_set_mtu(hsr_priv->dev, mtu_max); + break; + case NETDEV_UNREGISTER: + if (dev == hsr_priv->slave[0]) + hsr_priv->slave[0] = NULL; + if (dev == hsr_priv->slave[1]) + hsr_priv->slave[1] = NULL; + + /* There should really be a way to set a new slave device... */ + + break; + case NETDEV_PRE_TYPE_CHANGE: + /* HSR works only on Ethernet devices. Refuse slave to change + * its type. + */ + return NOTIFY_BAD; + } + + return NOTIFY_DONE; +} + + +static struct timer_list prune_timer; + +static void prune_nodes_all(unsigned long data) +{ + struct hsr_priv *hsr_priv; + + rcu_read_lock(); + list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list) + hsr_prune_nodes(hsr_priv); + rcu_read_unlock(); + + prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); + add_timer(&prune_timer); +} + + +static struct sk_buff *hsr_pull_tag(struct sk_buff *skb) +{ + struct hsr_tag *hsr_tag; + struct sk_buff *skb2; + + skb2 = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb2)) + goto err_free; + skb = skb2; + + if (unlikely(!pskb_may_pull(skb, HSR_TAGLEN))) + goto err_free; + + hsr_tag = (struct hsr_tag *) skb->data; + skb->protocol = hsr_tag->encap_proto; + skb_pull(skb, HSR_TAGLEN); + + return skb; + +err_free: + kfree_skb(skb); + return NULL; +} + + +/* The uses I can see for these HSR supervision frames are: + * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type = + * 22") to reset any sequence_nr counters belonging to that node. Useful if + * the other node's counter has been reset for some reason. + * -- + * Or not - resetting the counter and bridging the frame would create a + * loop, unfortunately. + * + * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck + * frame is received from a particular node, we know something is wrong. + * We just register these (as with normal frames) and throw them away. + * + * 3) Allow different MAC addresses for the two slave interfaces, using the + * MacAddressA field. + */ +static bool is_supervision_frame(struct hsr_priv *hsr_priv, struct sk_buff *skb) +{ + struct hsr_sup_tag *hsr_stag; + + if (!ether_addr_equal(eth_hdr(skb)->h_dest, + hsr_priv->sup_multicast_addr)) + return false; + + hsr_stag = (struct hsr_sup_tag *) skb->data; + if (get_hsr_stag_path(hsr_stag) != 0x0f) + return false; + if ((hsr_stag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) && + (hsr_stag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) + return false; + if (hsr_stag->HSR_TLV_Length != 12) + return false; + + return true; +} + + +/* Implementation somewhat according to IEC-62439-3, p. 43 + */ +static int hsr_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct hsr_priv *hsr_priv; + struct net_device *other_slave; + struct node_entry *node; + bool deliver_to_self; + struct sk_buff *skb_deliver; + enum hsr_dev_idx dev_in_idx, dev_other_idx; + bool dup_out; + int ret; + + hsr_priv = get_hsr_master(dev); + + if (!hsr_priv) { + /* Non-HSR-slave device 'dev' is connected to a HSR network */ + kfree_skb(skb); + dev->stats.rx_errors++; + return NET_RX_SUCCESS; + } + + if (dev == hsr_priv->slave[0]) { + dev_in_idx = HSR_DEV_SLAVE_A; + dev_other_idx = HSR_DEV_SLAVE_B; + } else { + dev_in_idx = HSR_DEV_SLAVE_B; + dev_other_idx = HSR_DEV_SLAVE_A; + } + + node = hsr_find_node(&hsr_priv->self_node_db, skb); + if (node) { + /* Always kill frames sent by ourselves */ + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + /* Is this frame a candidate for local reception? */ + deliver_to_self = false; + if ((skb->pkt_type == PACKET_HOST) || + (skb->pkt_type == PACKET_MULTICAST) || + (skb->pkt_type == PACKET_BROADCAST)) + deliver_to_self = true; + else if (ether_addr_equal(eth_hdr(skb)->h_dest, + hsr_priv->dev->dev_addr)) { + skb->pkt_type = PACKET_HOST; + deliver_to_self = true; + } + + + rcu_read_lock(); /* node_db */ + node = hsr_find_node(&hsr_priv->node_db, skb); + + if (is_supervision_frame(hsr_priv, skb)) { + skb_pull(skb, sizeof(struct hsr_sup_tag)); + node = hsr_merge_node(hsr_priv, node, skb, dev_in_idx); + if (!node) { + rcu_read_unlock(); /* node_db */ + kfree_skb(skb); + hsr_priv->dev->stats.rx_dropped++; + return NET_RX_DROP; + } + skb_push(skb, sizeof(struct hsr_sup_tag)); + deliver_to_self = false; + } + + if (!node) { + /* Source node unknown; this might be a HSR frame from + * another net (different multicast address). Ignore it. + */ + rcu_read_unlock(); /* node_db */ + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + /* Register ALL incoming frames as outgoing through the other interface. + * This allows us to register frames as incoming only if they are valid + * for the receiving interface, without using a specific counter for + * incoming frames. + */ + dup_out = hsr_register_frame_out(node, dev_other_idx, skb); + if (!dup_out) + hsr_register_frame_in(node, dev_in_idx); + + /* Forward this frame? */ + if (!dup_out && (skb->pkt_type != PACKET_HOST)) + other_slave = get_other_slave(hsr_priv, dev); + else + other_slave = NULL; + + if (hsr_register_frame_out(node, HSR_DEV_MASTER, skb)) + deliver_to_self = false; + + rcu_read_unlock(); /* node_db */ + + if (!deliver_to_self && !other_slave) { + kfree_skb(skb); + /* Circulated frame; silently remove it. */ + return NET_RX_SUCCESS; + } + + skb_deliver = skb; + if (deliver_to_self && other_slave) { + /* skb_clone() is not enough since we will strip the hsr tag + * and do address substitution below + */ + skb_deliver = pskb_copy(skb, GFP_ATOMIC); + if (!skb_deliver) { + deliver_to_self = false; + hsr_priv->dev->stats.rx_dropped++; + } + } + + if (deliver_to_self) { + bool multicast_frame; + + skb_deliver = hsr_pull_tag(skb_deliver); + if (!skb_deliver) { + hsr_priv->dev->stats.rx_dropped++; + goto forward; + } +#if !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + /* Move everything in the header that is after the HSR tag, + * to work around alignment problems caused by the 6-byte HSR + * tag. In practice, this removes/overwrites the HSR tag in + * the header and restores a "standard" packet. + */ + memmove(skb_deliver->data - HSR_TAGLEN, skb_deliver->data, + skb_headlen(skb_deliver)); + + /* Adjust skb members so they correspond with the move above. + * This cannot possibly underflow skb->data since hsr_pull_tag() + * above succeeded. + * At this point in the protocol stack, the transport and + * network headers have not been set yet, and we haven't touched + * the mac header nor the head. So we only need to adjust data + * and tail: + */ + skb_deliver->data -= HSR_TAGLEN; + skb_deliver->tail -= HSR_TAGLEN; +#endif + skb_deliver->dev = hsr_priv->dev; + hsr_addr_subst_source(hsr_priv, skb_deliver); + multicast_frame = (skb_deliver->pkt_type == PACKET_MULTICAST); + ret = netif_rx(skb_deliver); + if (ret == NET_RX_DROP) { + hsr_priv->dev->stats.rx_dropped++; + } else { + hsr_priv->dev->stats.rx_packets++; + hsr_priv->dev->stats.rx_bytes += skb->len; + if (multicast_frame) + hsr_priv->dev->stats.multicast++; + } + } + +forward: + if (other_slave) { + skb_push(skb, ETH_HLEN); + skb->dev = other_slave; + dev_queue_xmit(skb); + } + + return NET_RX_SUCCESS; +} + + +static struct packet_type hsr_pt __read_mostly = { + .type = htons(ETH_P_PRP), + .func = hsr_rcv, +}; + +static struct notifier_block hsr_nb = { + .notifier_call = hsr_netdev_notify, /* Slave event notifications */ +}; + + +static int __init hsr_init(void) +{ + int res; + + BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_TAGLEN); + + dev_add_pack(&hsr_pt); + + init_timer(&prune_timer); + prune_timer.function = prune_nodes_all; + prune_timer.data = 0; + prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); + add_timer(&prune_timer); + + register_netdevice_notifier(&hsr_nb); + + res = hsr_netlink_init(); + + return res; +} + +static void __exit hsr_exit(void) +{ + unregister_netdevice_notifier(&hsr_nb); + del_timer(&prune_timer); + hsr_netlink_exit(); + dev_remove_pack(&hsr_pt); +} + +module_init(hsr_init); +module_exit(hsr_exit); +MODULE_LICENSE("GPL"); diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h new file mode 100644 index 000000000000..56fe060c0ab1 --- /dev/null +++ b/net/hsr/hsr_main.h @@ -0,0 +1,166 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + */ + +#ifndef _HSR_PRIVATE_H +#define _HSR_PRIVATE_H + +#include <linux/netdevice.h> +#include <linux/list.h> + + +/* Time constants as specified in the HSR specification (IEC-62439-3 2010) + * Table 8. + * All values in milliseconds. + */ +#define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */ +#define HSR_NODE_FORGET_TIME 60000 /* ms */ +#define HSR_ANNOUNCE_INTERVAL 100 /* ms */ + + +/* By how much may slave1 and slave2 timestamps of latest received frame from + * each node differ before we notify of communication problem? + */ +#define MAX_SLAVE_DIFF 3000 /* ms */ + + +/* How often shall we check for broken ring and remove node entries older than + * HSR_NODE_FORGET_TIME? + */ +#define PRUNE_PERIOD 3000 /* ms */ + + +#define HSR_TLV_ANNOUNCE 22 +#define HSR_TLV_LIFE_CHECK 23 + + +/* HSR Tag. + * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, + * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest, + * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr, + * encapsulated protocol } instead. + */ +#define HSR_TAGLEN 6 + +/* Field names below as defined in the IEC:2010 standard for HSR. */ +struct hsr_tag { + __be16 path_and_LSDU_size; + __be16 sequence_nr; + __be16 encap_proto; +} __packed; + + +/* The helper functions below assumes that 'path' occupies the 4 most + * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or + * equivalently, the 4 most significant bits of HSR tag byte 14). + * + * This is unclear in the IEC specification; its definition of MAC addresses + * indicates the spec is written with the least significant bit first (to the + * left). This, however, would mean that the LSDU field would be split in two + * with the path field in-between, which seems strange. I'm guessing the MAC + * address definition is in error. + */ +static inline u16 get_hsr_tag_path(struct hsr_tag *ht) +{ + return ntohs(ht->path_and_LSDU_size) >> 12; +} + +static inline u16 get_hsr_tag_LSDU_size(struct hsr_tag *ht) +{ + return ntohs(ht->path_and_LSDU_size) & 0x0FFF; +} + +static inline void set_hsr_tag_path(struct hsr_tag *ht, u16 path) +{ + ht->path_and_LSDU_size = htons( + (ntohs(ht->path_and_LSDU_size) & 0x0FFF) | (path << 12)); +} + +static inline void set_hsr_tag_LSDU_size(struct hsr_tag *ht, u16 LSDU_size) +{ + ht->path_and_LSDU_size = htons( + (ntohs(ht->path_and_LSDU_size) & 0xF000) | + (LSDU_size & 0x0FFF)); +} + +struct hsr_ethhdr { + struct ethhdr ethhdr; + struct hsr_tag hsr_tag; +} __packed; + + +/* HSR Supervision Frame data types. + * Field names as defined in the IEC:2010 standard for HSR. + */ +struct hsr_sup_tag { + __be16 path_and_HSR_Ver; + __be16 sequence_nr; + __u8 HSR_TLV_Type; + __u8 HSR_TLV_Length; +} __packed; + +struct hsr_sup_payload { + unsigned char MacAddressA[ETH_ALEN]; +} __packed; + +static inline u16 get_hsr_stag_path(struct hsr_sup_tag *hst) +{ + return get_hsr_tag_path((struct hsr_tag *) hst); +} + +static inline u16 get_hsr_stag_HSR_ver(struct hsr_sup_tag *hst) +{ + return get_hsr_tag_LSDU_size((struct hsr_tag *) hst); +} + +static inline void set_hsr_stag_path(struct hsr_sup_tag *hst, u16 path) +{ + set_hsr_tag_path((struct hsr_tag *) hst, path); +} + +static inline void set_hsr_stag_HSR_Ver(struct hsr_sup_tag *hst, u16 HSR_Ver) +{ + set_hsr_tag_LSDU_size((struct hsr_tag *) hst, HSR_Ver); +} + +struct hsr_ethhdr_sp { + struct ethhdr ethhdr; + struct hsr_sup_tag hsr_sup; +} __packed; + + +enum hsr_dev_idx { + HSR_DEV_NONE = -1, + HSR_DEV_SLAVE_A = 0, + HSR_DEV_SLAVE_B, + HSR_DEV_MASTER, +}; +#define HSR_MAX_SLAVE (HSR_DEV_SLAVE_B + 1) +#define HSR_MAX_DEV (HSR_DEV_MASTER + 1) + +struct hsr_priv { + struct list_head hsr_list; /* List of hsr devices */ + struct rcu_head rcu_head; + struct net_device *dev; + struct net_device *slave[HSR_MAX_SLAVE]; + struct list_head node_db; /* Other HSR nodes */ + struct list_head self_node_db; /* MACs of slaves */ + struct timer_list announce_timer; /* Supervision frame dispatch */ + int announce_count; + u16 sequence_nr; + spinlock_t seqnr_lock; /* locking for sequence_nr */ + unsigned char sup_multicast_addr[ETH_ALEN]; +}; + +void register_hsr_master(struct hsr_priv *hsr_priv); +void unregister_hsr_master(struct hsr_priv *hsr_priv); +bool is_hsr_slave(struct net_device *dev); + +#endif /* _HSR_PRIVATE_H */ diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c new file mode 100644 index 000000000000..4e66bf61f585 --- /dev/null +++ b/net/hsr/hsr_netlink.c @@ -0,0 +1,457 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * + * Routines for handling Netlink messages for HSR. + */ + +#include "hsr_netlink.h" +#include <linux/kernel.h> +#include <net/rtnetlink.h> +#include <net/genetlink.h> +#include "hsr_main.h" +#include "hsr_device.h" +#include "hsr_framereg.h" + +static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { + [IFLA_HSR_SLAVE1] = { .type = NLA_U32 }, + [IFLA_HSR_SLAVE2] = { .type = NLA_U32 }, + [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 }, +}; + + +/* Here, it seems a netdevice has already been allocated for us, and the + * hsr_dev_setup routine has been executed. Nice! + */ +static int hsr_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *link[2]; + unsigned char multicast_spec; + + if (!data[IFLA_HSR_SLAVE1]) { + netdev_info(dev, "IFLA_HSR_SLAVE1 missing!\n"); + return -EINVAL; + } + link[0] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE1])); + if (!data[IFLA_HSR_SLAVE2]) { + netdev_info(dev, "IFLA_HSR_SLAVE2 missing!\n"); + return -EINVAL; + } + link[1] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE2])); + + if (!link[0] || !link[1]) + return -ENODEV; + if (link[0] == link[1]) + return -EINVAL; + + if (!data[IFLA_HSR_MULTICAST_SPEC]) + multicast_spec = 0; + else + multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]); + + return hsr_dev_finalize(dev, link, multicast_spec); +} + +static struct rtnl_link_ops hsr_link_ops __read_mostly = { + .kind = "hsr", + .maxtype = IFLA_HSR_MAX, + .policy = hsr_policy, + .priv_size = sizeof(struct hsr_priv), + .setup = hsr_dev_setup, + .newlink = hsr_newlink, +}; + + + +/* attribute policy */ +/* NLA_BINARY missing in libnl; use NLA_UNSPEC in userspace instead. */ +static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { + [HSR_A_NODE_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [HSR_A_NODE_ADDR_B] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [HSR_A_IFINDEX] = { .type = NLA_U32 }, + [HSR_A_IF1_AGE] = { .type = NLA_U32 }, + [HSR_A_IF2_AGE] = { .type = NLA_U32 }, + [HSR_A_IF1_SEQ] = { .type = NLA_U16 }, + [HSR_A_IF2_SEQ] = { .type = NLA_U16 }, +}; + +static struct genl_family hsr_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "HSR", + .version = 1, + .maxattr = HSR_A_MAX, +}; + +static struct genl_multicast_group hsr_network_genl_mcgrp = { + .name = "hsr-network", +}; + + + +/* This is called if for some node with MAC address addr, we only get frames + * over one of the slave interfaces. This would indicate an open network ring + * (i.e. a link has failed somewhere). + */ +void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN], + enum hsr_dev_idx dev_idx) +{ + struct sk_buff *skb; + void *msg_head; + int res; + int ifindex; + + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + goto fail; + + msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, HSR_C_RING_ERROR); + if (!msg_head) + goto nla_put_failure; + + res = nla_put(skb, HSR_A_NODE_ADDR, ETH_ALEN, addr); + if (res < 0) + goto nla_put_failure; + + if (hsr_priv->slave[dev_idx]) + ifindex = hsr_priv->slave[dev_idx]->ifindex; + else + ifindex = -1; + res = nla_put_u32(skb, HSR_A_IFINDEX, ifindex); + if (res < 0) + goto nla_put_failure; + + genlmsg_end(skb, msg_head); + genlmsg_multicast(skb, 0, hsr_network_genl_mcgrp.id, GFP_ATOMIC); + + return; + +nla_put_failure: + kfree_skb(skb); + +fail: + netdev_warn(hsr_priv->dev, "Could not send HSR ring error message\n"); +} + +/* This is called when we haven't heard from the node with MAC address addr for + * some time (just before the node is removed from the node table/list). + */ +void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]) +{ + struct sk_buff *skb; + void *msg_head; + int res; + + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + goto fail; + + msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, HSR_C_NODE_DOWN); + if (!msg_head) + goto nla_put_failure; + + + res = nla_put(skb, HSR_A_NODE_ADDR, ETH_ALEN, addr); + if (res < 0) + goto nla_put_failure; + + genlmsg_end(skb, msg_head); + genlmsg_multicast(skb, 0, hsr_network_genl_mcgrp.id, GFP_ATOMIC); + + return; + +nla_put_failure: + kfree_skb(skb); + +fail: + netdev_warn(hsr_priv->dev, "Could not send HSR node down\n"); +} + + +/* HSR_C_GET_NODE_STATUS lets userspace query the internal HSR node table + * about the status of a specific node in the network, defined by its MAC + * address. + * + * Input: hsr ifindex, node mac address + * Output: hsr ifindex, node mac address (copied from request), + * age of latest frame from node over slave 1, slave 2 [ms] + */ +static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) +{ + /* For receiving */ + struct nlattr *na; + struct net_device *hsr_dev; + + /* For sending */ + struct sk_buff *skb_out; + void *msg_head; + struct hsr_priv *hsr_priv; + unsigned char hsr_node_addr_b[ETH_ALEN]; + int hsr_node_if1_age; + u16 hsr_node_if1_seq; + int hsr_node_if2_age; + u16 hsr_node_if2_seq; + int addr_b_ifindex; + int res; + + if (!info) + goto invalid; + + na = info->attrs[HSR_A_IFINDEX]; + if (!na) + goto invalid; + na = info->attrs[HSR_A_NODE_ADDR]; + if (!na) + goto invalid; + + hsr_dev = __dev_get_by_index(genl_info_net(info), + nla_get_u32(info->attrs[HSR_A_IFINDEX])); + if (!hsr_dev) + goto invalid; + if (!is_hsr_master(hsr_dev)) + goto invalid; + + + /* Send reply */ + + skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb_out) { + res = -ENOMEM; + goto fail; + } + + msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid, + info->snd_seq, &hsr_genl_family, 0, + HSR_C_SET_NODE_STATUS); + if (!msg_head) { + res = -ENOMEM; + goto nla_put_failure; + } + + res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex); + if (res < 0) + goto nla_put_failure; + + hsr_priv = netdev_priv(hsr_dev); + res = hsr_get_node_data(hsr_priv, + (unsigned char *) nla_data(info->attrs[HSR_A_NODE_ADDR]), + hsr_node_addr_b, + &addr_b_ifindex, + &hsr_node_if1_age, + &hsr_node_if1_seq, + &hsr_node_if2_age, + &hsr_node_if2_seq); + if (res < 0) + goto fail; + + res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, + nla_data(info->attrs[HSR_A_NODE_ADDR])); + if (res < 0) + goto nla_put_failure; + + if (addr_b_ifindex > -1) { + res = nla_put(skb_out, HSR_A_NODE_ADDR_B, ETH_ALEN, + hsr_node_addr_b); + if (res < 0) + goto nla_put_failure; + + res = nla_put_u32(skb_out, HSR_A_ADDR_B_IFINDEX, addr_b_ifindex); + if (res < 0) + goto nla_put_failure; + } + + res = nla_put_u32(skb_out, HSR_A_IF1_AGE, hsr_node_if1_age); + if (res < 0) + goto nla_put_failure; + res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq); + if (res < 0) + goto nla_put_failure; + if (hsr_priv->slave[0]) + res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX, + hsr_priv->slave[0]->ifindex); + if (res < 0) + goto nla_put_failure; + + res = nla_put_u32(skb_out, HSR_A_IF2_AGE, hsr_node_if2_age); + if (res < 0) + goto nla_put_failure; + res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq); + if (res < 0) + goto nla_put_failure; + if (hsr_priv->slave[1]) + res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX, + hsr_priv->slave[1]->ifindex); + + genlmsg_end(skb_out, msg_head); + genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid); + + return 0; + +invalid: + netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL); + return 0; + +nla_put_failure: + kfree_skb(skb_out); + /* Fall through */ + +fail: + return res; +} + +static struct genl_ops hsr_ops_get_node_status = { + .cmd = HSR_C_GET_NODE_STATUS, + .flags = 0, + .policy = hsr_genl_policy, + .doit = hsr_get_node_status, + .dumpit = NULL, +}; + + +/* Get a list of MacAddressA of all nodes known to this node (other than self). + */ +static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) +{ + /* For receiving */ + struct nlattr *na; + struct net_device *hsr_dev; + + /* For sending */ + struct sk_buff *skb_out; + void *msg_head; + struct hsr_priv *hsr_priv; + void *pos; + unsigned char addr[ETH_ALEN]; + int res; + + if (!info) + goto invalid; + + na = info->attrs[HSR_A_IFINDEX]; + if (!na) + goto invalid; + + hsr_dev = __dev_get_by_index(genl_info_net(info), + nla_get_u32(info->attrs[HSR_A_IFINDEX])); + if (!hsr_dev) + goto invalid; + if (!is_hsr_master(hsr_dev)) + goto invalid; + + + /* Send reply */ + + skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb_out) { + res = -ENOMEM; + goto fail; + } + + msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid, + info->snd_seq, &hsr_genl_family, 0, + HSR_C_SET_NODE_LIST); + if (!msg_head) { + res = -ENOMEM; + goto nla_put_failure; + } + + res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex); + if (res < 0) + goto nla_put_failure; + + hsr_priv = netdev_priv(hsr_dev); + + rcu_read_lock(); + pos = hsr_get_next_node(hsr_priv, NULL, addr); + while (pos) { + res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr); + if (res < 0) { + rcu_read_unlock(); + goto nla_put_failure; + } + pos = hsr_get_next_node(hsr_priv, pos, addr); + } + rcu_read_unlock(); + + genlmsg_end(skb_out, msg_head); + genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid); + + return 0; + +invalid: + netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL); + return 0; + +nla_put_failure: + kfree_skb(skb_out); + /* Fall through */ + +fail: + return res; +} + + +static struct genl_ops hsr_ops_get_node_list = { + .cmd = HSR_C_GET_NODE_LIST, + .flags = 0, + .policy = hsr_genl_policy, + .doit = hsr_get_node_list, + .dumpit = NULL, +}; + +int __init hsr_netlink_init(void) +{ + int rc; + + rc = rtnl_link_register(&hsr_link_ops); + if (rc) + goto fail_rtnl_link_register; + + rc = genl_register_family(&hsr_genl_family); + if (rc) + goto fail_genl_register_family; + + rc = genl_register_ops(&hsr_genl_family, &hsr_ops_get_node_status); + if (rc) + goto fail_genl_register_ops; + + rc = genl_register_ops(&hsr_genl_family, &hsr_ops_get_node_list); + if (rc) + goto fail_genl_register_ops_node_list; + + rc = genl_register_mc_group(&hsr_genl_family, &hsr_network_genl_mcgrp); + if (rc) + goto fail_genl_register_mc_group; + + return 0; + +fail_genl_register_mc_group: + genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_list); +fail_genl_register_ops_node_list: + genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_status); +fail_genl_register_ops: + genl_unregister_family(&hsr_genl_family); +fail_genl_register_family: + rtnl_link_unregister(&hsr_link_ops); +fail_rtnl_link_register: + + return rc; +} + +void __exit hsr_netlink_exit(void) +{ + genl_unregister_mc_group(&hsr_genl_family, &hsr_network_genl_mcgrp); + genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_status); + genl_unregister_family(&hsr_genl_family); + + rtnl_link_unregister(&hsr_link_ops); +} + +MODULE_ALIAS_RTNL_LINK("hsr"); diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h new file mode 100644 index 000000000000..d4579dcc3c7d --- /dev/null +++ b/net/hsr/hsr_netlink.h @@ -0,0 +1,30 @@ +/* Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + */ + +#ifndef __HSR_NETLINK_H +#define __HSR_NETLINK_H + +#include <linux/if_ether.h> +#include <linux/module.h> +#include <uapi/linux/hsr_netlink.h> + +struct hsr_priv; + +int __init hsr_netlink_init(void); +void __exit hsr_netlink_exit(void); + +void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN], + int dev_idx); +void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]); +void hsr_nl_framedrop(int dropcount, int dev_idx); +void hsr_nl_linkdown(int dev_idx); + +#endif /* __HSR_NETLINK_H */ diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index ff41b4d60d30..9497c6f3276b 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -440,7 +440,6 @@ lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) default: pr_debug("ERROR: unknown UDP format\n"); goto err; - break; } pr_debug("uncompressed UDP ports: src = %d, dst = %d\n", @@ -655,7 +654,9 @@ static int lowpan_header_create(struct sk_buff *skb, head[1] = iphc1; skb_pull(skb, sizeof(struct ipv6hdr)); + skb_reset_transport_header(skb); memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); + skb_reset_network_header(skb); lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); @@ -738,7 +739,6 @@ static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) return -ENOMEM; skb_push(new, sizeof(struct ipv6hdr)); - skb_reset_network_header(new); skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr)); new->protocol = htons(ETH_P_IPV6); @@ -785,7 +785,6 @@ lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag) goto skb_err; frame->skb->priority = skb->priority; - frame->skb->dev = skb->dev; /* reserve headroom for uncompressed ipv6 header */ skb_reserve(frame->skb, sizeof(struct ipv6hdr)); @@ -1061,7 +1060,6 @@ lowpan_process_data(struct sk_buff *skb) skb = new; skb_push(skb, sizeof(struct udphdr)); - skb_reset_transport_header(skb); skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); lowpan_raw_dump_table(__func__, "raw UDP header dump", @@ -1104,50 +1102,40 @@ static int lowpan_set_address(struct net_device *dev, void *p) return 0; } -static int lowpan_get_mac_header_length(struct sk_buff *skb) -{ - /* - * Currently long addressing mode is supported only, so the overall - * header size is 21: - * FC SeqNum DPAN DA SA Sec - * 2 + 1 + 2 + 8 + 8 + 0 = 21 - */ - return 21; -} - static int lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, int mlen, int plen, int offset, int type) { struct sk_buff *frag; - int hlen, ret; + int hlen; hlen = (type == LOWPAN_DISPATCH_FRAG1) ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE; lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); - frag = dev_alloc_skb(hlen + mlen + plen + IEEE802154_MFR_SIZE); + frag = netdev_alloc_skb(skb->dev, + hlen + mlen + plen + IEEE802154_MFR_SIZE); if (!frag) return -ENOMEM; frag->priority = skb->priority; - frag->dev = skb->dev; /* copy header, MFR and payload */ - memcpy(skb_put(frag, mlen), skb->data, mlen); - memcpy(skb_put(frag, hlen), head, hlen); + skb_put(frag, mlen); + skb_copy_to_linear_data(frag, skb_mac_header(skb), mlen); - if (plen) - skb_copy_from_linear_data_offset(skb, offset + mlen, - skb_put(frag, plen), plen); + skb_put(frag, hlen); + skb_copy_to_linear_data_offset(frag, mlen, head, hlen); + + skb_put(frag, plen); + skb_copy_to_linear_data_offset(frag, mlen + hlen, + skb_network_header(skb) + offset, plen); lowpan_raw_dump_table(__func__, " raw fragment dump", frag->data, frag->len); - ret = dev_queue_xmit(frag); - - return ret; + return dev_queue_xmit(frag); } static int @@ -1156,7 +1144,7 @@ lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev) int err, header_length, payload_length, tag, offset = 0; u8 head[5]; - header_length = lowpan_get_mac_header_length(skb); + header_length = skb->mac_len; payload_length = skb->len - header_length; tag = lowpan_dev_info(dev)->fragment_tag++; @@ -1181,7 +1169,7 @@ lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev) head[0] &= ~LOWPAN_DISPATCH_FRAG1; head[0] |= LOWPAN_DISPATCH_FRAGN; - while ((payload_length - offset > 0) && (err >= 0)) { + while (payload_length - offset > 0) { int len = LOWPAN_FRAG_SIZE; head[4] = offset / 8; @@ -1327,8 +1315,6 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, /* Pull off the 1-byte of 6lowpan header. */ skb_pull(local_skb, 1); - skb_reset_network_header(local_skb); - skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); lowpan_give_skb_to_devices(local_skb); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 35913fb77dc8..09d78d4a3cff 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -245,29 +245,6 @@ out: } EXPORT_SYMBOL(inet_listen); -u32 inet_ehash_secret __read_mostly; -EXPORT_SYMBOL(inet_ehash_secret); - -u32 ipv6_hash_secret __read_mostly; -EXPORT_SYMBOL(ipv6_hash_secret); - -/* - * inet_ehash_secret must be set exactly once, and to a non nul value - * ipv6_hash_secret must be set exactly once. - */ -void build_ehash_secret(void) -{ - u32 rnd; - - do { - get_random_bytes(&rnd, sizeof(rnd)); - } while (rnd == 0); - - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) - get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); -} -EXPORT_SYMBOL(build_ehash_secret); - /* * Create an inet socket. */ @@ -284,10 +261,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; - if (unlikely(!inet_ehash_secret)) - if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) - build_ehash_secret(); - sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ @@ -1254,36 +1227,36 @@ static int inet_gso_send_check(struct sk_buff *skb) if (ihl < sizeof(*iph)) goto out; + proto = iph->protocol; + + /* Warning: after this point, iph might be no longer valid */ if (unlikely(!pskb_may_pull(skb, ihl))) goto out; - __skb_pull(skb, ihl); + skb_reset_transport_header(skb); - iph = ip_hdr(skb); - proto = iph->protocol; err = -EPROTONOSUPPORT; - rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_send_check)) err = ops->callbacks.gso_send_check(skb); - rcu_read_unlock(); out: return err; } static struct sk_buff *inet_gso_segment(struct sk_buff *skb, - netdev_features_t features) + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; + unsigned int offset = 0; + bool udpfrag, encap; struct iphdr *iph; int proto; + int nhoff; int ihl; int id; - unsigned int offset = 0; - bool tunnel; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | @@ -1291,12 +1264,16 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | SKB_GSO_MPLS | 0))) goto out; + skb_reset_network_header(skb); + nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) goto out; @@ -1305,42 +1282,48 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (ihl < sizeof(*iph)) goto out; + id = ntohs(iph->id); + proto = iph->protocol; + + /* Warning: after this point, iph might be no longer valid */ if (unlikely(!pskb_may_pull(skb, ihl))) goto out; + __skb_pull(skb, ihl); - tunnel = !!skb->encapsulation; + encap = SKB_GSO_CB(skb)->encap_level > 0; + if (encap) + features = skb->dev->hw_enc_features & netif_skb_features(skb); + SKB_GSO_CB(skb)->encap_level += ihl; - __skb_pull(skb, ihl); skb_reset_transport_header(skb); - iph = ip_hdr(skb); - id = ntohs(iph->id); - proto = iph->protocol; + segs = ERR_PTR(-EPROTONOSUPPORT); - rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) segs = ops->callbacks.gso_segment(skb, features); - rcu_read_unlock(); if (IS_ERR_OR_NULL(segs)) goto out; + udpfrag = !!skb->encapsulation && proto == IPPROTO_UDP; skb = segs; do { - iph = ip_hdr(skb); - if (!tunnel && proto == IPPROTO_UDP) { + iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); + if (udpfrag) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next != NULL) iph->frag_off |= htons(IP_MF); - offset += (skb->len - skb->mac_len - iph->ihl * 4); - } else { + offset += skb->len - nhoff - ihl; + } else { iph->id = htons(id++); } - iph->tot_len = htons(skb->len - skb->mac_len); - iph->check = 0; - iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); + iph->tot_len = htons(skb->len - nhoff); + ip_send_check(iph); + if (encap) + skb_reset_inner_headers(skb); + skb->network_header = (u8 *)iph - skb->head; } while ((skb = skb->next)); out: @@ -1647,6 +1630,13 @@ static struct packet_offload ip_packet_offload __read_mostly = { }, }; +static const struct net_offload ipip_offload = { + .callbacks = { + .gso_send_check = inet_gso_send_check, + .gso_segment = inet_gso_segment, + }, +}; + static int __init ipv4_offload_init(void) { /* @@ -1658,6 +1648,7 @@ static int __init ipv4_offload_init(void) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); + inet_add_offload(&ipip_offload, IPPROTO_IPIP); return 0; } @@ -1706,8 +1697,6 @@ static int __init inet_init(void) ip_static_sysctl_init(); #endif - tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; - /* * Add all the base protocols. */ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 109ee89f123e..7785b28061ac 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -121,7 +121,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) struct aead_givcrypt_request *req; struct scatterlist *sg; struct scatterlist *asg; - struct esp_data *esp; struct sk_buff *trailer; void *tmp; u8 *iv; @@ -139,8 +138,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ - esp = x->data; - aead = esp->aead; + aead = x->data; alen = crypto_aead_authsize(aead); tfclen = 0; @@ -154,8 +152,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(skb->len + 2 + tfclen, blksize); - if (esp->padlen) - clen = ALIGN(clen, esp->padlen); plen = clen - skb->len - tfclen; err = skb_cow_data(skb, tfclen + plen + alen, &trailer); @@ -280,8 +276,7 @@ static int esp_input_done2(struct sk_buff *skb, int err) { const struct iphdr *iph; struct xfrm_state *x = xfrm_input_state(skb); - struct esp_data *esp = x->data; - struct crypto_aead *aead = esp->aead; + struct crypto_aead *aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); int elen = skb->len - hlen; @@ -376,8 +371,7 @@ static void esp_input_done(struct crypto_async_request *base, int err) static int esp_input(struct xfrm_state *x, struct sk_buff *skb) { struct ip_esp_hdr *esph; - struct esp_data *esp = x->data; - struct crypto_aead *aead = esp->aead; + struct crypto_aead *aead = x->data; struct aead_request *req; struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); @@ -459,9 +453,8 @@ out: static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) { - struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); - u32 align = max_t(u32, blksize, esp->padlen); + struct crypto_aead *aead = x->data; + u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4); unsigned int net_adj; switch (x->props.mode) { @@ -476,8 +469,8 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) BUG(); } - return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - - net_adj) & ~(align - 1)) + net_adj - 2; + return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - + net_adj) & ~(blksize - 1)) + net_adj - 2; } static void esp4_err(struct sk_buff *skb, u32 info) @@ -511,18 +504,16 @@ static void esp4_err(struct sk_buff *skb, u32 info) static void esp_destroy(struct xfrm_state *x) { - struct esp_data *esp = x->data; + struct crypto_aead *aead = x->data; - if (!esp) + if (!aead) return; - crypto_free_aead(esp->aead); - kfree(esp); + crypto_free_aead(aead); } static int esp_init_aead(struct xfrm_state *x) { - struct esp_data *esp = x->data; struct crypto_aead *aead; int err; @@ -531,7 +522,7 @@ static int esp_init_aead(struct xfrm_state *x) if (IS_ERR(aead)) goto error; - esp->aead = aead; + x->data = aead; err = crypto_aead_setkey(aead, x->aead->alg_key, (x->aead->alg_key_len + 7) / 8); @@ -548,7 +539,6 @@ error: static int esp_init_authenc(struct xfrm_state *x) { - struct esp_data *esp = x->data; struct crypto_aead *aead; struct crypto_authenc_key_param *param; struct rtattr *rta; @@ -583,7 +573,7 @@ static int esp_init_authenc(struct xfrm_state *x) if (IS_ERR(aead)) goto error; - esp->aead = aead; + x->data = aead; keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) + (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param)); @@ -638,16 +628,11 @@ error: static int esp_init_state(struct xfrm_state *x) { - struct esp_data *esp; struct crypto_aead *aead; u32 align; int err; - esp = kzalloc(sizeof(*esp), GFP_KERNEL); - if (esp == NULL) - return -ENOMEM; - - x->data = esp; + x->data = NULL; if (x->aead) err = esp_init_aead(x); @@ -657,9 +642,7 @@ static int esp_init_state(struct xfrm_state *x) if (err) goto error; - aead = esp->aead; - - esp->padlen = 0; + aead = x->data; x->props.header_len = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -683,9 +666,7 @@ static int esp_init_state(struct xfrm_state *x) } align = ALIGN(crypto_aead_blocksize(aead), 4); - if (esp->padlen) - align = max_t(u32, align, esp->padlen); - x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead); + x->props.trailer_len = align + 1 + crypto_aead_authsize(aead); error: return err; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b3f627ac4ed8..d846304b7b89 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -933,7 +933,6 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) local_bh_disable(); frn->tb_id = tb->tb_id; - rcu_read_lock(); frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); if (!frn->err) { @@ -942,7 +941,6 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) frn->type = res.type; frn->scope = res.scope; } - rcu_read_unlock(); local_bh_enable(); } } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index af0f14aba169..388d113fd289 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -24,21 +24,17 @@ static inline void fib_alias_accessed(struct fib_alias *fa) } /* Exported by fib_semantics.c */ -extern void fib_release_info(struct fib_info *); -extern struct fib_info *fib_create_info(struct fib_config *cfg); -extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); -extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, __be32 dst, - int dst_len, u8 tos, struct fib_info *fi, - unsigned int); -extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, - int dst_len, u32 tb_id, struct nl_info *info, - unsigned int nlm_flags); -extern struct fib_alias *fib_find_alias(struct list_head *fah, - u8 tos, u32 prio); -extern int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, - int *last_idx, int dflt); +void fib_release_info(struct fib_info *); +struct fib_info *fib_create_info(struct fib_config *cfg); +int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); +int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, + u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, + unsigned int); +void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, + u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); +struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); +int fib_detect_death(struct fib_info *fi, int order, + struct fib_info **last_resort, int *last_idx, int dflt); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d5dbca5ecf62..e63f47a4e651 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -380,7 +380,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) } void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, - int dst_len, u32 tb_id, struct nl_info *info, + int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags) { struct sk_buff *skb; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 736c9fc3ef93..5893e99e8299 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -93,35 +93,6 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, } EXPORT_SYMBOL_GPL(gre_build_header); -struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) -{ - int err; - - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - - if (skb_is_gso(skb)) { - err = skb_unclone(skb, GFP_ATOMIC); - if (unlikely(err)) - goto error; - skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; - return skb; - } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { - err = skb_checksum_help(skb); - if (unlikely(err)) - goto error; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->ip_summed = CHECKSUM_NONE; - - return skb; -error: - kfree_skb(skb); - return ERR_PTR(err); -} -EXPORT_SYMBOL_GPL(gre_handle_offloads); - static __sum16 check_checksum(struct sk_buff *skb) { __sum16 csum = 0; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 55e6bfb3a289..e5d436188464 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -39,7 +39,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | - SKB_GSO_GRE))) + SKB_GSO_GRE | + SKB_GSO_IPIP))) goto out; if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c5313a9c019b..bb075fc9a14f 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -93,9 +93,6 @@ void inet_frags_init(struct inet_frags *f) } rwlock_init(&f->lock); - f->rnd = (u32) ((totalram_pages ^ (totalram_pages >> 7)) ^ - (jiffies ^ (jiffies >> 6))); - setup_timer(&f->secret_timer, inet_frag_secret_rebuild, (unsigned long)f); f->secret_timer.expires = jiffies + f->secret_interval; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index a4b66bbe4f21..8b9cf279450d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -24,6 +24,31 @@ #include <net/secure_seq.h> #include <net/ip.h> +static unsigned int inet_ehashfn(struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) +{ + static u32 inet_ehash_secret __read_mostly; + + net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); + + return __inet_ehashfn(laddr, lport, faddr, fport, + inet_ehash_secret + net_hash_mix(net)); +} + + +static unsigned int inet_sk_ehashfn(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const __be32 laddr = inet->inet_rcv_saddr; + const __u16 lport = inet->inet_num; + const __be32 faddr = inet->inet_daddr; + const __be16 fport = inet->inet_dport; + struct net *net = sock_net(sk); + + return inet_ehashfn(net, laddr, lport, faddr, fport); +} + /* * Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b66910aaef4d..2481993a4970 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -106,6 +106,7 @@ struct ip4_create_arg { static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { + net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); return jhash_3words((__force u32)id << 16 | prot, (__force u32)saddr, (__force u32)daddr, ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7d8357bb2ba6..51be64e18e32 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -772,15 +772,20 @@ static inline int ip_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; - skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - /* specify the length of each IP datagram fragment */ - skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + __skb_queue_tail(queue, skb); + } else if (skb_is_gso(skb)) { + goto append; } + skb->ip_summed = CHECKSUM_PARTIAL; + /* specify the length of each IP datagram fragment */ + skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + +append: return skb_append_datato_frags(sk, skb, getfrag, from, (length - transhdrlen)); } @@ -805,7 +810,7 @@ static int __ip_append_data(struct sock *sk, int copy; int err; int offset = 0; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; @@ -818,8 +823,10 @@ static int __ip_append_data(struct sock *sk, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; + maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? + mtu : 0xFFFF; - if (cork->length + length > 0xFFFF - fragheaderlen) { + if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu-exthdrlen); return -EMSGSIZE; @@ -1117,7 +1124,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, int mtu; int len; int err; - unsigned int maxfraglen, fragheaderlen, fraggap; + unsigned int maxfraglen, fragheaderlen, fraggap, maxnonfragsize; if (inet->hdrincl) return -EPERM; @@ -1141,8 +1148,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; + maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? + mtu : 0xFFFF; - if (cork->length + size > 0xFFFF - fragheaderlen) { + if (cork->length + size > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu); return -EMSGSIZE; } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index c31e3ad98ef2..42ffbc8d65c6 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -116,3 +116,36 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) return 0; } EXPORT_SYMBOL_GPL(iptunnel_pull_header); + +struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, + bool csum_help, + int gso_type_mask) +{ + int err; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= gso_type_mask; + return skb; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; +error: + kfree_skb(skb); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 91f69bc883fe..5d9c845d288a 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -61,8 +61,17 @@ static int vti_rcv(struct sk_buff *skb) iph->saddr, iph->daddr, 0); if (tunnel != NULL) { struct pcpu_tstats *tstats; + u32 oldmark = skb->mark; + int ret; - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + + /* temporarily mark the skb with the tunnel o_key, to + * only match policies with this mark. + */ + skb->mark = be32_to_cpu(tunnel->parms.o_key); + ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); + skb->mark = oldmark; + if (!ret) return -1; tstats = this_cpu_ptr(tunnel->dev->tstats); @@ -71,7 +80,6 @@ static int vti_rcv(struct sk_buff *skb) tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - skb->mark = 0; secpath_reset(skb); skb->dev = tunnel->dev; return 1; @@ -103,7 +111,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl4, 0, sizeof(fl4)); flowi4_init_output(&fl4, tunnel->parms.link, - be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos), + be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPIP, 0, dst, tiph->saddr, 0, 0); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 7f80fb4b82d3..fe3e9f7f1f0b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -220,17 +220,17 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + if (IS_ERR(skb)) + goto out; ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); return NETDEV_TX_OK; tx_error: - dev->stats.tx_errors++; dev_kfree_skb(skb); +out: + dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -275,6 +275,7 @@ static const struct net_device_ops ipip_netdev_ops = { #define IPIP_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | \ NETIF_F_HW_CSUM) static void ipip_tunnel_setup(struct net_device *dev) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 85a4f21aac1a..59da7cde0724 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -271,6 +271,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index d23118d95ff9..718dfbd30cbe 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -327,6 +327,11 @@ ipt_do_table(struct sk_buff *skb, addend = xt_write_recseq_begin(); private = table->private; cpu = smp_processor_id(); + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); table_base = private->entries[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a2e2b61cd7da..2510c02c2d21 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -28,6 +28,7 @@ #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> #include <net/netfilter/nf_conntrack.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/checksum.h> #include <net/ip.h> @@ -57,15 +58,21 @@ struct clusterip_config { struct rcu_head rcu; }; -static LIST_HEAD(clusterip_configs); +#ifdef CONFIG_PROC_FS +static const struct file_operations clusterip_proc_fops; +#endif -/* clusterip_lock protects the clusterip_configs list */ -static DEFINE_SPINLOCK(clusterip_lock); +static int clusterip_net_id __read_mostly; + +struct clusterip_net { + struct list_head configs; + /* lock protects the configs list */ + spinlock_t lock; #ifdef CONFIG_PROC_FS -static const struct file_operations clusterip_proc_fops; -static struct proc_dir_entry *clusterip_procdir; + struct proc_dir_entry *procdir; #endif +}; static inline void clusterip_config_get(struct clusterip_config *c) @@ -92,10 +99,13 @@ clusterip_config_put(struct clusterip_config *c) static inline void clusterip_config_entry_put(struct clusterip_config *c) { + struct net *net = dev_net(c->dev); + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + local_bh_disable(); - if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) { + if (atomic_dec_and_lock(&c->entries, &cn->lock)) { list_del_rcu(&c->list); - spin_unlock(&clusterip_lock); + spin_unlock(&cn->lock); local_bh_enable(); dev_mc_del(c->dev, c->clustermac); @@ -113,11 +123,12 @@ clusterip_config_entry_put(struct clusterip_config *c) } static struct clusterip_config * -__clusterip_config_find(__be32 clusterip) +__clusterip_config_find(struct net *net, __be32 clusterip) { struct clusterip_config *c; + struct clusterip_net *cn = net_generic(net, clusterip_net_id); - list_for_each_entry_rcu(c, &clusterip_configs, list) { + list_for_each_entry_rcu(c, &cn->configs, list) { if (c->clusterip == clusterip) return c; } @@ -126,12 +137,12 @@ __clusterip_config_find(__be32 clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(__be32 clusterip, int entry) +clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) { struct clusterip_config *c; rcu_read_lock_bh(); - c = __clusterip_config_find(clusterip); + c = __clusterip_config_find(net, clusterip); if (c) { if (unlikely(!atomic_inc_not_zero(&c->refcount))) c = NULL; @@ -158,6 +169,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, struct net_device *dev) { struct clusterip_config *c; + struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id); c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) @@ -180,7 +192,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, /* create proc dir entry */ sprintf(buffer, "%pI4", &ip); c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, - clusterip_procdir, + cn->procdir, &clusterip_proc_fops, c); if (!c->pde) { kfree(c); @@ -189,9 +201,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, } #endif - spin_lock_bh(&clusterip_lock); - list_add_rcu(&c->list, &clusterip_configs); - spin_unlock_bh(&clusterip_lock); + spin_lock_bh(&cn->lock); + list_add_rcu(&c->list, &cn->configs); + spin_unlock_bh(&cn->lock); return c; } @@ -370,7 +382,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) /* FIXME: further sanity checks */ - config = clusterip_config_find_get(e->ip.dst.s_addr, 1); + config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { pr_info("no config found for %pI4, need 'new'\n", @@ -384,7 +396,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) return -EINVAL; } - dev = dev_get_by_name(&init_net, e->ip.iniface); + dev = dev_get_by_name(par->net, e->ip.iniface); if (!dev) { pr_info("no such interface %s\n", e->ip.iniface); @@ -492,6 +504,7 @@ arp_mangle(const struct nf_hook_ops *ops, struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; + struct net *net = dev_net(in ? in : out); /* we don't care about non-ethernet and non-ipv4 ARP */ if (arp->ar_hrd != htons(ARPHRD_ETHER) || @@ -508,7 +521,7 @@ arp_mangle(const struct nf_hook_ops *ops, /* if there is no clusterip configuration for the arp reply's * source ip, we don't want to mangle it */ - c = clusterip_config_find_get(payload->src_ip, 0); + c = clusterip_config_find_get(net, payload->src_ip, 0); if (!c) return NF_ACCEPT; @@ -698,48 +711,75 @@ static const struct file_operations clusterip_proc_fops = { #endif /* CONFIG_PROC_FS */ +static int clusterip_net_init(struct net *net) +{ + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + + INIT_LIST_HEAD(&cn->configs); + + spin_lock_init(&cn->lock); + +#ifdef CONFIG_PROC_FS + cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); + if (!cn->procdir) { + pr_err("Unable to proc dir entry\n"); + return -ENOMEM; + } +#endif /* CONFIG_PROC_FS */ + + return 0; +} + +static void clusterip_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + proc_remove(cn->procdir); +#endif +} + +static struct pernet_operations clusterip_net_ops = { + .init = clusterip_net_init, + .exit = clusterip_net_exit, + .id = &clusterip_net_id, + .size = sizeof(struct clusterip_net), +}; + static int __init clusterip_tg_init(void) { int ret; - ret = xt_register_target(&clusterip_tg_reg); + ret = register_pernet_subsys(&clusterip_net_ops); if (ret < 0) return ret; + ret = xt_register_target(&clusterip_tg_reg); + if (ret < 0) + goto cleanup_subsys; + ret = nf_register_hook(&cip_arp_ops); if (ret < 0) goto cleanup_target; -#ifdef CONFIG_PROC_FS - clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); - if (!clusterip_procdir) { - pr_err("Unable to proc dir entry\n"); - ret = -ENOMEM; - goto cleanup_hook; - } -#endif /* CONFIG_PROC_FS */ - pr_info("ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION); + return 0; -#ifdef CONFIG_PROC_FS -cleanup_hook: - nf_unregister_hook(&cip_arp_ops); -#endif /* CONFIG_PROC_FS */ cleanup_target: xt_unregister_target(&clusterip_tg_reg); +cleanup_subsys: + unregister_pernet_subsys(&clusterip_net_ops); return ret; } static void __exit clusterip_tg_exit(void) { pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); -#ifdef CONFIG_PROC_FS - proc_remove(clusterip_procdir); -#endif + nf_unregister_hook(&cip_arp_ops); xt_unregister_target(&clusterip_tg_reg); + unregister_pernet_subsys(&clusterip_net_ops); /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ rcu_barrier_bh(); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index cbc22158af49..9cb993cd224b 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -220,6 +220,7 @@ static void ipt_ulog_packet(struct net *net, ub->qlen++; pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* We might not have a timestamp, get one */ if (skb->tstamp.tv64 == 0) @@ -238,8 +239,6 @@ static void ipt_ulog_packet(struct net *net, } else if (loginfo->prefix[0] != '\0') strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); - else - *(pm->prefix) = '\0'; if (in && in->hard_header_len > 0 && skb->mac_header != skb->network_header && @@ -251,13 +250,9 @@ static void ipt_ulog_packet(struct net *net, if (in) strncpy(pm->indev_name, in->name, sizeof(pm->indev_name)); - else - pm->indev_name[0] = '\0'; if (out) strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); - else - pm->outdev_name[0] = '\0'; /* copy_len <= skb->len, so can't fail. */ if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6011615e810d..d2d325382b13 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -295,7 +295,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", dst_entries_get_slow(&ipv4_dst_ops), - st->in_hit, + 0, /* st->in_hit */ st->in_slow_tot, st->in_slow_mc, st->in_no_route, @@ -303,16 +303,16 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) st->in_martian_dst, st->in_martian_src, - st->out_hit, + 0, /* st->out_hit */ st->out_slow_tot, st->out_slow_mc, - st->gc_total, - st->gc_ignored, - st->gc_goal_miss, - st->gc_dst_overflow, - st->in_hlist_search, - st->out_hlist_search + 0, /* st->gc_total */ + 0, /* st->gc_ignored */ + 0, /* st->gc_goal_miss */ + 0, /* st->gc_dst_overflow */ + 0, /* st->in_hlist_search */ + 0 /* st->out_hlist_search */ ); return 0; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3b64c59b4109..b95331e6c077 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -25,15 +25,7 @@ extern int sysctl_tcp_syncookies; -__u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; -EXPORT_SYMBOL(syncookie_secret); - -static __init int init_syncookies(void) -{ - get_random_bytes(syncookie_secret, sizeof(syncookie_secret)); - return 0; -} -__initcall(init_syncookies); +static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) @@ -44,8 +36,11 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, u32 count, int c) { - __u32 *tmp = __get_cpu_var(ipv4_cookie_scratch); + __u32 *tmp; + + net_get_random_once(syncookie_secret, sizeof(syncookie_secret)); + tmp = __get_cpu_var(ipv4_cookie_scratch); memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c])); tmp[0] = (__force u32)saddr; tmp[1] = (__force u32)daddr; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c08f096d46b5..d5b1390eebbe 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -200,49 +200,6 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl, return ret; } -static int ipv4_tcp_mem(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) -{ - int ret; - unsigned long vec[3]; - struct net *net = current->nsproxy->net_ns; -#ifdef CONFIG_MEMCG_KMEM - struct mem_cgroup *memcg; -#endif - - struct ctl_table tmp = { - .data = &vec, - .maxlen = sizeof(vec), - .mode = ctl->mode, - }; - - if (!write) { - ctl->data = &net->ipv4.sysctl_tcp_mem; - return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos); - } - - ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos); - if (ret) - return ret; - -#ifdef CONFIG_MEMCG_KMEM - rcu_read_lock(); - memcg = mem_cgroup_from_task(current); - - tcp_prot_mem(memcg, vec[0], 0); - tcp_prot_mem(memcg, vec[1], 1); - tcp_prot_mem(memcg, vec[2], 2); - rcu_read_unlock(); -#endif - - net->ipv4.sysctl_tcp_mem[0] = vec[0]; - net->ipv4.sysctl_tcp_mem[1] = vec[1]; - net->ipv4.sysctl_tcp_mem[2] = vec[2]; - - return 0; -} - static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -274,6 +231,11 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, ret = -EINVAL; goto bad_key; } + /* Generate a dummy secret but don't publish it. This + * is needed so we don't regenerate a new key on the + * first invocation of tcp_fastopen_cookie_gen + */ + tcp_fastopen_init_key_once(false); tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); } @@ -552,6 +514,13 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { + .procname = "tcp_mem", + .maxlen = sizeof(sysctl_tcp_mem), + .data = &sysctl_tcp_mem, + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { .procname = "tcp_wmem", .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), @@ -860,12 +829,6 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_local_port_range, }, - { - .procname = "tcp_mem", - .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem), - .mode = 0644, - .proc_handler = ipv4_tcp_mem, - }, { } }; @@ -875,32 +838,15 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) table = ipv4_net_table; if (!net_eq(net, &init_net)) { + int i; + table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); if (table == NULL) goto err_alloc; - table[0].data = - &net->ipv4.sysctl_icmp_echo_ignore_all; - table[1].data = - &net->ipv4.sysctl_icmp_echo_ignore_broadcasts; - table[2].data = - &net->ipv4.sysctl_icmp_ignore_bogus_error_responses; - table[3].data = - &net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr; - table[4].data = - &net->ipv4.sysctl_icmp_ratelimit; - table[5].data = - &net->ipv4.sysctl_icmp_ratemask; - table[6].data = - &net->ipv4.sysctl_ping_group_range; - table[7].data = - &net->ipv4.sysctl_tcp_ecn; - table[8].data = - &net->ipv4.sysctl_local_ports.range; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; + /* Update the variables to point into the current struct net */ + for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) + table[i].data += (void *)net - (void *)&init_net; } /* @@ -917,8 +863,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_local_ports.range[0] = 32768; net->ipv4.sysctl_local_ports.range[1] = 61000; - tcp_init_mem(net); - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index be4b161802e8..8e8529d3c8c9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -288,9 +288,11 @@ int sysctl_tcp_min_tso_segs __read_mostly = 2; struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); +long sysctl_tcp_mem[3] __read_mostly; int sysctl_tcp_wmem[3] __read_mostly; int sysctl_tcp_rmem[3] __read_mostly; +EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); @@ -3097,13 +3099,13 @@ static int __init set_thash_entries(char *str) } __setup("thash_entries=", set_thash_entries); -void tcp_init_mem(struct net *net) +static void tcp_init_mem(void) { unsigned long limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); - net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; - net->ipv4.sysctl_tcp_mem[1] = limit; - net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2; + sysctl_tcp_mem[0] = limit / 4 * 3; + sysctl_tcp_mem[1] = limit; + sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; } void __init tcp_init(void) @@ -3165,7 +3167,7 @@ void __init tcp_init(void) sysctl_tcp_max_orphans = cnt / 2; sysctl_max_syn_backlog = max(128, cnt / 256); - tcp_init_mem(&init_net); + tcp_init_mem(); /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); max_wshare = min(4UL*1024*1024, limit); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index ab7bd35bb312..766032b4a6c3 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -14,6 +14,20 @@ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); +void tcp_fastopen_init_key_once(bool publish) +{ + static u8 key[TCP_FASTOPEN_KEY_LENGTH]; + + /* tcp_fastopen_reset_cipher publishes the new context + * atomically, so we allow this race happening here. + * + * All call sites of tcp_fastopen_cookie_gen also check + * for a valid cookie, so this is an acceptable risk. + */ + if (net_get_random_once(key, sizeof(key)) && publish) + tcp_fastopen_reset_cipher(key, sizeof(key)); +} + static void tcp_fastopen_ctx_free(struct rcu_head *head) { struct tcp_fastopen_context *ctx = @@ -70,6 +84,8 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, __be32 path[4] = { src, dst, 0, 0 }; struct tcp_fastopen_context *ctx; + tcp_fastopen_init_key_once(true); + rcu_read_lock(); ctx = rcu_dereference(tcp_fastopen_ctx); if (ctx) { @@ -78,14 +94,3 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, } rcu_read_unlock(); } - -static int __init tcp_fastopen_init(void) -{ - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; - - get_random_bytes(key, sizeof(key)); - tcp_fastopen_reset_cipher(key, sizeof(key)); - return 0; -} - -late_initcall(tcp_fastopen_init); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb651a069a6c..63095b218b4a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2903,7 +2903,8 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * left edge of the send window. * See draft-ietf-tcplw-high-performance-00, section 3.3. */ - if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) + if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + flag & FLAG_ACKED) seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; if (seq_rtt < 0) @@ -2918,14 +2919,19 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, } /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */ -static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) +static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) { struct tcp_sock *tp = tcp_sk(sk); s32 seq_rtt = -1; - if (tp->lsndtime && !tp->total_retrans) - seq_rtt = tcp_time_stamp - tp->lsndtime; - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); + if (synack_stamp && !tp->total_retrans) + seq_rtt = tcp_time_stamp - synack_stamp; + + /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets + * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() + */ + if (!tp->srtt) + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); } static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) @@ -3028,6 +3034,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, s32 seq_rtt = -1; s32 ca_seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); + bool rtt_update; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); @@ -3104,14 +3111,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) || - (flag & FLAG_ACKED)) - tcp_rearm_rto(sk); + rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt); if (flag & FLAG_ACKED) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + tcp_rearm_rto(sk); if (unlikely(icsk->icsk_mtup.probe_size && !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { tcp_mtup_probe_success(sk); @@ -3150,6 +3156,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ca_ops->pkts_acked(sk, pkts_acked, rtt_us); } + } else if (skb && rtt_update && sack_rtt >= 0 && + sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) { + /* Do not re-arm RTO if the sack RTT is measured from data sent + * after when the head was last (re)transmitted. Otherwise the + * timeout may continue to extend in loss recovery. + */ + tcp_rearm_rto(sk); } #if FASTRETRANS_DEBUG > 0 @@ -3338,7 +3351,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) tcp_init_cwnd_reduction(sk, true); tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_Open); + tcp_try_keep_open(sk); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBERECOVERY); } @@ -5626,6 +5639,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct request_sock *req; int queued = 0; bool acceptable; + u32 synack_stamp; tp->rx_opt.saw_tstamp = 0; @@ -5708,9 +5722,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * so release it. */ if (req) { + synack_stamp = tcp_rsk(req)->snt_synack; tp->total_retrans = req->num_retrans; reqsk_fastopen_remove(sk, req, false); } else { + synack_stamp = tp->lsndtime; /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); tcp_init_congestion_control(sk); @@ -5733,7 +5749,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_synack_rtt_meas(sk, req); + tcp_synack_rtt_meas(sk, synack_stamp); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; @@ -5751,6 +5767,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } else tcp_init_metrics(sk); + tcp_update_pacing_rate(sk); + /* Prevent spurious tcp_cwnd_restart() on first data packet */ tp->lsndtime = tcp_time_stamp; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 114d1b748cbb..300ab2c93f29 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2749,6 +2749,7 @@ struct proto tcp_prot = { .orphan_count = &tcp_orphan_count, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, + .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 559d4ae6ebf4..03e9154f7e68 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -6,15 +6,10 @@ #include <linux/memcontrol.h> #include <linux/module.h> -static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) -{ - return container_of(cg_proto, struct tcp_memcontrol, cg_proto); -} - static void memcg_tcp_enter_memory_pressure(struct sock *sk) { if (sk->sk_cgrp->memory_pressure) - *sk->sk_cgrp->memory_pressure = 1; + sk->sk_cgrp->memory_pressure = 1; } EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); @@ -27,34 +22,24 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) */ struct res_counter *res_parent = NULL; struct cg_proto *cg_proto, *parent_cg; - struct tcp_memcontrol *tcp; struct mem_cgroup *parent = parent_mem_cgroup(memcg); - struct net *net = current->nsproxy->net_ns; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) return 0; - tcp = tcp_from_cgproto(cg_proto); - - tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0]; - tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1]; - tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2]; - tcp->tcp_memory_pressure = 0; + cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0]; + cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1]; + cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2]; + cg_proto->memory_pressure = 0; + cg_proto->memcg = memcg; parent_cg = tcp_prot.proto_cgroup(parent); if (parent_cg) - res_parent = parent_cg->memory_allocated; - - res_counter_init(&tcp->tcp_memory_allocated, res_parent); - percpu_counter_init(&tcp->tcp_sockets_allocated, 0); + res_parent = &parent_cg->memory_allocated; - cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure; - cg_proto->memory_pressure = &tcp->tcp_memory_pressure; - cg_proto->sysctl_mem = tcp->tcp_prot_mem; - cg_proto->memory_allocated = &tcp->tcp_memory_allocated; - cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; - cg_proto->memcg = memcg; + res_counter_init(&cg_proto->memory_allocated, res_parent); + percpu_counter_init(&cg_proto->sockets_allocated, 0); return 0; } @@ -63,21 +48,17 @@ EXPORT_SYMBOL(tcp_init_cgroup); void tcp_destroy_cgroup(struct mem_cgroup *memcg) { struct cg_proto *cg_proto; - struct tcp_memcontrol *tcp; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) return; - tcp = tcp_from_cgproto(cg_proto); - percpu_counter_destroy(&tcp->tcp_sockets_allocated); + percpu_counter_destroy(&cg_proto->sockets_allocated); } EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) { - struct net *net = current->nsproxy->net_ns; - struct tcp_memcontrol *tcp; struct cg_proto *cg_proto; u64 old_lim; int i; @@ -90,16 +71,14 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (val > RES_COUNTER_MAX) val = RES_COUNTER_MAX; - tcp = tcp_from_cgproto(cg_proto); - - old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); - ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val); + old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT); + ret = res_counter_set_limit(&cg_proto->memory_allocated, val); if (ret) return ret; for (i = 0; i < 3; i++) - tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, - net->ipv4.sysctl_tcp_mem[i]); + cg_proto->sysctl_mem[i] = min_t(long, val >> PAGE_SHIFT, + sysctl_tcp_mem[i]); if (val == RES_COUNTER_MAX) clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); @@ -156,28 +135,24 @@ static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) { - struct tcp_memcontrol *tcp; struct cg_proto *cg_proto; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) return default_val; - tcp = tcp_from_cgproto(cg_proto); - return res_counter_read_u64(&tcp->tcp_memory_allocated, type); + return res_counter_read_u64(&cg_proto->memory_allocated, type); } static u64 tcp_read_usage(struct mem_cgroup *memcg) { - struct tcp_memcontrol *tcp; struct cg_proto *cg_proto; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; - tcp = tcp_from_cgproto(cg_proto); - return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); + return res_counter_read_u64(&cg_proto->memory_allocated, RES_USAGE); } static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) @@ -205,54 +180,25 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) { struct mem_cgroup *memcg; - struct tcp_memcontrol *tcp; struct cg_proto *cg_proto; memcg = mem_cgroup_from_css(css); cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) return 0; - tcp = tcp_from_cgproto(cg_proto); switch (event) { case RES_MAX_USAGE: - res_counter_reset_max(&tcp->tcp_memory_allocated); + res_counter_reset_max(&cg_proto->memory_allocated); break; case RES_FAILCNT: - res_counter_reset_failcnt(&tcp->tcp_memory_allocated); + res_counter_reset_failcnt(&cg_proto->memory_allocated); break; } return 0; } -unsigned long long tcp_max_memory(const struct mem_cgroup *memcg) -{ - struct tcp_memcontrol *tcp; - struct cg_proto *cg_proto; - - cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg); - if (!cg_proto) - return 0; - - tcp = tcp_from_cgproto(cg_proto); - return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); -} - -void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx) -{ - struct tcp_memcontrol *tcp; - struct cg_proto *cg_proto; - - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) - return; - - tcp = tcp_from_cgproto(cg_proto); - - tcp->tcp_prot_mem[idx] = val; -} - static struct cftype tcp_files[] = { { .name = "kmem.tcp.limit_in_bytes", diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 4a2a84110dfb..2ab09cbae74d 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -671,8 +671,9 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen; write_seqlock_bh(&fastopen_seqlock); - tfom->mss = mss; - if (cookie->len > 0) + if (mss) + tfom->mss = mss; + if (cookie && cookie->len > 0) tfom->cookie = *cookie; if (syn_lost) { ++tfom->syn_loss; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 3a7525e6c086..a2b68a108eae 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -14,10 +14,11 @@ #include <net/tcp.h> #include <net/protocol.h> -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, +struct sk_buff *tcp_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int sum_truesize = 0; struct tcphdr *th; unsigned int thlen; unsigned int seq; @@ -56,6 +57,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | 0) || @@ -102,13 +105,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, if (copy_destructor) { skb->destructor = gso_skb->destructor; skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. - */ - skb->truesize = mss; - gso_skb->truesize -= mss; + sum_truesize += skb->truesize; } skb = skb->next; th = tcp_hdr(skb); @@ -125,7 +122,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, if (copy_destructor) { swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); + sum_truesize += skb->truesize; + atomic_add(sum_truesize - gso_skb->truesize, + &skb->sk->sk_wmem_alloc); } delta = htonl(oldlen + (skb_tail_pointer(skb) - @@ -139,7 +138,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, out: return segs; } -EXPORT_SYMBOL(tcp_tso_segment); +EXPORT_SYMBOL(tcp_gso_segment); struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -320,7 +319,7 @@ static int tcp4_gro_complete(struct sk_buff *skb) static const struct net_offload tcpv4_offload = { .callbacks = { .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, + .gso_segment = tcp_gso_segment, .gro_receive = tcp4_gro_receive, .gro_complete = tcp4_gro_complete, }, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e5ce0e1d13b7..672854664ff5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -986,8 +986,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk) || - skb->ip_summed == CHECKSUM_NONE) { + /* Make sure we own this skb before messing gso_size/gso_segs */ + WARN_ON_ONCE(skb_cloned(skb)); + + if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1067,9 +1069,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, if (nsize < 0) nsize = 0; - if (skb_cloned(skb) && - skb_is_nonlinear(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; /* Get a new skb... force flag on. */ @@ -2344,6 +2344,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int oldpcount = tcp_skb_pcount(skb); if (unlikely(oldpcount > 1)) { + if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; tcp_init_tso_segs(sk, skb, cur_mss); tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); } @@ -2351,21 +2353,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) tcp_retrans_try_collapse(sk, skb, cur_mss); - /* Some Solaris stacks overoptimize and ignore the FIN on a - * retransmit when old data is attached. So strip it off - * since it is cheap to do so and saves bytes on the network. - */ - if (skb->len > 0 && - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && - tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { - if (!pskb_trim(skb, 0)) { - /* Reuse, even though it does some unnecessary work */ - tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1, - TCP_SKB_CB(skb)->tcp_flags); - skb->ip_summed = CHECKSUM_NONE; - } - } - /* Make a copy, if the first transmission SKB clone we made * is still in somebody's hands, else make a clone. */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index af07b5b23ebf..64f0354c84c7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -156,12 +156,16 @@ static bool retransmits_timed_out(struct sock *sk, static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); int retry_until; bool do_reset, syn_set = false; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { - if (icsk->icsk_retransmits) + if (icsk->icsk_retransmits) { dst_negative_advice(sk); + if (tp->syn_fastopen || tp->syn_data) + tcp_fastopen_cache_set(sk, 0, NULL, true); + } retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; syn_set = true; } else { diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index 6c0eea2f8249..0531b99d8637 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -15,10 +15,10 @@ struct vegas { u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */ }; -extern void tcp_vegas_init(struct sock *sk); -extern void tcp_vegas_state(struct sock *sk, u8 ca_state); -extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); -extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); -extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); +void tcp_vegas_init(struct sock *sk); +void tcp_vegas_state(struct sock *sk, u8 ca_state); +void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); +void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); +void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); #endif /* __TCP_VEGAS_H */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9f27bb800607..89909dd730dd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -407,6 +407,18 @@ static inline int compute_score2(struct sock *sk, struct net *net, return score; } +static unsigned int udp_ehashfn(struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) +{ + static u32 udp_ehash_secret __read_mostly; + + net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret)); + + return __inet_ehashfn(laddr, lport, faddr, fport, + udp_ehash_secret + net_hash_mix(net)); +} + /* called with read_rcu_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, @@ -430,8 +442,8 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - hash = inet_ehashfn(net, daddr, hnum, - saddr, sport); + hash = udp_ehashfn(net, daddr, hnum, + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { @@ -511,8 +523,8 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - hash = inet_ehashfn(net, daddr, hnum, - saddr, sport); + hash = udp_ehashfn(net, daddr, hnum, + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 5a681e298b90..f3c27899f62b 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -5,30 +5,30 @@ #include <net/protocol.h> #include <net/inet_common.h> -extern int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int ); -extern void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); +int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int); +void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); -extern int udp_v4_get_port(struct sock *sk, unsigned short snum); +int udp_v4_get_port(struct sock *sk, unsigned short snum); -extern int udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); -extern int udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); +int udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); +int udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); #ifdef CONFIG_COMPAT -extern int compat_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); -extern int compat_udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); +int compat_udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); +int compat_udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); #endif -extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); -extern int udp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags); -extern int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -extern void udp_destroy_sock(struct sock *sk); +int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len); +int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, + int flags); +int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS -extern int udp4_seq_show(struct seq_file *seq, void *v); +int udp4_seq_show(struct seq_file *seq, void *v); #endif #endif /* _UDP4_IMPL_H */ diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f35eccaa855e..83206de2bc76 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -52,6 +52,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_IPIP | SKB_GSO_GRE | SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 9a459be24af7..e1a63930a967 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -104,9 +104,14 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) const struct iphdr *iph = ip_hdr(skb); u8 *xprth = skb_network_header(skb) + iph->ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; + fl4->flowi4_oif = reverse ? skb->skb_iif : oif; if (!ip_is_fragment(iph)) { switch (iph->protocol) { @@ -235,7 +240,7 @@ static struct dst_ops xfrm4_dst_ops = { .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 11b13ea69db4..d92e5586783e 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -21,24 +21,6 @@ menuconfig IPV6 if IPV6 -config IPV6_PRIVACY - bool "IPv6: Privacy Extensions (RFC 3041) support" - ---help--- - Privacy Extensions for Stateless Address Autoconfiguration in IPv6 - support. With this option, additional periodically-altered - pseudo-random global-scope unicast address(es) will be assigned to - your interface(s). - - We use our standard pseudo-random algorithm to generate the - randomized interface identifier, instead of one described in RFC 3041. - - By default the kernel does not generate temporary addresses. - To use temporary addresses, do - - echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr - - See <file:Documentation/networking/ip-sysctl.txt> for details. - config IPV6_ROUTER_PREF bool "IPv6: Router Preference (RFC 4191) support" ---help--- @@ -153,6 +135,17 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION ---help--- Support for MIPv6 route optimization mode. +config IPV6_VTI +tristate "Virtual (secure) IPv6: tunneling" + select IPV6_TUNNEL + depends on INET6_XFRM_MODE_TUNNEL + ---help--- + Tunneling means encapsulating data of one protocol type within + another protocol and sending it over a channel that understands the + encapsulating protocol. This can be used with xfrm mode tunnel to give + the notion of a secure tunnel for IPSEC and then use routing protocol + on top. + config IPV6_SIT tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)" select INET_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 470a9c008e9b..17bb830872db 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_IPV6_MIP6) += mip6.o obj-$(CONFIG_NETFILTER) += netfilter/ +obj-$(CONFIG_IPV6_VTI) += ip6_vti.o obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cd3fb301da38..542d09561ed6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -83,11 +83,7 @@ #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> #include <linux/netconf.h> - -#ifdef CONFIG_IPV6_PRIVACY #include <linux/random.h> -#endif - #include <linux/uaccess.h> #include <asm/unaligned.h> @@ -124,11 +120,9 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) } #endif -#ifdef CONFIG_IPV6_PRIVACY static void __ipv6_regen_rndid(struct inet6_dev *idev); static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static void ipv6_regen_rndid(unsigned long data); -#endif static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); @@ -183,13 +177,11 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, -#ifdef CONFIG_IPV6_PRIVACY .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, -#endif .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_pinfo = 1, @@ -221,13 +213,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, -#ifdef CONFIG_IPV6_PRIVACY .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, -#endif .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_pinfo = 1, @@ -371,7 +361,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) } #endif -#ifdef CONFIG_IPV6_PRIVACY INIT_LIST_HEAD(&ndev->tempaddr_list); setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || @@ -384,7 +373,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) in6_dev_hold(ndev); ipv6_regen_rndid((unsigned long) ndev); } -#endif + ndev->token = in6addr_any; if (netif_running(dev) && addrconf_qdisc_ok(dev)) @@ -865,12 +854,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, /* Add to inet6_dev unicast addr list. */ ipv6_link_dev_addr(idev, ifa); -#ifdef CONFIG_IPV6_PRIVACY if (ifa->flags&IFA_F_TEMPORARY) { list_add(&ifa->tmp_list, &idev->tempaddr_list); in6_ifa_hold(ifa); } -#endif in6_ifa_hold(ifa); write_unlock(&idev->lock); @@ -913,7 +900,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) spin_unlock_bh(&addrconf_hash_lock); write_lock_bh(&idev->lock); -#ifdef CONFIG_IPV6_PRIVACY + if (ifp->flags&IFA_F_TEMPORARY) { list_del(&ifp->tmp_list); if (ifp->ifpub) { @@ -922,7 +909,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } __in6_ifa_put(ifp); } -#endif list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) { if (ifa == ifp) { @@ -1013,7 +999,6 @@ out: in6_ifa_put(ifp); } -#ifdef CONFIG_IPV6_PRIVACY static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) { struct inet6_dev *idev = ifp->idev; @@ -1116,7 +1101,6 @@ retry: out: return ret; } -#endif /* * Choose an appropriate source address (RFC3484) @@ -1131,9 +1115,7 @@ enum { #endif IPV6_SADDR_RULE_OIF, IPV6_SADDR_RULE_LABEL, -#ifdef CONFIG_IPV6_PRIVACY IPV6_SADDR_RULE_PRIVACY, -#endif IPV6_SADDR_RULE_ORCHID, IPV6_SADDR_RULE_PREFIX, IPV6_SADDR_RULE_MAX @@ -1247,7 +1229,6 @@ static int ipv6_get_saddr_eval(struct net *net, &score->ifa->addr, score->addr_type, score->ifa->idev->dev->ifindex) == dst->label; break; -#ifdef CONFIG_IPV6_PRIVACY case IPV6_SADDR_RULE_PRIVACY: { /* Rule 7: Prefer public address @@ -1259,7 +1240,6 @@ static int ipv6_get_saddr_eval(struct net *net, ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; break; } -#endif case IPV6_SADDR_RULE_ORCHID: /* Rule 8-: Prefer ORCHID vs ORCHID or * non-ORCHID vs non-ORCHID @@ -1588,7 +1568,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (dad_failed) ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); -#ifdef CONFIG_IPV6_PRIVACY } else if (ifp->flags&IFA_F_TEMPORARY) { struct inet6_ifaddr *ifpub; spin_lock_bh(&ifp->lock); @@ -1602,7 +1581,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); -#endif } else ipv6_del_addr(ifp); } @@ -1851,7 +1829,6 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) return err; } -#ifdef CONFIG_IPV6_PRIVACY /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ static void __ipv6_regen_rndid(struct inet6_dev *idev) { @@ -1919,7 +1896,6 @@ static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmp if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) __ipv6_regen_rndid(idev); } -#endif /* * Add prefix route. @@ -2207,9 +2183,7 @@ ok: if (ifp) { int flags; unsigned long now; -#ifdef CONFIG_IPV6_PRIVACY struct inet6_ifaddr *ift; -#endif u32 stored_lft; /* update lifetime (RFC2462 5.5.3 e) */ @@ -2250,7 +2224,6 @@ ok: } else spin_unlock(&ifp->lock); -#ifdef CONFIG_IPV6_PRIVACY read_lock_bh(&in6_dev->lock); /* update all temporary addresses in the list */ list_for_each_entry(ift, &in6_dev->tempaddr_list, @@ -2315,7 +2288,7 @@ ok: } else { read_unlock_bh(&in6_dev->lock); } -#endif + in6_ifa_put(ifp); addrconf_verify(0); } @@ -2995,7 +2968,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); -#ifdef CONFIG_IPV6_PRIVACY if (how && del_timer(&idev->regen_timer)) in6_dev_put(idev); @@ -3015,7 +2987,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) in6_ifa_put(ifa); write_lock_bh(&idev->lock); } -#endif while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, @@ -3528,7 +3499,6 @@ restart: in6_ifa_put(ifp); goto restart; } -#ifdef CONFIG_IPV6_PRIVACY } else if ((ifp->flags&IFA_F_TEMPORARY) && !(ifp->flags&IFA_F_TENTATIVE)) { unsigned long regen_advance = ifp->idev->cnf.regen_max_retry * @@ -3556,7 +3526,6 @@ restart: } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ; spin_unlock(&ifp->lock); -#endif } else { /* ifp->prefered_lft <= ifp->valid_lft */ if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) @@ -4128,13 +4097,11 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval); array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] = jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval); -#ifdef CONFIG_IPV6_PRIVACY array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft; array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft; array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry; array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; -#endif array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; @@ -4828,7 +4795,6 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, -#ifdef CONFIG_IPV6_PRIVACY { .procname = "use_tempaddr", .data = &ipv6_devconf.use_tempaddr, @@ -4864,7 +4830,6 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, -#endif { .procname = "max_addresses", .data = &ipv6_devconf.max_addresses, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a2cb07cd3850..6468bda1f2b9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -110,11 +110,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; - if (sock->type != SOCK_RAW && - sock->type != SOCK_DGRAM && - !inet_ehash_secret) - build_ehash_secret(); - /* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; @@ -870,8 +865,6 @@ static int __init inet6_init(void) if (err) goto out_sock_register_fail; - tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; - /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 73784c3d4642..82e1da3a40b9 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -618,8 +618,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG && + if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index d3618a78fcac..b8719df0366e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -164,10 +164,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) u8 *iv; u8 *tail; __be32 *seqhi; - struct esp_data *esp = x->data; /* skb is pure payload to encrypt */ - aead = esp->aead; + aead = x->data; alen = crypto_aead_authsize(aead); tfclen = 0; @@ -181,8 +180,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(skb->len + 2 + tfclen, blksize); - if (esp->padlen) - clen = ALIGN(clen, esp->padlen); plen = clen - skb->len - tfclen; err = skb_cow_data(skb, tfclen + plen + alen, &trailer); @@ -271,8 +268,7 @@ error: static int esp_input_done2(struct sk_buff *skb, int err) { struct xfrm_state *x = xfrm_input_state(skb); - struct esp_data *esp = x->data; - struct crypto_aead *aead = esp->aead; + struct crypto_aead *aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); int elen = skb->len - hlen; @@ -325,8 +321,7 @@ static void esp_input_done(struct crypto_async_request *base, int err) static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) { struct ip_esp_hdr *esph; - struct esp_data *esp = x->data; - struct crypto_aead *aead = esp->aead; + struct crypto_aead *aead = x->data; struct aead_request *req; struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); @@ -414,9 +409,8 @@ out: static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) { - struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); - u32 align = max_t(u32, blksize, esp->padlen); + struct crypto_aead *aead = x->data; + u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4); unsigned int net_adj; if (x->props.mode != XFRM_MODE_TUNNEL) @@ -424,8 +418,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) else net_adj = 0; - return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - - net_adj) & ~(align - 1)) + net_adj - 2; + return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - + net_adj) & ~(blksize - 1)) + net_adj - 2; } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -436,8 +430,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG && + if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return; @@ -455,18 +448,16 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static void esp6_destroy(struct xfrm_state *x) { - struct esp_data *esp = x->data; + struct crypto_aead *aead = x->data; - if (!esp) + if (!aead) return; - crypto_free_aead(esp->aead); - kfree(esp); + crypto_free_aead(aead); } static int esp_init_aead(struct xfrm_state *x) { - struct esp_data *esp = x->data; struct crypto_aead *aead; int err; @@ -475,7 +466,7 @@ static int esp_init_aead(struct xfrm_state *x) if (IS_ERR(aead)) goto error; - esp->aead = aead; + x->data = aead; err = crypto_aead_setkey(aead, x->aead->alg_key, (x->aead->alg_key_len + 7) / 8); @@ -492,7 +483,6 @@ error: static int esp_init_authenc(struct xfrm_state *x) { - struct esp_data *esp = x->data; struct crypto_aead *aead; struct crypto_authenc_key_param *param; struct rtattr *rta; @@ -527,7 +517,7 @@ static int esp_init_authenc(struct xfrm_state *x) if (IS_ERR(aead)) goto error; - esp->aead = aead; + x->data = aead; keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) + (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param)); @@ -582,7 +572,6 @@ error: static int esp6_init_state(struct xfrm_state *x) { - struct esp_data *esp; struct crypto_aead *aead; u32 align; int err; @@ -590,11 +579,7 @@ static int esp6_init_state(struct xfrm_state *x) if (x->encap) return -EINVAL; - esp = kzalloc(sizeof(*esp), GFP_KERNEL); - if (esp == NULL) - return -ENOMEM; - - x->data = esp; + x->data = NULL; if (x->aead) err = esp_init_aead(x); @@ -604,9 +589,7 @@ static int esp6_init_state(struct xfrm_state *x) if (err) goto error; - aead = esp->aead; - - esp->padlen = 0; + aead = x->data; x->props.header_len = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -626,9 +609,7 @@ static int esp6_init_state(struct xfrm_state *x) } align = ALIGN(crypto_aead_blocksize(aead), 4); - if (esp->padlen) - align = max_t(u32, align, esp->padlen); - x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead); + x->props.trailer_len = align + 1 + crypto_aead_authsize(aead); error: return err; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 842d833dfc18..262e13c02ec2 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,6 +23,39 @@ #include <net/secure_seq.h> #include <net/ip.h> +static unsigned int inet6_ehashfn(struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) +{ + static u32 inet6_ehash_secret __read_mostly; + static u32 ipv6_hash_secret __read_mostly; + + u32 lhash, fhash; + + net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); + net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); + + lhash = (__force u32)laddr->s6_addr32[3]; + fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret); + + return __inet6_ehashfn(lhash, lport, fhash, fport, + inet6_ehash_secret + net_hash_mix(net)); +} + +static int inet6_sk_ehashfn(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *faddr = &sk->sk_v6_daddr; + const __u16 lport = inet->inet_num; + const __be16 fport = inet->inet_dport; + struct net *net = sock_net(sk); + + return inet6_ehashfn(net, laddr, lport, faddr, fport); +} + int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 1ef1fa2b22a6..bf4a9a084de5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -976,6 +976,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) if (t->parms.o_flags&GRE_SEQ) addend += 4; } + t->hlen = addend; if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & @@ -1002,8 +1003,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) } ip6_rt_put(rt); } - - t->hlen = addend; } static int ip6gre_tnl_change(struct ip6_tnl *t, diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d82de7228100..4b851692b1f6 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -66,7 +66,6 @@ static int ipv6_gso_send_check(struct sk_buff *skb) __skb_pull(skb, sizeof(*ipv6h)); err = -EPROTONOSUPPORT; - rcu_read_lock(); ops = rcu_dereference(inet6_offloads[ ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); @@ -74,7 +73,6 @@ static int ipv6_gso_send_check(struct sk_buff *skb) skb_reset_transport_header(skb); err = ops->callbacks.gso_send_check(skb); } - rcu_read_unlock(); out: return err; @@ -92,46 +90,58 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u8 *prevhdr; int offset = 0; bool tunnel; + int nhoff; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) goto out; + skb_reset_network_header(skb); + nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) goto out; - tunnel = skb->encapsulation; + tunnel = SKB_GSO_CB(skb)->encap_level > 0; + if (tunnel) + features = skb->dev->hw_enc_features & netif_skb_features(skb); + SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); + ipv6h = ipv6_hdr(skb); __skb_pull(skb, sizeof(*ipv6h)); segs = ERR_PTR(-EPROTONOSUPPORT); proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); - rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); segs = ops->callbacks.gso_segment(skb, features); } - rcu_read_unlock(); if (IS_ERR(segs)) goto out; for (skb = segs; skb; skb = skb->next) { - ipv6h = ipv6_hdr(skb); - ipv6h->payload_len = htons(skb->len - skb->mac_len - - sizeof(*ipv6h)); + ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); + ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); + if (tunnel) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + skb->network_header = (u8 *)ipv6h - skb->head; + if (!tunnel && proto == IPPROTO_UDP) { unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - fptr = (struct frag_hdr *)(skb_network_header(skb) + - unfrag_ip6hlen); + fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); if (skb->next != NULL) fptr->frag_off |= htons(IP6_MF); @@ -267,6 +277,13 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { }, }; +static const struct net_offload sit_offload = { + .callbacks = { + .gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, + }, +}; + static int __init ipv6_offload_init(void) { @@ -278,6 +295,9 @@ static int __init ipv6_offload_init(void) pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); dev_add_offload(&ipv6_packet_offload); + + inet_add_offload(&sit_offload, IPPROTO_IPV6); + return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a54c45ce4a48..91fb4e8212f5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + nexthop = rt6_nexthop((struct rt6_info *)dst); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -874,7 +874,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ rt = (struct rt6_info *) *dst; rcu_read_lock_bh(); - n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock_bh(); @@ -1008,6 +1008,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, { struct sk_buff *skb; + struct frag_hdr fhdr; int err; /* There is support for UDP large send offload by network @@ -1015,8 +1016,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, * udp datagram */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { - struct frag_hdr fhdr; - skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -1036,20 +1035,24 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->transport_header = skb->network_header + fragheaderlen; skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - /* Specify the length of each IPv6 datagram fragment. - * It has to be a multiple of 8. - */ - skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - - sizeof(struct frag_hdr)) & ~7; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr, rt); - skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); + } else if (skb_is_gso(skb)) { + goto append; } + skb->ip_summed = CHECKSUM_PARTIAL; + /* Specify the length of each IPv6 datagram fragment. + * It has to be a multiple of 8. + */ + skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - + sizeof(struct frag_hdr)) & ~7; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + ipv6_select_ident(&fhdr, rt); + skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + +append: return skb_append_datato_frags(sk, skb, getfrag, from, (length - transhdrlen)); } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c new file mode 100644 index 000000000000..ed94ba61dda0 --- /dev/null +++ b/net/ipv6/ip6_vti.c @@ -0,0 +1,1056 @@ +/* + * IPv6 virtual tunneling interface + * + * Copyright (C) 2013 secunet Security Networks AG + * + * Author: + * Steffen Klassert <steffen.klassert@secunet.com> + * + * Based on: + * net/ipv6/ip6_tunnel.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/capability.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/sockios.h> +#include <linux/icmp.h> +#include <linux/if.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/if_tunnel.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/icmpv6.h> +#include <linux/init.h> +#include <linux/route.h> +#include <linux/rtnetlink.h> +#include <linux/netfilter_ipv6.h> +#include <linux/slab.h> +#include <linux/hash.h> + +#include <linux/uaccess.h> +#include <linux/atomic.h> + +#include <net/icmp.h> +#include <net/ip.h> +#include <net/ip_tunnels.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/ip6_tunnel.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> + +#define HASH_SIZE_SHIFT 5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) + +static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) +{ + u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); + + return hash_32(hash, HASH_SIZE_SHIFT); +} + +static int vti6_dev_init(struct net_device *dev); +static void vti6_dev_setup(struct net_device *dev); +static struct rtnl_link_ops vti6_link_ops __read_mostly; + +static int vti6_net_id __read_mostly; +struct vti6_net { + /* the vti6 tunnel fallback device */ + struct net_device *fb_tnl_dev; + /* lists for storing tunnels in use */ + struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_wc[1]; + struct ip6_tnl __rcu **tnls[2]; +}; + +static struct net_device_stats *vti6_get_stats(struct net_device *dev) +{ + struct pcpu_tstats sum = { 0 }; + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + + sum.rx_packets += tstats->rx_packets; + sum.rx_bytes += tstats->rx_bytes; + sum.tx_packets += tstats->tx_packets; + sum.tx_bytes += tstats->tx_bytes; + } + dev->stats.rx_packets = sum.rx_packets; + dev->stats.rx_bytes = sum.rx_bytes; + dev->stats.tx_packets = sum.tx_packets; + dev->stats.tx_bytes = sum.tx_bytes; + return &dev->stats; +} + +#define for_each_vti6_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + +/** + * vti6_tnl_lookup - fetch tunnel matching the end-point addresses + * @net: network namespace + * @remote: the address of the tunnel exit-point + * @local: the address of the tunnel entry-point + * + * Return: + * tunnel matching given end-points if found, + * else fallback tunnel if its device is up, + * else %NULL + **/ +static struct ip6_tnl * +vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, + const struct in6_addr *local) +{ + unsigned int hash = HASH(remote, local); + struct ip6_tnl *t; + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + t = rcu_dereference(ip6n->tnls_wc[0]); + if (t && (t->dev->flags & IFF_UP)) + return t; + + return NULL; +} + +/** + * vti6_tnl_bucket - get head of list matching given tunnel parameters + * @p: parameters containing tunnel end-points + * + * Description: + * vti6_tnl_bucket() returns the head of the list matching the + * &struct in6_addr entries laddr and raddr in @p. + * + * Return: head of IPv6 tunnel list + **/ +static struct ip6_tnl __rcu ** +vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + unsigned int h = 0; + int prio = 0; + + if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { + prio = 1; + h = HASH(remote, local); + } + return &ip6n->tnls[prio][h]; +} + +static void +vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms); + + rcu_assign_pointer(t->next , rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); +} + +static void +vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp; + struct ip6_tnl *iter; + + for (tp = vti6_tnl_bucket(ip6n, &t->parms); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { + if (t == iter) { + rcu_assign_pointer(*tp, t->next); + break; + } + } +} + +static void vti6_dev_free(struct net_device *dev) +{ + free_percpu(dev->tstats); + free_netdev(dev); +} + +static int vti6_tnl_create2(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + int err; + + err = vti6_dev_init(dev); + if (err < 0) + goto out; + + err = register_netdevice(dev); + if (err < 0) + goto out; + + strcpy(t->parms.name, dev->name); + dev->rtnl_link_ops = &vti6_link_ops; + + dev_hold(dev); + vti6_tnl_link(ip6n, t); + + return 0; + +out: + return err; +} + +static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) +{ + struct net_device *dev; + struct ip6_tnl *t; + char name[IFNAMSIZ]; + int err; + + if (p->name[0]) + strlcpy(name, p->name, IFNAMSIZ); + else + sprintf(name, "ip6_vti%%d"); + + dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup); + if (dev == NULL) + goto failed; + + dev_net_set(dev, net); + + t = netdev_priv(dev); + t->parms = *p; + t->net = dev_net(dev); + + err = vti6_tnl_create2(dev); + if (err < 0) + goto failed_free; + + return t; + +failed_free: + vti6_dev_free(dev); +failed: + return NULL; +} + +/** + * vti6_locate - find or create tunnel matching given parameters + * @net: network namespace + * @p: tunnel parameters + * @create: != 0 if allowed to create new tunnel if no match found + * + * Description: + * vti6_locate() first tries to locate an existing tunnel + * based on @parms. If this is unsuccessful, but @create is set a new + * tunnel device is created and registered for use. + * + * Return: + * matching tunnel or NULL + **/ +static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, + int create) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + struct ip6_tnl __rcu **tp; + struct ip6_tnl *t; + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + for (tp = vti6_tnl_bucket(ip6n, p); + (t = rtnl_dereference(*tp)) != NULL; + tp = &t->next) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr)) + return t; + } + if (!create) + return NULL; + return vti6_tnl_create(net, p); +} + +/** + * vti6_dev_uninit - tunnel device uninitializer + * @dev: the device to be destroyed + * + * Description: + * vti6_dev_uninit() removes tunnel from its list + **/ +static void vti6_dev_uninit(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + if (dev == ip6n->fb_tnl_dev) + RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); + else + vti6_tnl_unlink(ip6n, t); + ip6_tnl_dst_reset(t); + dev_put(dev); +} + +static int vti6_rcv(struct sk_buff *skb) +{ + struct ip6_tnl *t; + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + + rcu_read_lock(); + + if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, + &ipv6h->daddr)) != NULL) { + struct pcpu_tstats *tstats; + + if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { + rcu_read_unlock(); + goto discard; + } + + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + rcu_read_unlock(); + return 0; + } + + if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { + t->dev->stats.rx_dropped++; + rcu_read_unlock(); + goto discard; + } + + tstats = this_cpu_ptr(t->dev->tstats); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + + skb->mark = 0; + secpath_reset(skb); + skb->dev = t->dev; + + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + return 1; + +discard: + kfree_skb(skb); + return 0; +} + +/** + * vti6_addr_conflict - compare packet addresses to tunnel's own + * @t: the outgoing tunnel device + * @hdr: IPv6 header from the incoming packet + * + * Description: + * Avoid trivial tunneling loop by checking that tunnel exit-point + * doesn't match source of incoming packet. + * + * Return: + * 1 if conflict, + * 0 else + **/ +static inline bool +vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) +{ + return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); +} + +/** + * vti6_xmit - send a packet + * @skb: the outgoing socket buffer + * @dev: the outgoing tunnel device + **/ +static int vti6_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip6_tnl *t = netdev_priv(dev); + struct net_device_stats *stats = &t->dev->stats; + struct dst_entry *dst = NULL, *ndst = NULL; + struct flowi6 fl6; + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct net_device *tdev; + int err = -1; + + if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || + !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) + return err; + + dst = ip6_tnl_dst_check(t); + if (!dst) { + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + + ndst = ip6_route_output(net, NULL, &fl6); + + if (ndst->error) + goto tx_err_link_failure; + ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + ndst = NULL; + goto tx_err_link_failure; + } + dst = ndst; + } + + if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL) + goto tx_err_link_failure; + + tdev = dst->dev; + + if (tdev == dev) { + stats->collisions++; + net_warn_ratelimited("%s: Local routing loop detected!\n", + t->parms.name); + goto tx_err_dst_release; + } + + + skb_dst_drop(skb); + skb_dst_set_noref(skb, dst); + + ip6tunnel_xmit(skb, dev); + if (ndst) { + dev->mtu = dst_mtu(ndst); + ip6_tnl_dst_store(t, ndst); + } + + return 0; +tx_err_link_failure: + stats->tx_carrier_errors++; + dst_link_failure(skb); +tx_err_dst_release: + dst_release(ndst); + return err; +} + +static netdev_tx_t +vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net_device_stats *stats = &t->dev->stats; + int ret; + + switch (skb->protocol) { + case htons(ETH_P_IPV6): + ret = vti6_xmit(skb, dev); + break; + default: + goto tx_err; + } + + if (ret < 0) + goto tx_err; + + return NETDEV_TX_OK; + +tx_err: + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static void vti6_link_config(struct ip6_tnl *t) +{ + struct dst_entry *dst; + struct net_device *dev = t->dev; + struct __ip6_tnl_parm *p = &t->parms; + struct flowi6 *fl6 = &t->fl.u.ip6; + + memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); + + /* Set up flowi template */ + fl6->saddr = p->laddr; + fl6->daddr = p->raddr; + fl6->flowi6_oif = p->link; + fl6->flowi6_mark = be32_to_cpu(p->i_key); + fl6->flowi6_proto = p->proto; + fl6->flowlabel = 0; + + p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | + IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); + + if (p->flags & IP6_TNL_F_CAP_XMIT && p->flags & IP6_TNL_F_CAP_RCV) + dev->flags |= IFF_POINTOPOINT; + else + dev->flags &= ~IFF_POINTOPOINT; + + dev->iflink = p->link; + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + + dst = ip6_route_output(dev_net(dev), NULL, fl6); + if (dst->error) + return; + + dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6), + NULL, 0); + if (IS_ERR(dst)) + return; + + if (dst->dev) { + dev->hard_header_len = dst->dev->hard_header_len; + + dev->mtu = dst_mtu(dst); + + if (dev->mtu < IPV6_MIN_MTU) + dev->mtu = IPV6_MIN_MTU; + } + dst_release(dst); + } +} + +/** + * vti6_tnl_change - update the tunnel parameters + * @t: tunnel to be changed + * @p: tunnel configuration parameters + * + * Description: + * vti6_tnl_change() updates the tunnel parameters + **/ +static int +vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) +{ + t->parms.laddr = p->laddr; + t->parms.raddr = p->raddr; + t->parms.link = p->link; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + t->parms.proto = p->proto; + ip6_tnl_dst_reset(t); + vti6_link_config(t); + return 0; +} + +static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +{ + struct net *net = dev_net(t->dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + int err; + + vti6_tnl_unlink(ip6n, t); + synchronize_net(); + err = vti6_tnl_change(t, p); + vti6_tnl_link(ip6n, t); + netdev_state_change(t->dev); + return err; +} + +static void +vti6_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm2 *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->link = u->link; + p->i_key = u->i_key; + p->o_key = u->o_key; + p->proto = u->proto; + + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void +vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) +{ + u->laddr = p->laddr; + u->raddr = p->raddr; + u->link = p->link; + u->i_key = p->i_key; + u->o_key = p->o_key; + u->proto = p->proto; + + memcpy(u->name, p->name, sizeof(u->name)); +} + +/** + * vti6_tnl_ioctl - configure vti6 tunnels from userspace + * @dev: virtual device associated with tunnel + * @ifr: parameters passed from userspace + * @cmd: command to be performed + * + * Description: + * vti6_ioctl() is used for managing vti6 tunnels + * from userspace. + * + * The possible commands are the following: + * %SIOCGETTUNNEL: get tunnel parameters for device + * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters + * %SIOCCHGTUNNEL: change tunnel parameters to those given + * %SIOCDELTUNNEL: delete tunnel + * + * The fallback device "ip6_vti0", created during module + * initialization, can be used for creating other tunnel devices. + * + * Return: + * 0 on success, + * %-EFAULT if unable to copy data to or from userspace, + * %-EPERM if current process hasn't %CAP_NET_ADMIN set + * %-EINVAL if passed tunnel parameters are invalid, + * %-EEXIST if changing a tunnel's parameters would cause a conflict + * %-ENODEV if attempting to change or delete a nonexisting device + **/ +static int +vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip6_tnl_parm2 p; + struct __ip6_tnl_parm p1; + struct ip6_tnl *t = NULL; + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + switch (cmd) { + case SIOCGETTUNNEL: + if (dev == ip6n->fb_tnl_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + vti6_parm_from_user(&p1, &p); + t = vti6_locate(net, &p1, 0); + } else { + memset(&p, 0, sizeof(p)); + } + if (t == NULL) + t = netdev_priv(dev); + vti6_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + break; + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + break; + err = -EINVAL; + if (p.proto != IPPROTO_IPV6 && p.proto != 0) + break; + vti6_parm_from_user(&p1, &p); + t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); + if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else + t = netdev_priv(dev); + + err = vti6_update(t, &p1); + } + if (t) { + err = 0; + vti6_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + case SIOCDELTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + break; + + if (dev == ip6n->fb_tnl_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + break; + err = -ENOENT; + vti6_parm_from_user(&p1, &p); + t = vti6_locate(net, &p1, 0); + if (t == NULL) + break; + err = -EPERM; + if (t->dev == ip6n->fb_tnl_dev) + break; + dev = t->dev; + } + err = 0; + unregister_netdevice(dev); + break; + default: + err = -EINVAL; + } + return err; +} + +/** + * vti6_tnl_change_mtu - change mtu manually for tunnel device + * @dev: virtual device associated with tunnel + * @new_mtu: the new mtu + * + * Return: + * 0 on success, + * %-EINVAL if mtu too small + **/ +static int vti6_change_mtu(struct net_device *dev, int new_mtu) +{ + if (new_mtu < IPV6_MIN_MTU) + return -EINVAL; + + dev->mtu = new_mtu; + return 0; +} + +static const struct net_device_ops vti6_netdev_ops = { + .ndo_uninit = vti6_dev_uninit, + .ndo_start_xmit = vti6_tnl_xmit, + .ndo_do_ioctl = vti6_ioctl, + .ndo_change_mtu = vti6_change_mtu, + .ndo_get_stats = vti6_get_stats, +}; + +/** + * vti6_dev_setup - setup virtual tunnel device + * @dev: virtual device associated with tunnel + * + * Description: + * Initialize function pointers and device parameters + **/ +static void vti6_dev_setup(struct net_device *dev) +{ + struct ip6_tnl *t; + + dev->netdev_ops = &vti6_netdev_ops; + dev->destructor = vti6_dev_free; + + dev->type = ARPHRD_TUNNEL6; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); + dev->mtu = ETH_DATA_LEN; + t = netdev_priv(dev); + dev->flags |= IFF_NOARP; + dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; +} + +/** + * vti6_dev_init_gen - general initializer for all tunnel devices + * @dev: virtual device associated with tunnel + **/ +static inline int vti6_dev_init_gen(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + + t->dev = dev; + t->net = dev_net(dev); + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + return 0; +} + +/** + * vti6_dev_init - initializer for all non fallback tunnel devices + * @dev: virtual device associated with tunnel + **/ +static int vti6_dev_init(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + int err = vti6_dev_init_gen(dev); + + if (err) + return err; + vti6_link_config(t); + return 0; +} + +/** + * vti6_fb_tnl_dev_init - initializer for fallback tunnel device + * @dev: fallback device + * + * Return: 0 + **/ +static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + int err = vti6_dev_init_gen(dev); + + if (err) + return err; + + t->parms.proto = IPPROTO_IPV6; + dev_hold(dev); + + vti6_link_config(t); + + rcu_assign_pointer(ip6n->tnls_wc[0], t); + return 0; +} + +static int vti6_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + return 0; +} + +static void vti6_netlink_parms(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + if (!data) + return; + + if (data[IFLA_VTI_LINK]) + parms->link = nla_get_u32(data[IFLA_VTI_LINK]); + + if (data[IFLA_VTI_LOCAL]) + nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL], + sizeof(struct in6_addr)); + + if (data[IFLA_VTI_REMOTE]) + nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE], + sizeof(struct in6_addr)); + + if (data[IFLA_VTI_IKEY]) + parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]); + + if (data[IFLA_VTI_OKEY]) + parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); +} + +static int vti6_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net *net = dev_net(dev); + struct ip6_tnl *nt; + + nt = netdev_priv(dev); + vti6_netlink_parms(data, &nt->parms); + + nt->parms.proto = IPPROTO_IPV6; + + if (vti6_locate(net, &nt->parms, 0)) + return -EEXIST; + + return vti6_tnl_create2(dev); +} + +static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip6_tnl *t; + struct __ip6_tnl_parm p; + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + if (dev == ip6n->fb_tnl_dev) + return -EINVAL; + + vti6_netlink_parms(data, &p); + + t = vti6_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else + t = netdev_priv(dev); + + return vti6_update(t, &p); +} + +static size_t vti6_get_size(const struct net_device *dev) +{ + return + /* IFLA_VTI_LINK */ + nla_total_size(4) + + /* IFLA_VTI_LOCAL */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_VTI_REMOTE */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_VTI_IKEY */ + nla_total_size(4) + + /* IFLA_VTI_OKEY */ + nla_total_size(4) + + 0; +} + +static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + struct __ip6_tnl_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) || + nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr), + &parm->laddr) || + nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr), + &parm->raddr) || + nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || + nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = { + [IFLA_VTI_LINK] = { .type = NLA_U32 }, + [IFLA_VTI_LOCAL] = { .len = sizeof(struct in6_addr) }, + [IFLA_VTI_REMOTE] = { .len = sizeof(struct in6_addr) }, + [IFLA_VTI_IKEY] = { .type = NLA_U32 }, + [IFLA_VTI_OKEY] = { .type = NLA_U32 }, +}; + +static struct rtnl_link_ops vti6_link_ops __read_mostly = { + .kind = "vti6", + .maxtype = IFLA_VTI_MAX, + .policy = vti6_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = vti6_dev_setup, + .validate = vti6_validate, + .newlink = vti6_newlink, + .changelink = vti6_changelink, + .get_size = vti6_get_size, + .fill_info = vti6_fill_info, +}; + +static struct xfrm_tunnel_notifier vti6_handler __read_mostly = { + .handler = vti6_rcv, + .priority = 1, +}; + +static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) +{ + int h; + struct ip6_tnl *t; + LIST_HEAD(list); + + for (h = 0; h < HASH_SIZE; h++) { + t = rtnl_dereference(ip6n->tnls_r_l[h]); + while (t != NULL) { + unregister_netdevice_queue(t->dev, &list); + t = rtnl_dereference(t->next); + } + } + + t = rtnl_dereference(ip6n->tnls_wc[0]); + unregister_netdevice_queue(t->dev, &list); + unregister_netdevice_many(&list); +} + +static int __net_init vti6_init_net(struct net *net) +{ + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct ip6_tnl *t = NULL; + int err; + + ip6n->tnls[0] = ip6n->tnls_wc; + ip6n->tnls[1] = ip6n->tnls_r_l; + + err = -ENOMEM; + ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0", + vti6_dev_setup); + + if (!ip6n->fb_tnl_dev) + goto err_alloc_dev; + dev_net_set(ip6n->fb_tnl_dev, net); + + err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev); + if (err < 0) + goto err_register; + + err = register_netdev(ip6n->fb_tnl_dev); + if (err < 0) + goto err_register; + + t = netdev_priv(ip6n->fb_tnl_dev); + + strcpy(t->parms.name, ip6n->fb_tnl_dev->name); + return 0; + +err_register: + vti6_dev_free(ip6n->fb_tnl_dev); +err_alloc_dev: + return err; +} + +static void __net_exit vti6_exit_net(struct net *net) +{ + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + rtnl_lock(); + vti6_destroy_tunnels(ip6n); + rtnl_unlock(); +} + +static struct pernet_operations vti6_net_ops = { + .init = vti6_init_net, + .exit = vti6_exit_net, + .id = &vti6_net_id, + .size = sizeof(struct vti6_net), +}; + +/** + * vti6_tunnel_init - register protocol and reserve needed resources + * + * Return: 0 on success + **/ +static int __init vti6_tunnel_init(void) +{ + int err; + + err = register_pernet_device(&vti6_net_ops); + if (err < 0) + goto out_pernet; + + err = xfrm6_mode_tunnel_input_register(&vti6_handler); + if (err < 0) { + pr_err("%s: can't register vti6\n", __func__); + goto out; + } + err = rtnl_link_register(&vti6_link_ops); + if (err < 0) + goto rtnl_link_failed; + + return 0; + +rtnl_link_failed: + xfrm6_mode_tunnel_input_deregister(&vti6_handler); +out: + unregister_pernet_device(&vti6_net_ops); +out_pernet: + return err; +} + +/** + * vti6_tunnel_cleanup - free resources and unregister protocol + **/ +static void __exit vti6_tunnel_cleanup(void) +{ + rtnl_link_unregister(&vti6_link_ops); + if (xfrm6_mode_tunnel_input_deregister(&vti6_handler)) + pr_info("%s: can't deregister vti6\n", __func__); + + unregister_pernet_device(&vti6_net_ops); +} + +module_init(vti6_tunnel_init); +module_exit(vti6_tunnel_cleanup); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("vti6"); +MODULE_ALIAS_NETDEV("ip6_vti0"); +MODULE_AUTHOR("Steffen Klassert"); +MODULE_DESCRIPTION("IPv6 virtual tunnel interface"); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 5636a912074a..ce507d9e1c90 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -64,8 +64,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG && + if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 44400c216dc6..710238f58aa9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -349,6 +349,11 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 56eef30ee5f6..da00a2ecde55 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); /* Send RST reply */ -static void send_reset(struct net *net, struct sk_buff *oldskb) +static void send_reset(struct net *net, struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; struct tcphdr otcph, *tcph; @@ -88,8 +88,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) } /* Check checksum. */ - if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, - skb_checksum(oldskb, tcphoff, otcplen, 0))) { + if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { pr_debug("TCP checksum is invalid\n"); return; } @@ -227,7 +226,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(net, skb); + send_reset(net, skb, par->hooknum); break; default: net_info_ratelimited("case %u not handled yet\n", reject->with); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index dffdc1a389c5..4a258263d8ec 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -144,12 +144,24 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } +static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr) +{ + u32 c; + + net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd)); + c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, nf_frags.rnd); + return c & (INETFRAGS_HASHSZ - 1); +} + + static unsigned int nf_hashfn(struct inet_frag_queue *q) { const struct frag_queue *nq; nq = container_of(q, struct frag_queue, q); - return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); + return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); } static void nf_skb_free(struct sk_buff *skb) @@ -185,7 +197,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.ecn = ecn; read_lock_bh(&nf_frags.lock); - hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); + hash = nf_hash_frag(id, src, dst); q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1aeb473b2cc6..cc85a9ba5010 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -82,24 +82,24 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. */ -unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr, u32 rnd) +static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr) { u32 c; + net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), - (__force u32)id, rnd); + (__force u32)id, ip6_frags.rnd); return c & (INETFRAGS_HASHSZ - 1); } -EXPORT_SYMBOL_GPL(inet6_hash_frag); static unsigned int ip6_hashfn(struct inet_frag_queue *q) { struct frag_queue *fq; fq = container_of(q, struct frag_queue, q); - return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); + return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr); } bool ip6_frag_match(struct inet_frag_queue *q, void *a) @@ -193,7 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, arg.ecn = ecn; read_lock(&ip6_frags.lock); - hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); + hash = inet6_hash_frag(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); if (IS_ERR_OR_NULL(q)) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c3130ffc3bca..fd399ac6c1f7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -476,6 +476,24 @@ out: } #ifdef CONFIG_IPV6_ROUTER_PREF +struct __rt6_probe_work { + struct work_struct work; + struct in6_addr target; + struct net_device *dev; +}; + +static void rt6_probe_deferred(struct work_struct *w) +{ + struct in6_addr mcaddr; + struct __rt6_probe_work *work = + container_of(w, struct __rt6_probe_work, work); + + addrconf_addr_solict_mult(&work->target, &mcaddr); + ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); + dev_put(work->dev); + kfree(w); +} + static void rt6_probe(struct rt6_info *rt) { struct neighbour *neigh; @@ -499,17 +517,23 @@ static void rt6_probe(struct rt6_info *rt) if (!neigh || time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { - struct in6_addr mcaddr; - struct in6_addr *target; + struct __rt6_probe_work *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (neigh) { + if (neigh && work) neigh->updated = jiffies; + + if (neigh) write_unlock(&neigh->lock); - } - target = (struct in6_addr *)&rt->rt6i_gateway; - addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); + if (work) { + INIT_WORK(&work->work, rt6_probe_deferred); + work->target = rt->rt6i_gateway; + dev_hold(rt->dst.dev); + work->dev = rt->dst.dev; + schedule_work(&work->work); + } } else { out: write_unlock(&neigh->lock); @@ -595,7 +619,7 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, goto out; m = rt6_score_route(rt, oif, strict); - if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + if (m == RT6_NUD_FAIL_SOFT) { match_do_rr = true; m = 0; /* lowest valid score */ } else if (m < 0) { @@ -851,7 +875,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; - rt->rt6i_gateway = *daddr; } rt->rt6i_flags |= RTF_CACHE; @@ -1064,10 +1087,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) - return dst; + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; - return NULL; + if (rt6_check_expired(rt)) + return NULL; + + return dst; } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -1335,6 +1361,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; atomic_set(&rt->dst.__refcnt, 1); + rt->rt6i_gateway = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; rt->rt6i_idev = idev; @@ -1870,7 +1897,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, in6_dev_hold(rt->rt6i_idev); rt->dst.lastuse = jiffies; - rt->rt6i_gateway = ort->rt6i_gateway; + if (ort->rt6i_flags & RTF_GATEWAY) + rt->rt6i_gateway = ort->rt6i_gateway; + else + rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == (RTF_DEFAULT | RTF_ADDRCONF)) @@ -2157,6 +2187,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, else rt->rt6i_flags |= RTF_LOCAL; + rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 19269453a8ea..3a9038dd818d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -933,10 +933,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ttl = iph6->hop_limit; tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); + if (IS_ERR(skb)) + goto out; err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); @@ -946,8 +945,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tx_error_icmp: dst_link_failure(skb); tx_error: - dev->stats.tx_errors++; dev_kfree_skb(skb); +out: + dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -956,13 +956,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + if (IS_ERR(skb)) + goto out; ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); return NETDEV_TX_OK; +out: + dev->stats.tx_errors++; + return NETDEV_TX_OK; } static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, @@ -1292,6 +1294,12 @@ static void ipip6_dev_free(struct net_device *dev) free_netdev(dev); } +#define SIT_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | \ + NETIF_F_HW_CSUM) + static void ipip6_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip6_netdev_ops; @@ -1305,6 +1313,8 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_LLTX; + dev->features |= SIT_FEATURES; + dev->hw_features |= SIT_FEATURES; } static int ipip6_tunnel_init(struct net_device *dev) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index d04d3f1dd9b7..535a3ad262f1 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -24,6 +24,8 @@ #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) +static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS]; + /* RFC 2460, Section 8.3: * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] * @@ -61,14 +63,18 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, u32 count, int c) { - __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch); + __u32 *tmp; + + net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret)); + + tmp = __get_cpu_var(ipv6_cookie_scratch); /* * we have 320 bits of information to hash, copy in the remaining - * 192 bits required for sha_transform, from the syncookie_secret + * 192 bits required for sha_transform, from the syncookie6_secret * and overwrite the digest with the secret */ - memcpy(tmp + 10, syncookie_secret[c], 44); + memcpy(tmp + 10, syncookie6_secret[c], 44); memcpy(tmp, saddr, 16); memcpy(tmp + 4, daddr, 16); tmp[8] = ((__force u32)sport << 16) + (__force u32)dport; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b996ee2005a9..0740f93a114a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1929,6 +1929,7 @@ struct proto tcpv6_prot = { .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, .orphan_count = &tcp_orphan_count, + .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 2ec6bf6a0aa0..c1097c798900 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -83,7 +83,7 @@ static int tcp6_gro_complete(struct sk_buff *skb) static const struct net_offload tcpv6_offload = { .callbacks = { .gso_send_check = tcp_v6_gso_send_check, - .gso_segment = tcp_tso_segment, + .gso_segment = tcp_gso_segment, .gro_receive = tcp6_gro_receive, .gro_complete = tcp6_gro_complete, }, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b496de19a341..f3893e897f72 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,6 +53,29 @@ #include <trace/events/skb.h> #include "udp_impl.h" +static unsigned int udp6_ehashfn(struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) +{ + static u32 udp6_ehash_secret __read_mostly; + static u32 udp_ipv6_hash_secret __read_mostly; + + u32 lhash, fhash; + + net_get_random_once(&udp6_ehash_secret, + sizeof(udp6_ehash_secret)); + net_get_random_once(&udp_ipv6_hash_secret, + sizeof(udp_ipv6_hash_secret)); + + lhash = (__force u32)laddr->s6_addr32[3]; + fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); + + return __inet6_ehashfn(lhash, lport, fhash, fport, + udp_ipv6_hash_secret + net_hash_mix(net)); +} + int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); @@ -214,8 +237,8 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - hash = inet6_ehashfn(net, daddr, hnum, - saddr, sport); + hash = udp6_ehashfn(net, daddr, hnum, + saddr, sport); matches = 1; } else if (score == SCORE2_MAX) goto exact_match; @@ -295,8 +318,8 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - hash = inet6_ehashfn(net, daddr, hnum, - saddr, sport); + hash = udp6_ehashfn(net, daddr, hnum, + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { @@ -1220,9 +1243,6 @@ do_udp_sendmsg: if (tclass < 0) tclass = np->tclass; - if (dontfrag < 0) - dontfrag = np->dontfrag; - if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -1241,6 +1261,8 @@ back_from_confirm: up->pending = AF_INET6; do_append_data: + if (dontfrag < 0) + dontfrag = np->dontfrag; up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 4691ed50a928..c779c3c90b9d 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -7,33 +7,32 @@ #include <net/inet_common.h> #include <net/transp_v6.h> -extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); -extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, - u8 , u8 , int , __be32 , struct udp_table *); +int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); +void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, + __be32, struct udp_table *); -extern int udp_v6_get_port(struct sock *sk, unsigned short snum); +int udp_v6_get_port(struct sock *sk, unsigned short snum); -extern int udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -extern int udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); +int udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +int udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); #ifdef CONFIG_COMPAT -extern int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); -extern int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); +int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); +int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); #endif -extern int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len); -extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len); -extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); -extern void udpv6_destroy_sock(struct sock *sk); +int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len); +int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len); +int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +void udpv6_destroy_sock(struct sock *sk); -extern void udp_v6_clear_sk(struct sock *sk, int size); +void udp_v6_clear_sk(struct sock *sk, int size); #ifdef CONFIG_PROC_FS -extern int udp6_seq_show(struct seq_file *seq, void *v); +int udp6_seq_show(struct seq_file *seq, void *v); #endif #endif /* _UDP6_IMPL_H */ diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 60559511bd9c..08e23b0bf302 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -64,6 +64,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 4770d515c2c8..cb04f7a16b5e 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -18,6 +18,65 @@ #include <net/ipv6.h> #include <net/xfrm.h> +/* Informational hook. The decap is still done here. */ +static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly; +static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex); + +int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler) +{ + struct xfrm_tunnel_notifier __rcu **pprev; + struct xfrm_tunnel_notifier *t; + int ret = -EEXIST; + int priority = handler->priority; + + mutex_lock(&xfrm6_mode_tunnel_input_mutex); + + for (pprev = &rcv_notify_handlers; + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL; + pprev = &t->next) { + if (t->priority > priority) + break; + if (t->priority == priority) + goto err; + + } + + handler->next = *pprev; + rcu_assign_pointer(*pprev, handler); + + ret = 0; + +err: + mutex_unlock(&xfrm6_mode_tunnel_input_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register); + +int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler) +{ + struct xfrm_tunnel_notifier __rcu **pprev; + struct xfrm_tunnel_notifier *t; + int ret = -ENOENT; + + mutex_lock(&xfrm6_mode_tunnel_input_mutex); + for (pprev = &rcv_notify_handlers; + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL; + pprev = &t->next) { + if (t == handler) { + *pprev = handler->next; + ret = 0; + break; + } + } + mutex_unlock(&xfrm6_mode_tunnel_input_mutex); + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister); + static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) { const struct ipv6hdr *outer_iph = ipv6_hdr(skb); @@ -63,8 +122,15 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } +#define for_each_input_rcu(head, handler) \ + for (handler = rcu_dereference(head); \ + handler != NULL; \ + handler = rcu_dereference(handler->next)) + + static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_tunnel_notifier *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) @@ -72,6 +138,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; + for_each_input_rcu(rcv_notify_handlers, handler) + handler->handler(skb); + err = skb_unclone(skb, GFP_ATOMIC); if (err) goto out; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 23ed03d786c8..5f8e128c512d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -135,9 +135,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); u8 nexthdr = nh[IP6CB(skb)->nhoff]; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; + fl6->flowi6_oif = reverse ? skb->skb_iif : oif; fl6->daddr = reverse ? hdr->saddr : hdr->daddr; fl6->saddr = reverse ? hdr->daddr : hdr->saddr; @@ -284,7 +289,7 @@ static struct dst_ops xfrm6_dst_ops = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index 564eb0b8afa3..8d65bb9477fc 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -509,16 +509,11 @@ typedef struct irnet_ctrl_channel */ /* -------------------------- IRDA PART -------------------------- */ -extern int - irda_irnet_create(irnet_socket *); /* Initialise a IrNET socket */ -extern int - irda_irnet_connect(irnet_socket *); /* Try to connect over IrDA */ -extern void - irda_irnet_destroy(irnet_socket *); /* Teardown a IrNET socket */ -extern int - irda_irnet_init(void); /* Initialise IrDA part of IrNET */ -extern void - irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */ +int irda_irnet_create(irnet_socket *); /* Initialise an IrNET socket */ +int irda_irnet_connect(irnet_socket *); /* Try to connect over IrDA */ +void irda_irnet_destroy(irnet_socket *); /* Teardown an IrNET socket */ +int irda_irnet_init(void); /* Initialise IrDA part of IrNET */ +void irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */ /**************************** VARIABLES ****************************/ diff --git a/net/key/af_key.c b/net/key/af_key.c index 9d585370c5b4..911ef03bf8fb 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1098,7 +1098,8 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, x->id.proto = proto; x->id.spi = sa->sadb_sa_spi; - x->props.replay_window = sa->sadb_sa_replay; + x->props.replay_window = min_t(unsigned int, sa->sadb_sa_replay, + (sizeof(x->replay.bitmap) * 8)); if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN) x->props.flags |= XFRM_STATE_NOECN; if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 6f251cbc2ed7..1ee9f6965d68 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -238,29 +238,40 @@ out: return tunnel; } -extern struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel); -extern void l2tp_tunnel_sock_put(struct sock *sk); -extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); -extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); -extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); -extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); -extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); - -extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); -extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); -extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); -extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); -extern void __l2tp_session_unhash(struct l2tp_session *session); -extern int l2tp_session_delete(struct l2tp_session *session); -extern void l2tp_session_free(struct l2tp_session *session); -extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); -extern int l2tp_session_queue_purge(struct l2tp_session *session); -extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); - -extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); - -extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); -extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); +struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel); +void l2tp_tunnel_sock_put(struct sock *sk); +struct l2tp_session *l2tp_session_find(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id); +struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); +struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); +struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); +struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); + +int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, + u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, + struct l2tp_tunnel **tunnelp); +void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); +int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); +struct l2tp_session *l2tp_session_create(int priv_size, + struct l2tp_tunnel *tunnel, + u32 session_id, u32 peer_session_id, + struct l2tp_session_cfg *cfg); +void __l2tp_session_unhash(struct l2tp_session *session); +int l2tp_session_delete(struct l2tp_session *session); +void l2tp_session_free(struct l2tp_session *session); +void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, + unsigned char *ptr, unsigned char *optr, u16 hdrflags, + int length, int (*payload_hook)(struct sk_buff *skb)); +int l2tp_session_queue_purge(struct l2tp_session *session); +int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); + +int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, + int hdr_len); + +int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, + const struct l2tp_nl_cmd_ops *ops); +void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); /* Session reference counts. Incremented when code obtains a reference * to a session. diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f0a7adaef2ea..ffda81ef1a70 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -353,7 +353,9 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh goto error_put_sess_tun; } + local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); + local_bh_enable(); sock_put(ps->tunnel_sock); sock_put(sk); @@ -422,7 +424,9 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb->data[0] = ppph[0]; skb->data[1] = ppph[1]; + local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); + local_bh_enable(); sock_put(sk_tun); sock_put(sk); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2e7855a1b10d..b0a651cc389f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2865,30 +2865,43 @@ void ieee80211_csa_finalize_work(struct work_struct *work) if (!ieee80211_sdata_running(sdata)) return; - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP)) - return; - sdata->radar_required = sdata->csa_radar_required; err = ieee80211_vif_change_channel(sdata, &local->csa_chandef, &changed); if (WARN_ON(err < 0)) return; - err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon); - if (err < 0) - return; + if (!local->use_chanctx) { + local->_oper_chandef = local->csa_chandef; + ieee80211_hw_config(local, 0); + } - changed |= err; - kfree(sdata->u.ap.next_beacon); - sdata->u.ap.next_beacon = NULL; + ieee80211_bss_info_change_notify(sdata, changed); + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon); + if (err < 0) + return; + changed |= err; + kfree(sdata->u.ap.next_beacon); + sdata->u.ap.next_beacon = NULL; + + ieee80211_bss_info_change_notify(sdata, err); + break; + case NL80211_IFTYPE_ADHOC: + ieee80211_ibss_finish_csa(sdata); + break; + default: + WARN_ON(1); + return; + } sdata->vif.csa_active = false; ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - ieee80211_bss_info_change_notify(sdata, changed); - cfg80211_ch_switch_notify(sdata->dev, &local->csa_chandef); } @@ -2936,20 +2949,56 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (sdata->vif.csa_active) return -EBUSY; - /* only handle AP for now. */ switch (sdata->vif.type) { case NL80211_IFTYPE_AP: + sdata->csa_counter_offset_beacon = + params->counter_offset_beacon; + sdata->csa_counter_offset_presp = params->counter_offset_presp; + sdata->u.ap.next_beacon = + cfg80211_beacon_dup(¶ms->beacon_after); + if (!sdata->u.ap.next_beacon) + return -ENOMEM; + + err = ieee80211_assign_beacon(sdata, ¶ms->beacon_csa); + if (err < 0) { + kfree(sdata->u.ap.next_beacon); + return err; + } + break; + case NL80211_IFTYPE_ADHOC: + if (!sdata->vif.bss_conf.ibss_joined) + return -EINVAL; + + if (params->chandef.width != sdata->u.ibss.chandef.width) + return -EINVAL; + + switch (params->chandef.width) { + case NL80211_CHAN_WIDTH_40: + if (cfg80211_get_chandef_type(¶ms->chandef) != + cfg80211_get_chandef_type(&sdata->u.ibss.chandef)) + return -EINVAL; + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + break; + default: + return -EINVAL; + } + + /* changes into another band are not supported */ + if (sdata->u.ibss.chandef.chan->band != + params->chandef.chan->band) + return -EINVAL; + + err = ieee80211_ibss_csa_beacon(sdata, params); + if (err < 0) + return err; break; default: return -EOPNOTSUPP; } - sdata->u.ap.next_beacon = cfg80211_beacon_dup(¶ms->beacon_after); - if (!sdata->u.ap.next_beacon) - return -ENOMEM; - - sdata->csa_counter_offset_beacon = params->counter_offset_beacon; - sdata->csa_counter_offset_presp = params->counter_offset_presp; sdata->csa_radar_required = params->radar_required; if (params->block_tx) @@ -2957,10 +3006,6 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - err = ieee80211_assign_beacon(sdata, ¶ms->beacon_csa); - if (err < 0) - return err; - local->csa_chandef = params->chandef; sdata->vif.csa_active = true; @@ -3014,7 +3059,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, need_offchan = true; if (!ieee80211_is_action(mgmt->frame_control) || mgmt->u.action.category == WLAN_CATEGORY_PUBLIC || - mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED) + mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED || + mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) break; rcu_read_lock(); sta = sta_info_get(sdata, mgmt->da); @@ -3518,7 +3564,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } band = chanctx_conf->def.chan->band; - sta = sta_info_get(sdata, peer); + sta = sta_info_get_bss(sdata, peer); if (sta) { qos = test_sta_flag(sta, WLAN_STA_WME); } else { diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 3a4764b2869e..03ba6b5c5373 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -453,11 +453,6 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL; drv_change_chanctx(local, ctx, chanctx_changed); - if (!local->use_chanctx) { - local->_oper_chandef = *chandef; - ieee80211_hw_config(local, 0); - } - ieee80211_recalc_chanctx_chantype(local, ctx); ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index b0e32d628114..5c090e41d9bb 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -103,54 +103,57 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, if (!buf) return 0; - sf += snprintf(buf, mxln - sf, "0x%x\n", local->hw.flags); + sf += scnprintf(buf, mxln - sf, "0x%x\n", local->hw.flags); if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) - sf += snprintf(buf + sf, mxln - sf, "HAS_RATE_CONTROL\n"); + sf += scnprintf(buf + sf, mxln - sf, "HAS_RATE_CONTROL\n"); if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) - sf += snprintf(buf + sf, mxln - sf, "RX_INCLUDES_FCS\n"); + sf += scnprintf(buf + sf, mxln - sf, "RX_INCLUDES_FCS\n"); if (local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) - sf += snprintf(buf + sf, mxln - sf, - "HOST_BCAST_PS_BUFFERING\n"); + sf += scnprintf(buf + sf, mxln - sf, + "HOST_BCAST_PS_BUFFERING\n"); if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE) - sf += snprintf(buf + sf, mxln - sf, - "2GHZ_SHORT_SLOT_INCAPABLE\n"); + sf += scnprintf(buf + sf, mxln - sf, + "2GHZ_SHORT_SLOT_INCAPABLE\n"); if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE) - sf += snprintf(buf + sf, mxln - sf, - "2GHZ_SHORT_PREAMBLE_INCAPABLE\n"); + sf += scnprintf(buf + sf, mxln - sf, + "2GHZ_SHORT_PREAMBLE_INCAPABLE\n"); if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) - sf += snprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n"); + sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n"); if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) - sf += snprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n"); + sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n"); if (local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC) - sf += snprintf(buf + sf, mxln - sf, "NEED_DTIM_BEFORE_ASSOC\n"); + sf += scnprintf(buf + sf, mxln - sf, + "NEED_DTIM_BEFORE_ASSOC\n"); if (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT) - sf += snprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n"); + sf += scnprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n"); if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) - sf += snprintf(buf + sf, mxln - sf, "AMPDU_AGGREGATION\n"); + sf += scnprintf(buf + sf, mxln - sf, "AMPDU_AGGREGATION\n"); if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS) - sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_PS\n"); + sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_PS\n"); if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) - sf += snprintf(buf + sf, mxln - sf, "PS_NULLFUNC_STACK\n"); + sf += scnprintf(buf + sf, mxln - sf, "PS_NULLFUNC_STACK\n"); if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) - sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n"); + sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n"); if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE) - sf += snprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n"); + sf += scnprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n"); if (local->hw.flags & IEEE80211_HW_SUPPORTS_STATIC_SMPS) - sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_STATIC_SMPS\n"); + sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_STATIC_SMPS\n"); if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) - sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_SMPS\n"); + sf += scnprintf(buf + sf, mxln - sf, + "SUPPORTS_DYNAMIC_SMPS\n"); if (local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) - sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_UAPSD\n"); + sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_UAPSD\n"); if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - sf += snprintf(buf + sf, mxln - sf, "REPORTS_TX_ACK_STATUS\n"); + sf += scnprintf(buf + sf, mxln - sf, + "REPORTS_TX_ACK_STATUS\n"); if (local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) - sf += snprintf(buf + sf, mxln - sf, "CONNECTION_MONITOR\n"); + sf += scnprintf(buf + sf, mxln - sf, "CONNECTION_MONITOR\n"); if (local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK) - sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_PER_STA_GTK\n"); + sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_PER_STA_GTK\n"); if (local->hw.flags & IEEE80211_HW_AP_LINK_PS) - sf += snprintf(buf + sf, mxln - sf, "AP_LINK_PS\n"); + sf += scnprintf(buf + sf, mxln - sf, "AP_LINK_PS\n"); if (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW) - sf += snprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n"); + sf += scnprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n"); rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); kfree(buf); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index b3ea11f3d526..5d03c47c0a4c 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1085,4 +1085,31 @@ drv_channel_switch_beacon(struct ieee80211_sub_if_data *sdata, } } +static inline int drv_join_ibss(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + int ret = 0; + + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_join_ibss(local, sdata, &sdata->vif.bss_conf); + if (local->ops->join_ibss) + ret = local->ops->join_ibss(&local->hw, &sdata->vif); + trace_drv_return_int(local, ret); + return ret; +} + +static inline void drv_leave_ibss(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_leave_ibss(local, sdata); + if (local->ops->leave_ibss) + local->ops->leave_ibss(&local->hw, &sdata->vif); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a12afe77bb26..21a0b8835cb3 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -39,7 +39,8 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, const int beacon_int, const u32 basic_rates, const u16 capability, u64 tsf, struct cfg80211_chan_def *chandef, - bool *have_higher_than_11mbit) + bool *have_higher_than_11mbit, + struct cfg80211_csa_settings *csa_settings) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; @@ -59,6 +60,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, 2 + 8 /* max Supported Rates */ + 3 /* max DS params */ + 4 /* IBSS params */ + + 5 /* Channel Switch Announcement */ + 2 + (IEEE80211_MAX_SUPP_RATES - 8) + 2 + sizeof(struct ieee80211_ht_cap) + 2 + sizeof(struct ieee80211_ht_operation) + @@ -135,6 +137,16 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, *pos++ = 0; *pos++ = 0; + if (csa_settings) { + *pos++ = WLAN_EID_CHANNEL_SWITCH; + *pos++ = 3; + *pos++ = csa_settings->block_tx ? 1 : 0; + *pos++ = ieee80211_frequency_to_channel( + csa_settings->chandef.chan->center_freq); + sdata->csa_counter_offset_beacon = (pos - presp->head); + *pos++ = csa_settings->count; + } + /* put the remaining rates in WLAN_EID_EXT_SUPP_RATES */ if (rates_n > 8) { *pos++ = WLAN_EID_EXT_SUPP_RATES; @@ -217,6 +229,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; enum nl80211_bss_scan_width scan_width; bool have_higher_than_11mbit; + int err; sdata_assert_lock(sdata); @@ -235,6 +248,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS | BSS_CHANGED_BEACON_ENABLED); + drv_leave_ibss(local, sdata); } presp = rcu_dereference_protected(ifibss->presp, @@ -276,7 +290,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, presp = ieee80211_ibss_build_presp(sdata, beacon_int, basic_rates, capability, tsf, &chandef, - &have_higher_than_11mbit); + &have_higher_than_11mbit, NULL); if (!presp) return; @@ -317,11 +331,26 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, else sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; + ieee80211_set_wmm_default(sdata, true); + sdata->vif.bss_conf.ibss_joined = true; sdata->vif.bss_conf.ibss_creator = creator; - ieee80211_bss_info_change_notify(sdata, bss_change); - ieee80211_set_wmm_default(sdata, true); + err = drv_join_ibss(local, sdata); + if (err) { + sdata->vif.bss_conf.ibss_joined = false; + sdata->vif.bss_conf.ibss_creator = false; + sdata->vif.bss_conf.enable_beacon = false; + sdata->vif.bss_conf.ssid_len = 0; + RCU_INIT_POINTER(ifibss->presp, NULL); + kfree_rcu(presp, rcu_head); + ieee80211_vif_release_channel(sdata); + sdata_info(sdata, "Failed to join IBSS, driver failure: %d\n", + err); + return; + } + + ieee80211_bss_info_change_notify(sdata, bss_change); ifibss->state = IEEE80211_IBSS_MLME_JOINED; mod_timer(&ifibss->timer, @@ -416,6 +445,169 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, tsf, false); } +static int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, + struct cfg80211_csa_settings *csa_settings) +{ + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + int freq; + int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) + + sizeof(mgmt->u.action.u.chan_switch); + u8 *pos; + + skb = dev_alloc_skb(local->tx_headroom + hdr_len + + 5 + /* channel switch announcement element */ + 3); /* secondary channel offset element */ + if (!skb) + return -1; + + skb_reserve(skb, local->tx_headroom); + mgmt = (struct ieee80211_mgmt *)skb_put(skb, hdr_len); + memset(mgmt, 0, hdr_len); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + eth_broadcast_addr(mgmt->da); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); + mgmt->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; + mgmt->u.action.u.chan_switch.action_code = WLAN_ACTION_SPCT_CHL_SWITCH; + pos = skb_put(skb, 5); + *pos++ = WLAN_EID_CHANNEL_SWITCH; /* EID */ + *pos++ = 3; /* IE length */ + *pos++ = csa_settings->block_tx ? 1 : 0; /* CSA mode */ + freq = csa_settings->chandef.chan->center_freq; + *pos++ = ieee80211_frequency_to_channel(freq); /* channel */ + *pos++ = csa_settings->count; /* count */ + + if (csa_settings->chandef.width == NL80211_CHAN_WIDTH_40) { + enum nl80211_channel_type ch_type; + + skb_put(skb, 3); + *pos++ = WLAN_EID_SECONDARY_CHANNEL_OFFSET; /* EID */ + *pos++ = 1; /* IE length */ + ch_type = cfg80211_get_chandef_type(&csa_settings->chandef); + if (ch_type == NL80211_CHAN_HT40PLUS) + *pos++ = IEEE80211_HT_PARAM_CHA_SEC_ABOVE; + else + *pos++ = IEEE80211_HT_PARAM_CHA_SEC_BELOW; + } + + ieee80211_tx_skb(sdata, skb); + return 0; +} + +int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, + struct cfg80211_csa_settings *csa_settings) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct beacon_data *presp, *old_presp; + struct cfg80211_bss *cbss; + const struct cfg80211_bss_ies *ies; + u16 capability; + u64 tsf; + int ret = 0; + + sdata_assert_lock(sdata); + + capability = WLAN_CAPABILITY_IBSS; + + if (ifibss->privacy) + capability |= WLAN_CAPABILITY_PRIVACY; + + cbss = cfg80211_get_bss(sdata->local->hw.wiphy, ifibss->chandef.chan, + ifibss->bssid, ifibss->ssid, + ifibss->ssid_len, WLAN_CAPABILITY_IBSS | + WLAN_CAPABILITY_PRIVACY, + capability); + + if (WARN_ON(!cbss)) { + ret = -EINVAL; + goto out; + } + + rcu_read_lock(); + ies = rcu_dereference(cbss->ies); + tsf = ies->tsf; + rcu_read_unlock(); + cfg80211_put_bss(sdata->local->hw.wiphy, cbss); + + old_presp = rcu_dereference_protected(ifibss->presp, + lockdep_is_held(&sdata->wdev.mtx)); + + presp = ieee80211_ibss_build_presp(sdata, + sdata->vif.bss_conf.beacon_int, + sdata->vif.bss_conf.basic_rates, + capability, tsf, &ifibss->chandef, + NULL, csa_settings); + if (!presp) { + ret = -ENOMEM; + goto out; + } + + rcu_assign_pointer(ifibss->presp, presp); + if (old_presp) + kfree_rcu(old_presp, rcu_head); + + /* it might not send the beacon for a while. send an action frame + * immediately to announce the channel switch. + */ + if (csa_settings) + ieee80211_send_action_csa(sdata, csa_settings); + + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + out: + return ret; +} + +int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct cfg80211_bss *cbss; + int err; + u16 capability; + + sdata_lock(sdata); + /* update cfg80211 bss information with the new channel */ + if (!is_zero_ether_addr(ifibss->bssid)) { + capability = WLAN_CAPABILITY_IBSS; + + if (ifibss->privacy) + capability |= WLAN_CAPABILITY_PRIVACY; + + cbss = cfg80211_get_bss(sdata->local->hw.wiphy, + ifibss->chandef.chan, + ifibss->bssid, ifibss->ssid, + ifibss->ssid_len, WLAN_CAPABILITY_IBSS | + WLAN_CAPABILITY_PRIVACY, + capability); + /* XXX: should not really modify cfg80211 data */ + if (cbss) { + cbss->channel = sdata->local->csa_chandef.chan; + cfg80211_put_bss(sdata->local->hw.wiphy, cbss); + } + } + + ifibss->chandef = sdata->local->csa_chandef; + + /* generate the beacon */ + err = ieee80211_ibss_csa_beacon(sdata, NULL); + sdata_unlock(sdata); + if (err < 0) + return err; + + return 0; +} + +void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + + cancel_work_sync(&ifibss->csa_connection_drop_work); +} + static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta) __acquires(RCU) { @@ -499,6 +691,295 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, return ieee80211_ibss_finish_sta(sta); } +static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + int active = 0; + struct sta_info *sta; + + sdata_assert_lock(sdata); + + rcu_read_lock(); + + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sta->sdata == sdata && + time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, + jiffies)) { + active++; + break; + } + } + + rcu_read_unlock(); + + return active; +} + +static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + struct cfg80211_bss *cbss; + struct beacon_data *presp; + struct sta_info *sta; + int active_ibss; + u16 capability; + + active_ibss = ieee80211_sta_active_ibss(sdata); + + if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { + capability = WLAN_CAPABILITY_IBSS; + + if (ifibss->privacy) + capability |= WLAN_CAPABILITY_PRIVACY; + + cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan, + ifibss->bssid, ifibss->ssid, + ifibss->ssid_len, WLAN_CAPABILITY_IBSS | + WLAN_CAPABILITY_PRIVACY, + capability); + + if (cbss) { + cfg80211_unlink_bss(local->hw.wiphy, cbss); + cfg80211_put_bss(sdata->local->hw.wiphy, cbss); + } + } + + ifibss->state = IEEE80211_IBSS_MLME_SEARCH; + + sta_info_flush(sdata); + + spin_lock_bh(&ifibss->incomplete_lock); + while (!list_empty(&ifibss->incomplete_stations)) { + sta = list_first_entry(&ifibss->incomplete_stations, + struct sta_info, list); + list_del(&sta->list); + spin_unlock_bh(&ifibss->incomplete_lock); + + sta_info_free(local, sta); + spin_lock_bh(&ifibss->incomplete_lock); + } + spin_unlock_bh(&ifibss->incomplete_lock); + + netif_carrier_off(sdata->dev); + + sdata->vif.bss_conf.ibss_joined = false; + sdata->vif.bss_conf.ibss_creator = false; + sdata->vif.bss_conf.enable_beacon = false; + sdata->vif.bss_conf.ssid_len = 0; + + /* remove beacon */ + presp = rcu_dereference_protected(ifibss->presp, + lockdep_is_held(&sdata->wdev.mtx)); + RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); + if (presp) + kfree_rcu(presp, rcu_head); + + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | + BSS_CHANGED_IBSS); + drv_leave_ibss(local, sdata); + ieee80211_vif_release_channel(sdata); +} + +static void ieee80211_csa_connection_drop_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.ibss.csa_connection_drop_work); + + ieee80211_ibss_disconnect(sdata); + synchronize_rcu(); + skb_queue_purge(&sdata->skb_queue); + + /* trigger a scan to find another IBSS network to join */ + ieee80211_queue_work(&sdata->local->hw, &sdata->work); +} + +static bool +ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, + struct ieee802_11_elems *elems, + bool beacon) +{ + struct cfg80211_csa_settings params; + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_chanctx *chanctx; + enum nl80211_channel_type ch_type; + int err, num_chanctx; + u32 sta_flags; + u8 mode; + + if (sdata->vif.csa_active) + return true; + + if (!sdata->vif.bss_conf.ibss_joined) + return false; + + sta_flags = IEEE80211_STA_DISABLE_VHT; + switch (ifibss->chandef.width) { + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + case NL80211_CHAN_WIDTH_20_NOHT: + sta_flags |= IEEE80211_STA_DISABLE_HT; + /* fall through */ + case NL80211_CHAN_WIDTH_20: + sta_flags |= IEEE80211_STA_DISABLE_40MHZ; + break; + default: + break; + } + + memset(¶ms, 0, sizeof(params)); + err = ieee80211_parse_ch_switch_ie(sdata, elems, beacon, + ifibss->chandef.chan->band, + sta_flags, ifibss->bssid, + ¶ms.count, &mode, + ¶ms.chandef); + + /* can't switch to destination channel, fail */ + if (err < 0) + goto disconnect; + + /* did not contain a CSA */ + if (err) + return false; + + if (ifibss->chandef.chan->band != params.chandef.chan->band) + goto disconnect; + + switch (ifibss->chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + /* keep our current HT mode (HT20/HT40+/HT40-), even if + * another mode has been announced. The mode is not adopted + * within the beacon while doing CSA and we should therefore + * keep the mode which we announce. + */ + ch_type = cfg80211_get_chandef_type(&ifibss->chandef); + cfg80211_chandef_create(¶ms.chandef, params.chandef.chan, + ch_type); + break; + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + if (params.chandef.width != ifibss->chandef.width) { + sdata_info(sdata, + "IBSS %pM received channel switch from incompatible channel width (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", + ifibss->bssid, + params.chandef.chan->center_freq, + params.chandef.width, + params.chandef.center_freq1, + params.chandef.center_freq2); + goto disconnect; + } + break; + default: + /* should not happen, sta_flags should prevent VHT modes. */ + WARN_ON(1); + goto disconnect; + } + + if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, ¶ms.chandef, + IEEE80211_CHAN_DISABLED)) { + sdata_info(sdata, + "IBSS %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", + ifibss->bssid, + params.chandef.chan->center_freq, + params.chandef.width, + params.chandef.center_freq1, + params.chandef.center_freq2); + goto disconnect; + } + + err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, + ¶ms.chandef); + if (err < 0) + goto disconnect; + if (err) { + params.radar_required = true; + + /* TODO: IBSS-DFS not (yet) supported, disconnect. */ + goto disconnect; + } + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (!chanctx_conf) { + rcu_read_unlock(); + goto disconnect; + } + + /* don't handle for multi-VIF cases */ + chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); + if (chanctx->refcount > 1) { + rcu_read_unlock(); + goto disconnect; + } + num_chanctx = 0; + list_for_each_entry_rcu(chanctx, &sdata->local->chanctx_list, list) + num_chanctx++; + + if (num_chanctx > 1) { + rcu_read_unlock(); + goto disconnect; + } + rcu_read_unlock(); + + /* all checks done, now perform the channel switch. */ + ibss_dbg(sdata, + "received channel switch announcement to go to channel %d MHz\n", + params.chandef.chan->center_freq); + + params.block_tx = !!mode; + + ieee80211_ibss_csa_beacon(sdata, ¶ms); + sdata->csa_radar_required = params.radar_required; + + if (params.block_tx) + ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + + sdata->local->csa_chandef = params.chandef; + sdata->vif.csa_active = true; + + ieee80211_bss_info_change_notify(sdata, err); + drv_channel_switch_beacon(sdata, ¶ms.chandef); + + return true; +disconnect: + ibss_dbg(sdata, "Can't handle channel switch, disconnect\n"); + ieee80211_queue_work(&sdata->local->hw, + &ifibss->csa_connection_drop_work); + + return true; +} + +static void +ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status, + struct ieee802_11_elems *elems) +{ + int required_len; + + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + return; + + /* CSA is the only action we handle for now */ + if (mgmt->u.action.u.measurement.action_code != + WLAN_ACTION_SPCT_CHL_SWITCH) + return; + + required_len = IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.chan_switch); + if (len < required_len) + return; + + ieee80211_ibss_process_chanswitch(sdata, elems, false); +} + static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) @@ -661,10 +1142,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, /* check if we need to merge IBSS */ - /* we use a fixed BSSID */ - if (sdata->u.ibss.fixed_bssid) - goto put_bss; - /* not an IBSS */ if (!(cbss->capability & WLAN_CAPABILITY_IBSS)) goto put_bss; @@ -680,10 +1157,18 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.ssid_len)) goto put_bss; + /* process channel switch */ + if (ieee80211_ibss_process_chanswitch(sdata, elems, true)) + goto put_bss; + /* same BSSID */ if (ether_addr_equal(cbss->bssid, sdata->u.ibss.bssid)) goto put_bss; + /* we use a fixed BSSID */ + if (sdata->u.ibss.fixed_bssid) + goto put_bss; + if (ieee80211_have_rx_timestamp(rx_status)) { /* time when timestamp field was received */ rx_timestamp = @@ -775,30 +1260,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, ieee80211_queue_work(&local->hw, &sdata->work); } -static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - int active = 0; - struct sta_info *sta; - - sdata_assert_lock(sdata); - - rcu_read_lock(); - - list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (sta->sdata == sdata && - time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, - jiffies)) { - active++; - break; - } - } - - rcu_read_unlock(); - - return active; -} - static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -1076,6 +1537,8 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; u16 fc; + struct ieee802_11_elems elems; + int ies_len; rx_status = IEEE80211_SKB_RXCB(skb); mgmt = (struct ieee80211_mgmt *) skb->data; @@ -1101,6 +1564,27 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case IEEE80211_STYPE_DEAUTH: ieee80211_rx_mgmt_deauth_ibss(sdata, mgmt, skb->len); break; + case IEEE80211_STYPE_ACTION: + switch (mgmt->u.action.category) { + case WLAN_CATEGORY_SPECTRUM_MGMT: + ies_len = skb->len - + offsetof(struct ieee80211_mgmt, + u.action.u.chan_switch.variable); + + if (ies_len < 0) + break; + + ieee802_11_parse_elems( + mgmt->u.action.u.chan_switch.variable, + ies_len, true, &elems); + + if (elems.parse_error) + break; + + ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt, skb->len, + rx_status, &elems); + break; + } } mgmt_out: @@ -1167,6 +1651,8 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) (unsigned long) sdata); INIT_LIST_HEAD(&ifibss->incomplete_stations); spin_lock_init(&ifibss->incomplete_lock); + INIT_WORK(&ifibss->csa_connection_drop_work, + ieee80211_csa_connection_drop_work); } /* scan finished notification */ @@ -1265,73 +1751,19 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - struct ieee80211_local *local = sdata->local; - struct cfg80211_bss *cbss; - u16 capability; - int active_ibss; - struct sta_info *sta; - struct beacon_data *presp; - - active_ibss = ieee80211_sta_active_ibss(sdata); - - if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { - capability = WLAN_CAPABILITY_IBSS; - - if (ifibss->privacy) - capability |= WLAN_CAPABILITY_PRIVACY; - - cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan, - ifibss->bssid, ifibss->ssid, - ifibss->ssid_len, WLAN_CAPABILITY_IBSS | - WLAN_CAPABILITY_PRIVACY, - capability); - if (cbss) { - cfg80211_unlink_bss(local->hw.wiphy, cbss); - cfg80211_put_bss(local->hw.wiphy, cbss); - } - } - - ifibss->state = IEEE80211_IBSS_MLME_SEARCH; - memset(ifibss->bssid, 0, ETH_ALEN); + ieee80211_ibss_disconnect(sdata); ifibss->ssid_len = 0; - - sta_info_flush(sdata); - - spin_lock_bh(&ifibss->incomplete_lock); - while (!list_empty(&ifibss->incomplete_stations)) { - sta = list_first_entry(&ifibss->incomplete_stations, - struct sta_info, list); - list_del(&sta->list); - spin_unlock_bh(&ifibss->incomplete_lock); - - sta_info_free(local, sta); - spin_lock_bh(&ifibss->incomplete_lock); - } - spin_unlock_bh(&ifibss->incomplete_lock); - - netif_carrier_off(sdata->dev); + memset(ifibss->bssid, 0, ETH_ALEN); /* remove beacon */ kfree(sdata->u.ibss.ie); - presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&sdata->wdev.mtx)); - RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); /* on the next join, re-program HT parameters */ memset(&ifibss->ht_capa, 0, sizeof(ifibss->ht_capa)); memset(&ifibss->ht_capa_mask, 0, sizeof(ifibss->ht_capa_mask)); - sdata->vif.bss_conf.ibss_joined = false; - sdata->vif.bss_conf.ibss_creator = false; - sdata->vif.bss_conf.enable_beacon = false; - sdata->vif.bss_conf.ssid_len = 0; - clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | - BSS_CHANGED_IBSS); - ieee80211_vif_release_channel(sdata); synchronize_rcu(); - kfree(presp); skb_queue_purge(&sdata->skb_queue); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b6186517ec56..e73cd0637f3b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -322,7 +322,6 @@ struct ieee80211_roc_work { /* flags used in struct ieee80211_if_managed.flags */ enum ieee80211_sta_flags { - IEEE80211_STA_BEACON_POLL = BIT(0), IEEE80211_STA_CONNECTION_POLL = BIT(1), IEEE80211_STA_CONTROL_PORT = BIT(2), IEEE80211_STA_DISABLE_HT = BIT(4), @@ -487,6 +486,7 @@ struct ieee80211_if_managed { struct ieee80211_if_ibss { struct timer_list timer; + struct work_struct csa_connection_drop_work; unsigned long last_scan_completed; @@ -893,6 +893,8 @@ struct tpt_led_trigger { * that the scan completed. * @SCAN_ABORTED: Set for our scan work function when the driver reported * a scan complete for an aborted scan. + * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being + * cancelled. */ enum { SCAN_SW_SCANNING, @@ -900,6 +902,7 @@ enum { SCAN_ONCHANNEL_SCANNING, SCAN_COMPLETED, SCAN_ABORTED, + SCAN_HW_CANCELLED, }; /** @@ -1330,6 +1333,10 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, + struct cfg80211_csa_settings *csa_settings); +int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata); +void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata); /* mesh code */ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata); @@ -1481,6 +1488,29 @@ void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); +/** + * ieee80211_parse_ch_switch_ie - parses channel switch IEs + * @sdata: the sdata of the interface which has received the frame + * @elems: parsed 802.11 elements received with the frame + * @beacon: indicates if the frame was a beacon or probe response + * @current_band: indicates the current band + * @sta_flags: contains information about own capabilities and restrictions + * to decide which channel switch announcements can be accepted. Only the + * following subset of &enum ieee80211_sta_flags are evaluated: + * %IEEE80211_STA_DISABLE_HT, %IEEE80211_STA_DISABLE_VHT, + * %IEEE80211_STA_DISABLE_40MHZ, %IEEE80211_STA_DISABLE_80P80MHZ, + * %IEEE80211_STA_DISABLE_160MHZ. + * @count: to be filled with the counter until the switch (on success only) + * @bssid: the currently connected bssid (for reporting) + * @mode: to be filled with CSA mode (on success only) + * @new_chandef: to be filled with destination chandef (on success only) + * Return: 0 on success, <0 on error and >0 if there is nothing to parse. + */ +int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, + struct ieee802_11_elems *elems, bool beacon, + enum ieee80211_band current_band, + u32 sta_flags, u8 *bssid, u8 *count, u8 *mode, + struct cfg80211_chan_def *new_chandef); /* Suspend/resume and hw reconfiguration */ int ieee80211_reconfig(struct ieee80211_local *local); @@ -1654,6 +1684,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef); +u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c); int __must_check ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index fcecd633514e..e48f103b9ade 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -766,6 +766,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_mgd_stop(sdata); + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + ieee80211_ibss_stop(sdata); + + /* * Remove all stations associated with this interface. * diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 620677e897bd..3e51dd7d98b3 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -879,7 +879,7 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, keyconf->keylen, keyconf->key, 0, NULL); if (IS_ERR(key)) - return ERR_PTR(PTR_ERR(key)); + return ERR_CAST(key); if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 86e4ad56b573..91cc8281e266 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -145,66 +145,6 @@ static int ecw2cw(int ecw) return (1 << ecw) - 1; } -static u32 chandef_downgrade(struct cfg80211_chan_def *c) -{ - u32 ret; - int tmp; - - switch (c->width) { - case NL80211_CHAN_WIDTH_20: - c->width = NL80211_CHAN_WIDTH_20_NOHT; - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_40: - c->width = NL80211_CHAN_WIDTH_20; - c->center_freq1 = c->chan->center_freq; - ret = IEEE80211_STA_DISABLE_40MHZ | - IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_80: - tmp = (30 + c->chan->center_freq - c->center_freq1)/20; - /* n_P40 */ - tmp /= 2; - /* freq_P40 */ - c->center_freq1 = c->center_freq1 - 20 + 40 * tmp; - c->width = NL80211_CHAN_WIDTH_40; - ret = IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_80P80: - c->center_freq2 = 0; - c->width = NL80211_CHAN_WIDTH_80; - ret = IEEE80211_STA_DISABLE_80P80MHZ | - IEEE80211_STA_DISABLE_160MHZ; - break; - case NL80211_CHAN_WIDTH_160: - /* n_P20 */ - tmp = (70 + c->chan->center_freq - c->center_freq1)/20; - /* n_P80 */ - tmp /= 4; - c->center_freq1 = c->center_freq1 - 40 + 80 * tmp; - c->width = NL80211_CHAN_WIDTH_80; - ret = IEEE80211_STA_DISABLE_80P80MHZ | - IEEE80211_STA_DISABLE_160MHZ; - break; - default: - case NL80211_CHAN_WIDTH_20_NOHT: - WARN_ON_ONCE(1); - c->width = NL80211_CHAN_WIDTH_20_NOHT; - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_5: - case NL80211_CHAN_WIDTH_10: - WARN_ON_ONCE(1); - /* keep c->width */ - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - break; - } - - WARN_ON_ONCE(!cfg80211_chandef_valid(c)); - - return ret; -} - static u32 ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, @@ -352,7 +292,7 @@ out: break; } - ret |= chandef_downgrade(chandef); + ret |= ieee80211_chandef_downgrade(chandef); } if (chandef->width != vht_chandef.width && !tracking) @@ -406,13 +346,13 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, */ if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ && chandef.width == NL80211_CHAN_WIDTH_80P80) - flags |= chandef_downgrade(&chandef); + flags |= ieee80211_chandef_downgrade(&chandef); if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ && chandef.width == NL80211_CHAN_WIDTH_160) - flags |= chandef_downgrade(&chandef); + flags |= ieee80211_chandef_downgrade(&chandef); if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ && chandef.width > NL80211_CHAN_WIDTH_20) - flags |= chandef_downgrade(&chandef); + flags |= ieee80211_chandef_downgrade(&chandef); if (cfg80211_chandef_identical(&chandef, &sdata->vif.bss_conf.chandef)) return 0; @@ -893,8 +833,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; - if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL)) + if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE; ieee80211_tx_skb(sdata, skb); @@ -937,6 +876,8 @@ static void ieee80211_chswitch_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work); struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + u32 changed = 0; + int ret; if (!ieee80211_sdata_running(sdata)) return; @@ -945,24 +886,39 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ifmgd->associated) goto out; - local->_oper_chandef = local->csa_chandef; + ret = ieee80211_vif_change_channel(sdata, &local->csa_chandef, + &changed); + if (ret) { + sdata_info(sdata, + "vif channel switch failed, disconnecting\n"); + ieee80211_queue_work(&sdata->local->hw, + &ifmgd->csa_connection_drop_work); + goto out; + } - if (!local->ops->channel_switch) { - /* call "hw_config" only if doing sw channel switch */ - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - } else { - /* update the device channel directly */ - local->hw.conf.chandef = local->_oper_chandef; + if (!local->use_chanctx) { + local->_oper_chandef = local->csa_chandef; + /* Call "hw_config" only if doing sw channel switch. + * Otherwise update the channel directly + */ + if (!local->ops->channel_switch) + ieee80211_hw_config(local, 0); + else + local->hw.conf.chandef = local->_oper_chandef; } /* XXX: shouldn't really modify cfg80211-owned data! */ - ifmgd->associated->channel = local->_oper_chandef.chan; + ifmgd->associated->channel = local->csa_chandef.chan; /* XXX: wait for a beacon first? */ ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); + + ieee80211_bss_info_change_notify(sdata, changed); + out: + sdata->vif.csa_active = false; ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; sdata_unlock(sdata); } @@ -1000,20 +956,12 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_bss *cbss = ifmgd->associated; - struct ieee80211_bss *bss; struct ieee80211_chanctx *chanctx; - enum ieee80211_band new_band; - int new_freq; - u8 new_chan_no; + enum ieee80211_band current_band; u8 count; u8 mode; - struct ieee80211_channel *new_chan; struct cfg80211_chan_def new_chandef = {}; - struct cfg80211_chan_def new_vht_chandef = {}; - const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; - const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; - const struct ieee80211_ht_operation *ht_oper; - int secondary_channel_offset = -1; + int res; sdata_assert_lock(sdata); @@ -1027,162 +975,23 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED) return; - sec_chan_offs = elems->sec_chan_offs; - wide_bw_chansw_ie = elems->wide_bw_chansw_ie; - ht_oper = elems->ht_operation; - - if (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | - IEEE80211_STA_DISABLE_40MHZ)) { - sec_chan_offs = NULL; - wide_bw_chansw_ie = NULL; - /* only used for bandwidth here */ - ht_oper = NULL; - } - - if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) - wide_bw_chansw_ie = NULL; - - if (elems->ext_chansw_ie) { - if (!ieee80211_operating_class_to_band( - elems->ext_chansw_ie->new_operating_class, - &new_band)) { - sdata_info(sdata, - "cannot understand ECSA IE operating class %d, disconnecting\n", - elems->ext_chansw_ie->new_operating_class); - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); - } - new_chan_no = elems->ext_chansw_ie->new_ch_num; - count = elems->ext_chansw_ie->count; - mode = elems->ext_chansw_ie->mode; - } else if (elems->ch_switch_ie) { - new_band = cbss->channel->band; - new_chan_no = elems->ch_switch_ie->new_ch_num; - count = elems->ch_switch_ie->count; - mode = elems->ch_switch_ie->mode; - } else { - /* nothing here we understand */ - return; - } - - bss = (void *)cbss->priv; - - new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); - new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); - if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) { - sdata_info(sdata, - "AP %pM switches to unsupported channel (%d MHz), disconnecting\n", - ifmgd->associated->bssid, new_freq); + current_band = cbss->channel->band; + res = ieee80211_parse_ch_switch_ie(sdata, elems, beacon, current_band, + ifmgd->flags, + ifmgd->associated->bssid, &count, + &mode, &new_chandef); + if (res < 0) ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); + if (res) return; - } - - if (!beacon && sec_chan_offs) { - secondary_channel_offset = sec_chan_offs->sec_chan_offs; - } else if (beacon && ht_oper) { - secondary_channel_offset = - ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; - } else if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { - /* - * If it's not a beacon, HT is enabled and the IE not present, - * it's 20 MHz, 802.11-2012 8.5.2.6: - * This element [the Secondary Channel Offset Element] is - * present when switching to a 40 MHz channel. It may be - * present when switching to a 20 MHz channel (in which - * case the secondary channel offset is set to SCN). - */ - secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; - } - - switch (secondary_channel_offset) { - default: - /* secondary_channel_offset was present but is invalid */ - case IEEE80211_HT_PARAM_CHA_SEC_NONE: - cfg80211_chandef_create(&new_chandef, new_chan, - NL80211_CHAN_HT20); - break; - case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: - cfg80211_chandef_create(&new_chandef, new_chan, - NL80211_CHAN_HT40PLUS); - break; - case IEEE80211_HT_PARAM_CHA_SEC_BELOW: - cfg80211_chandef_create(&new_chandef, new_chan, - NL80211_CHAN_HT40MINUS); - break; - case -1: - cfg80211_chandef_create(&new_chandef, new_chan, - NL80211_CHAN_NO_HT); - /* keep width for 5/10 MHz channels */ - switch (sdata->vif.bss_conf.chandef.width) { - case NL80211_CHAN_WIDTH_5: - case NL80211_CHAN_WIDTH_10: - new_chandef.width = sdata->vif.bss_conf.chandef.width; - break; - default: - break; - } - break; - } - - if (wide_bw_chansw_ie) { - new_vht_chandef.chan = new_chan; - new_vht_chandef.center_freq1 = - ieee80211_channel_to_frequency( - wide_bw_chansw_ie->new_center_freq_seg0, - new_band); - - switch (wide_bw_chansw_ie->new_channel_width) { - default: - /* hmmm, ignore VHT and use HT if present */ - case IEEE80211_VHT_CHANWIDTH_USE_HT: - new_vht_chandef.chan = NULL; - break; - case IEEE80211_VHT_CHANWIDTH_80MHZ: - new_vht_chandef.width = NL80211_CHAN_WIDTH_80; - break; - case IEEE80211_VHT_CHANWIDTH_160MHZ: - new_vht_chandef.width = NL80211_CHAN_WIDTH_160; - break; - case IEEE80211_VHT_CHANWIDTH_80P80MHZ: - /* field is otherwise reserved */ - new_vht_chandef.center_freq2 = - ieee80211_channel_to_frequency( - wide_bw_chansw_ie->new_center_freq_seg1, - new_band); - new_vht_chandef.width = NL80211_CHAN_WIDTH_80P80; - break; - } - if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ && - new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80) - chandef_downgrade(&new_vht_chandef); - if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ && - new_vht_chandef.width == NL80211_CHAN_WIDTH_160) - chandef_downgrade(&new_vht_chandef); - if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ && - new_vht_chandef.width > NL80211_CHAN_WIDTH_20) - chandef_downgrade(&new_vht_chandef); - } - - /* if VHT data is there validate & use it */ - if (new_vht_chandef.chan) { - if (!cfg80211_chandef_compatible(&new_vht_chandef, - &new_chandef)) { - sdata_info(sdata, - "AP %pM CSA has inconsistent channel data, disconnecting\n", - ifmgd->associated->bssid); - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); - return; - } - new_chandef = new_vht_chandef; - } if (!cfg80211_chandef_usable(local->hw.wiphy, &new_chandef, IEEE80211_CHAN_DISABLED)) { sdata_info(sdata, "AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", - ifmgd->associated->bssid, new_freq, + ifmgd->associated->bssid, + new_chandef.chan->center_freq, new_chandef.width, new_chandef.center_freq1, new_chandef.center_freq2); ieee80211_queue_work(&local->hw, @@ -1191,17 +1000,28 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; + sdata->vif.csa_active = true; + mutex_lock(&local->chanctx_mtx); if (local->use_chanctx) { - sdata_info(sdata, - "not handling channel switch with channel contexts\n"); - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); - return; + u32 num_chanctx = 0; + list_for_each_entry(chanctx, &local->chanctx_list, list) + num_chanctx++; + + if (num_chanctx > 1 || + !(local->hw.flags & IEEE80211_HW_CHANCTX_STA_CSA)) { + sdata_info(sdata, + "not handling chan-switch with channel contexts\n"); + ieee80211_queue_work(&local->hw, + &ifmgd->csa_connection_drop_work); + mutex_unlock(&local->chanctx_mtx); + return; + } } - mutex_lock(&local->chanctx_mtx); if (WARN_ON(!rcu_access_pointer(sdata->vif.chanctx_conf))) { + ieee80211_queue_work(&local->hw, + &ifmgd->csa_connection_drop_work); mutex_unlock(&local->chanctx_mtx); return; } @@ -1374,8 +1194,7 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) if (!mgd->associated) return false; - if (mgd->flags & (IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL)) + if (mgd->flags & IEEE80211_STA_CONNECTION_POLL) return false; if (!mgd->have_beacon) @@ -1691,8 +1510,7 @@ static void __ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata) { lockdep_assert_held(&sdata->local->mtx); - sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL | - IEEE80211_STA_BEACON_POLL); + sdata->u.mgd.flags &= ~IEEE80211_STA_CONNECTION_POLL; ieee80211_run_deferred_scan(sdata->local); } @@ -1954,11 +1772,8 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; mutex_lock(&local->mtx); - if (!(ifmgd->flags & (IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL))) { - mutex_unlock(&local->mtx); - return; - } + if (!(ifmgd->flags & IEEE80211_STA_CONNECTION_POLL)) + goto out; __ieee80211_stop_poll(sdata); @@ -2094,15 +1909,9 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, * because otherwise we would reset the timer every time and * never check whether we received a probe response! */ - if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL)) + if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) already = true; - if (beacon) - ifmgd->flags |= IEEE80211_STA_BEACON_POLL; - else - ifmgd->flags |= IEEE80211_STA_CONNECTION_POLL; - mutex_unlock(&sdata->local->mtx); if (already) @@ -2174,6 +1983,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, true, frame_buf); ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + sdata->vif.csa_active = false; ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); @@ -3061,17 +2871,10 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } } - if (ifmgd->flags & IEEE80211_STA_BEACON_POLL) { + if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) { mlme_dbg_ratelimited(sdata, "cancelling AP probe due to a received beacon\n"); - mutex_lock(&local->mtx); - ifmgd->flags &= ~IEEE80211_STA_BEACON_POLL; - ieee80211_run_deferred_scan(local); - mutex_unlock(&local->mtx); - - mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, -1); - mutex_unlock(&local->iflist_mtx); + ieee80211_reset_ap_probe(sdata); } /* @@ -3543,8 +3346,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) run_again(sdata, ifmgd->assoc_data->timeout); - if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL) && + if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL && ifmgd->associated) { u8 bssid[ETH_ALEN]; int max_tries; @@ -3876,7 +3678,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, return ret; while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { - ifmgd->flags |= chandef_downgrade(&chandef); + ifmgd->flags |= ieee80211_chandef_downgrade(&chandef); ret = ieee80211_vif_use_channel(sdata, &chandef, IEEE80211_CHANCTX_SHARED); } diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index acd1f71adc03..0c2a29484c07 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -394,6 +394,8 @@ void ieee80211_sw_roc_work(struct work_struct *work) if (started) ieee80211_start_next_roc(local); + else if (list_empty(&local->roc_list)) + ieee80211_run_deferred_scan(local); } out_unlock: diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 5dedc56c94db..505bc0dea074 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -144,8 +144,8 @@ void rate_control_deinitialize(struct ieee80211_local *local); /* Rate control algorithms */ #ifdef CONFIG_MAC80211_RC_PID -extern int rc80211_pid_init(void); -extern void rc80211_pid_exit(void); +int rc80211_pid_init(void); +void rc80211_pid_exit(void); #else static inline int rc80211_pid_init(void) { @@ -157,8 +157,8 @@ static inline void rc80211_pid_exit(void) #endif #ifdef CONFIG_MAC80211_RC_MINSTREL -extern int rc80211_minstrel_init(void); -extern void rc80211_minstrel_exit(void); +int rc80211_minstrel_init(void); +void rc80211_minstrel_exit(void); #else static inline int rc80211_minstrel_init(void) { @@ -170,8 +170,8 @@ static inline void rc80211_minstrel_exit(void) #endif #ifdef CONFIG_MAC80211_RC_MINSTREL_HT -extern int rc80211_minstrel_ht_init(void); -extern void rc80211_minstrel_ht_exit(void); +int rc80211_minstrel_ht_init(void); +void rc80211_minstrel_ht_exit(void); #else static inline int rc80211_minstrel_ht_init(void) { diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 8b5f7ef7c0c9..7fa1b36e6202 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -203,6 +203,15 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) memcpy(mi->max_tp_rate, tmp_tp_rate, sizeof(mi->max_tp_rate)); mi->max_prob_rate = tmp_prob_rate; +#ifdef CONFIG_MAC80211_DEBUGFS + /* use fixed index if set */ + if (mp->fixed_rate_idx != -1) { + mi->max_tp_rate[0] = mp->fixed_rate_idx; + mi->max_tp_rate[1] = mp->fixed_rate_idx; + mi->max_prob_rate = mp->fixed_rate_idx; + } +#endif + /* Reset update timer */ mi->stats_update = jiffies; @@ -310,6 +319,11 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, /* increase sum packet counter */ mi->packet_count++; +#ifdef CONFIG_MAC80211_DEBUGFS + if (mp->fixed_rate_idx != -1) + return; +#endif + delta = (mi->packet_count * sampling_ratio / 100) - (mi->sample_count + mi->sample_deferred / 2); diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 7c323f27ba23..5d60779a0c1b 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -365,6 +365,14 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } } +#ifdef CONFIG_MAC80211_DEBUGFS + /* use fixed index if set */ + if (mp->fixed_rate_idx != -1) { + mi->max_tp_rate = mp->fixed_rate_idx; + mi->max_tp_rate2 = mp->fixed_rate_idx; + mi->max_prob_rate = mp->fixed_rate_idx; + } +#endif mi->stats_update = jiffies; } @@ -774,6 +782,11 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, info->flags |= mi->tx_flags; minstrel_ht_check_cck_shortpreamble(mp, mi, txrc->short_preamble); +#ifdef CONFIG_MAC80211_DEBUGFS + if (mp->fixed_rate_idx != -1) + return; +#endif + /* Don't use EAPOL frames for sampling on non-mrr hw */ if (mp->hw->max_rates == 1 && (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO)) @@ -781,16 +794,6 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, else sample_idx = minstrel_get_sample_rate(mp, mi); -#ifdef CONFIG_MAC80211_DEBUGFS - /* use fixed index if set */ - if (mp->fixed_rate_idx != -1) { - mi->max_tp_rate = mp->fixed_rate_idx; - mi->max_tp_rate2 = mp->fixed_rate_idx; - mi->max_prob_rate = mp->fixed_rate_idx; - sample_idx = -1; - } -#endif - mi->total_packets++; /* wraparound */ diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index c97a0657c043..6ff134650a84 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -167,29 +167,29 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf, * provide large enough buffers. */ length = length < RC_PID_PRINT_BUF_SIZE ? length : RC_PID_PRINT_BUF_SIZE; - p = snprintf(pb, length, "%u %lu ", ev->id, ev->timestamp); + p = scnprintf(pb, length, "%u %lu ", ev->id, ev->timestamp); switch (ev->type) { case RC_PID_EVENT_TYPE_TX_STATUS: - p += snprintf(pb + p, length - p, "tx_status %u %u", - !(ev->data.flags & IEEE80211_TX_STAT_ACK), - ev->data.tx_status.status.rates[0].idx); + p += scnprintf(pb + p, length - p, "tx_status %u %u", + !(ev->data.flags & IEEE80211_TX_STAT_ACK), + ev->data.tx_status.status.rates[0].idx); break; case RC_PID_EVENT_TYPE_RATE_CHANGE: - p += snprintf(pb + p, length - p, "rate_change %d %d", - ev->data.index, ev->data.rate); + p += scnprintf(pb + p, length - p, "rate_change %d %d", + ev->data.index, ev->data.rate); break; case RC_PID_EVENT_TYPE_TX_RATE: - p += snprintf(pb + p, length - p, "tx_rate %d %d", - ev->data.index, ev->data.rate); + p += scnprintf(pb + p, length - p, "tx_rate %d %d", + ev->data.index, ev->data.rate); break; case RC_PID_EVENT_TYPE_PF_SAMPLE: - p += snprintf(pb + p, length - p, - "pf_sample %d %d %d %d", - ev->data.pf_sample, ev->data.prop_err, - ev->data.int_err, ev->data.der_err); + p += scnprintf(pb + p, length - p, + "pf_sample %d %d %d %d", + ev->data.pf_sample, ev->data.prop_err, + ev->data.int_err, ev->data.der_err); break; } - p += snprintf(pb + p, length - p, "\n"); + p += scnprintf(pb + p, length - p, "\n"); spin_unlock_irqrestore(&events->lock, status); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 674eac1f996c..0011ac815097 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -995,8 +995,9 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) rx->sta->num_duplicates++; } return RX_DROP_UNUSABLE; - } else + } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; + } } if (unlikely(rx->skb->len < 16)) { @@ -2402,7 +2403,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC && - mgmt->u.action.category != WLAN_CATEGORY_SELF_PROTECTED) + mgmt->u.action.category != WLAN_CATEGORY_SELF_PROTECTED && + mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) return RX_DROP_UNUSABLE; if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) @@ -2566,31 +2568,46 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto queue; case WLAN_CATEGORY_SPECTRUM_MGMT: - if (status->band != IEEE80211_BAND_5GHZ) - break; - - if (sdata->vif.type != NL80211_IFTYPE_STATION) - break; - /* verify action_code is present */ if (len < IEEE80211_MIN_ACTION_SIZE + 1) break; switch (mgmt->u.action.u.measurement.action_code) { case WLAN_ACTION_SPCT_MSR_REQ: + if (status->band != IEEE80211_BAND_5GHZ) + break; + if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.measurement))) break; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + break; + ieee80211_process_measurement_req(sdata, mgmt, len); goto handled; - case WLAN_ACTION_SPCT_CHL_SWITCH: - if (sdata->vif.type != NL80211_IFTYPE_STATION) + case WLAN_ACTION_SPCT_CHL_SWITCH: { + u8 *bssid; + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.chan_switch))) + break; + + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) break; - if (!ether_addr_equal(mgmt->bssid, sdata->u.mgd.bssid)) + if (sdata->vif.type == NL80211_IFTYPE_STATION) + bssid = sdata->u.mgd.bssid; + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + bssid = sdata->u.ibss.bssid; + else + break; + + if (!ether_addr_equal(mgmt->bssid, bssid)) break; goto queue; + } } break; case WLAN_CATEGORY_SA_QUERY: diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 08afe74b98f4..5ad66a83ef7f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -238,6 +238,9 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) enum ieee80211_band band; int i, ielen, n_chans; + if (test_bit(SCAN_HW_CANCELLED, &local->scanning)) + return false; + do { if (local->hw_scan_band == IEEE80211_NUM_BANDS) return false; @@ -391,8 +394,7 @@ static bool ieee80211_can_scan(struct ieee80211_local *local, return false; if (sdata->vif.type == NL80211_IFTYPE_STATION && - sdata->u.mgd.flags & (IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL)) + sdata->u.mgd.flags & IEEE80211_STA_CONNECTION_POLL) return false; return true; @@ -940,7 +942,23 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) if (!local->scan_req) goto out; + /* + * We have a scan running and the driver already reported completion, + * but the worker hasn't run yet or is stuck on the mutex - mark it as + * cancelled. + */ + if (test_bit(SCAN_HW_SCANNING, &local->scanning) && + test_bit(SCAN_COMPLETED, &local->scanning)) { + set_bit(SCAN_HW_CANCELLED, &local->scanning); + goto out; + } + if (test_bit(SCAN_HW_SCANNING, &local->scanning)) { + /* + * Make sure that __ieee80211_scan_completed doesn't trigger a + * scan on another band. + */ + set_bit(SCAN_HW_CANCELLED, &local->scanning); if (local->ops->cancel_hw_scan) drv_cancel_hw_scan(local, rcu_dereference_protected(local->scan_sdata, diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 578eea3fc04d..921597e279a3 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -21,6 +21,168 @@ #include "sta_info.h" #include "wme.h" +int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, + struct ieee802_11_elems *elems, bool beacon, + enum ieee80211_band current_band, + u32 sta_flags, u8 *bssid, u8 *count, u8 *mode, + struct cfg80211_chan_def *new_chandef) +{ + enum ieee80211_band new_band; + int new_freq; + u8 new_chan_no; + struct ieee80211_channel *new_chan; + struct cfg80211_chan_def new_vht_chandef = {}; + const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; + const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; + const struct ieee80211_ht_operation *ht_oper; + int secondary_channel_offset = -1; + + sec_chan_offs = elems->sec_chan_offs; + wide_bw_chansw_ie = elems->wide_bw_chansw_ie; + ht_oper = elems->ht_operation; + + if (sta_flags & (IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_40MHZ)) { + sec_chan_offs = NULL; + wide_bw_chansw_ie = NULL; + /* only used for bandwidth here */ + ht_oper = NULL; + } + + if (sta_flags & IEEE80211_STA_DISABLE_VHT) + wide_bw_chansw_ie = NULL; + + if (elems->ext_chansw_ie) { + if (!ieee80211_operating_class_to_band( + elems->ext_chansw_ie->new_operating_class, + &new_band)) { + sdata_info(sdata, + "cannot understand ECSA IE operating class %d, disconnecting\n", + elems->ext_chansw_ie->new_operating_class); + return -EINVAL; + } + new_chan_no = elems->ext_chansw_ie->new_ch_num; + *count = elems->ext_chansw_ie->count; + *mode = elems->ext_chansw_ie->mode; + } else if (elems->ch_switch_ie) { + new_band = current_band; + new_chan_no = elems->ch_switch_ie->new_ch_num; + *count = elems->ch_switch_ie->count; + *mode = elems->ch_switch_ie->mode; + } else { + /* nothing here we understand */ + return 1; + } + + new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); + new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); + if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) { + sdata_info(sdata, + "BSS %pM switches to unsupported channel (%d MHz), disconnecting\n", + bssid, new_freq); + return -EINVAL; + } + + if (!beacon && sec_chan_offs) { + secondary_channel_offset = sec_chan_offs->sec_chan_offs; + } else if (beacon && ht_oper) { + secondary_channel_offset = + ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; + } else if (!(sta_flags & IEEE80211_STA_DISABLE_HT)) { + /* If it's not a beacon, HT is enabled and the IE not present, + * it's 20 MHz, 802.11-2012 8.5.2.6: + * This element [the Secondary Channel Offset Element] is + * present when switching to a 40 MHz channel. It may be + * present when switching to a 20 MHz channel (in which + * case the secondary channel offset is set to SCN). + */ + secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + } + + switch (secondary_channel_offset) { + default: + /* secondary_channel_offset was present but is invalid */ + case IEEE80211_HT_PARAM_CHA_SEC_NONE: + cfg80211_chandef_create(new_chandef, new_chan, + NL80211_CHAN_HT20); + break; + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + cfg80211_chandef_create(new_chandef, new_chan, + NL80211_CHAN_HT40PLUS); + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + cfg80211_chandef_create(new_chandef, new_chan, + NL80211_CHAN_HT40MINUS); + break; + case -1: + cfg80211_chandef_create(new_chandef, new_chan, + NL80211_CHAN_NO_HT); + /* keep width for 5/10 MHz channels */ + switch (sdata->vif.bss_conf.chandef.width) { + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + new_chandef->width = sdata->vif.bss_conf.chandef.width; + break; + default: + break; + } + break; + } + + if (wide_bw_chansw_ie) { + new_vht_chandef.chan = new_chan; + new_vht_chandef.center_freq1 = + ieee80211_channel_to_frequency( + wide_bw_chansw_ie->new_center_freq_seg0, + new_band); + + switch (wide_bw_chansw_ie->new_channel_width) { + default: + /* hmmm, ignore VHT and use HT if present */ + case IEEE80211_VHT_CHANWIDTH_USE_HT: + new_vht_chandef.chan = NULL; + break; + case IEEE80211_VHT_CHANWIDTH_80MHZ: + new_vht_chandef.width = NL80211_CHAN_WIDTH_80; + break; + case IEEE80211_VHT_CHANWIDTH_160MHZ: + new_vht_chandef.width = NL80211_CHAN_WIDTH_160; + break; + case IEEE80211_VHT_CHANWIDTH_80P80MHZ: + /* field is otherwise reserved */ + new_vht_chandef.center_freq2 = + ieee80211_channel_to_frequency( + wide_bw_chansw_ie->new_center_freq_seg1, + new_band); + new_vht_chandef.width = NL80211_CHAN_WIDTH_80P80; + break; + } + if (sta_flags & IEEE80211_STA_DISABLE_80P80MHZ && + new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80) + ieee80211_chandef_downgrade(&new_vht_chandef); + if (sta_flags & IEEE80211_STA_DISABLE_160MHZ && + new_vht_chandef.width == NL80211_CHAN_WIDTH_160) + ieee80211_chandef_downgrade(&new_vht_chandef); + if (sta_flags & IEEE80211_STA_DISABLE_40MHZ && + new_vht_chandef.width > NL80211_CHAN_WIDTH_20) + ieee80211_chandef_downgrade(&new_vht_chandef); + } + + /* if VHT data is there validate & use it */ + if (new_vht_chandef.chan) { + if (!cfg80211_chandef_compatible(&new_vht_chandef, + new_chandef)) { + sdata_info(sdata, + "BSS %pM: CSA has inconsistent channel data, disconnecting\n", + bssid); + return -EINVAL; + } + *new_chandef = new_vht_chandef; + } + + return 0; +} + static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata, struct ieee80211_msrment_ie *request_ie, const u8 *da, const u8 *bssid, diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 368837fe3b80..78dc2e99027e 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -180,6 +180,9 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + sta->last_rx = jiffies; + if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *) skb->data; u8 *qc = ieee80211_get_qos_ctl(hdr); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 3fb9dd6d02fc..d4cee98533fd 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1475,6 +1475,41 @@ DEFINE_EVENT(local_sdata_evt, drv_ipv6_addr_change, ); #endif +TRACE_EVENT(drv_join_ibss, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *info), + + TP_ARGS(local, sdata, info), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, dtimper) + __field(u16, bcnint) + __dynamic_array(u8, ssid, info->ssid_len); + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->dtimper = info->dtim_period; + __entry->bcnint = info->beacon_int; + memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len); + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG + ) +); + +DEFINE_EVENT(local_sdata_evt, drv_leave_ibss, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3456c0486b48..9993fcb19ecd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1120,7 +1120,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->sta = rcu_dereference(sdata->u.vlan.sta); if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr) return TX_DROP; - } else if (info->flags & IEEE80211_TX_CTL_INJECTED || + } else if (info->flags & (IEEE80211_TX_CTL_INJECTED | + IEEE80211_TX_INTFL_NL80211_FRAME_TX) || tx->sdata->control_port_protocol == tx->skb->protocol) { tx->sta = sta_info_get_bss(sdata, hdr->addr1); } @@ -1981,7 +1982,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, * EAPOL frames from the local station. */ if (unlikely(!ieee80211_vif_is_mesh(&sdata->vif) && - !is_multicast_ether_addr(hdr.addr1) && !authorized && + !multicast && !authorized && (cpu_to_be16(ethertype) != sdata->control_port_protocol || !ether_addr_equal(sdata->vif.addr, skb->data + ETH_ALEN)))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG @@ -2357,15 +2358,31 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata, struct probe_resp *resp; int counter_offset_beacon = sdata->csa_counter_offset_beacon; int counter_offset_presp = sdata->csa_counter_offset_presp; + u8 *beacon_data; + size_t beacon_data_len; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + beacon_data = beacon->tail; + beacon_data_len = beacon->tail_len; + break; + case NL80211_IFTYPE_ADHOC: + beacon_data = beacon->head; + beacon_data_len = beacon->head_len; + break; + default: + return; + } + if (WARN_ON(counter_offset_beacon >= beacon_data_len)) + return; /* warn if the driver did not check for/react to csa completeness */ - if (WARN_ON(((u8 *)beacon->tail)[counter_offset_beacon] == 0)) + if (WARN_ON(beacon_data[counter_offset_beacon] == 0)) return; - ((u8 *)beacon->tail)[counter_offset_beacon]--; + beacon_data[counter_offset_beacon]--; - if (sdata->vif.type == NL80211_IFTYPE_AP && - counter_offset_presp) { + if (sdata->vif.type == NL80211_IFTYPE_AP && counter_offset_presp) { rcu_read_lock(); resp = rcu_dereference(sdata->u.ap.probe_resp); @@ -2400,6 +2417,15 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) goto out; beacon_data = beacon->tail; beacon_data_len = beacon->tail_len; + } else if (vif->type == NL80211_IFTYPE_ADHOC) { + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + + beacon = rcu_dereference(ifibss->presp); + if (!beacon) + goto out; + + beacon_data = beacon->head; + beacon_data_len = beacon->head_len; } else { WARN_ON(1); goto out; @@ -2484,6 +2510,10 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!presp) goto out; + if (sdata->vif.csa_active) + ieee80211_update_csa(sdata, presp); + + skb = dev_alloc_skb(local->tx_headroom + presp->head_len); if (!skb) goto out; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 9c3200bcfc02..aefb9d5b9620 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -567,18 +567,15 @@ void ieee80211_flush_queues(struct ieee80211_local *local, IEEE80211_QUEUE_STOP_REASON_FLUSH); } -void ieee80211_iterate_active_interfaces( - struct ieee80211_hw *hw, u32 iter_flags, - void (*iterator)(void *data, u8 *mac, - struct ieee80211_vif *vif), - void *data) +static void __iterate_active_interfaces(struct ieee80211_local *local, + u32 iter_flags, + void (*iterator)(void *data, u8 *mac, + struct ieee80211_vif *vif), + void *data) { - struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; - mutex_lock(&local->iflist_mtx); - - list_for_each_entry(sdata, &local->interfaces, list) { + list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) @@ -597,13 +594,25 @@ void ieee80211_iterate_active_interfaces( &sdata->vif); } - sdata = rcu_dereference_protected(local->monitor_sdata, - lockdep_is_held(&local->iflist_mtx)); + sdata = rcu_dereference_check(local->monitor_sdata, + lockdep_is_held(&local->iflist_mtx) || + lockdep_rtnl_is_held()); if (sdata && (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || sdata->flags & IEEE80211_SDATA_IN_DRIVER)) iterator(data, sdata->vif.addr, &sdata->vif); +} + +void ieee80211_iterate_active_interfaces( + struct ieee80211_hw *hw, u32 iter_flags, + void (*iterator)(void *data, u8 *mac, + struct ieee80211_vif *vif), + void *data) +{ + struct ieee80211_local *local = hw_to_local(hw); + mutex_lock(&local->iflist_mtx); + __iterate_active_interfaces(local, iter_flags, iterator, data); mutex_unlock(&local->iflist_mtx); } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); @@ -615,38 +624,26 @@ void ieee80211_iterate_active_interfaces_atomic( void *data) { struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata; rcu_read_lock(); + __iterate_active_interfaces(local, iter_flags, iterator, data); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - switch (sdata->vif.type) { - case NL80211_IFTYPE_MONITOR: - if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) - continue; - break; - case NL80211_IFTYPE_AP_VLAN: - continue; - default: - break; - } - if (!(iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL) && - !(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) - continue; - if (ieee80211_sdata_running(sdata)) - iterator(data, sdata->vif.addr, - &sdata->vif); - } +void ieee80211_iterate_active_interfaces_rtnl( + struct ieee80211_hw *hw, u32 iter_flags, + void (*iterator)(void *data, u8 *mac, + struct ieee80211_vif *vif), + void *data) +{ + struct ieee80211_local *local = hw_to_local(hw); - sdata = rcu_dereference(local->monitor_sdata); - if (sdata && - (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || - sdata->flags & IEEE80211_SDATA_IN_DRIVER)) - iterator(data, sdata->vif.addr, &sdata->vif); + ASSERT_RTNL(); - rcu_read_unlock(); + __iterate_active_interfaces(local, iter_flags, iterator, data); } -EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); +EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_rtnl); /* * Nothing should have been stuffed into the workqueue during @@ -1007,14 +1004,21 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, */ enable_qos = (sdata->vif.type != NL80211_IFTYPE_STATION); - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - /* Set defaults according to 802.11-2007 Table 7-37 */ - aCWmax = 1023; - if (use_11b) - aCWmin = 31; - else - aCWmin = 15; + /* Set defaults according to 802.11-2007 Table 7-37 */ + aCWmax = 1023; + if (use_11b) + aCWmin = 31; + else + aCWmin = 15; + + /* Confiure old 802.11b/g medium access rules. */ + qparam.cw_max = aCWmax; + qparam.cw_min = aCWmin; + qparam.txop = 0; + qparam.aifs = 2; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + /* Update if QoS is enabled. */ if (enable_qos) { switch (ac) { case IEEE80211_AC_BK: @@ -1050,12 +1054,6 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, qparam.aifs = 2; break; } - } else { - /* Confiure old 802.11b/g medium access rules. */ - qparam.cw_max = aCWmax; - qparam.cw_min = aCWmin; - qparam.txop = 0; - qparam.aifs = 2; } qparam.uapsd = false; @@ -1084,8 +1082,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; int err; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*mgmt) + 6 + extra_len); + /* 24 + 6 = header + auth_algo + auth_transaction + status_code */ + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 6 + extra_len); if (!skb) return; @@ -2238,6 +2236,10 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, } rate = cfg80211_calculate_bitrate(&ri); + if (WARN_ONCE(!rate, + "Invalid bitrate: flags=0x%x, idx=%d, vht_nss=%d\n", + status->flag, status->rate_idx, status->vht_nss)) + return 0; /* rewind from end of MPDU */ if (status->flag & RX_FLAG_MACTIME_END) @@ -2292,3 +2294,63 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw) ieee80211_queue_work(hw, &local->radar_detected_work); } EXPORT_SYMBOL(ieee80211_radar_detected); + +u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c) +{ + u32 ret; + int tmp; + + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + c->width = NL80211_CHAN_WIDTH_20_NOHT; + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_40: + c->width = NL80211_CHAN_WIDTH_20; + c->center_freq1 = c->chan->center_freq; + ret = IEEE80211_STA_DISABLE_40MHZ | + IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_80: + tmp = (30 + c->chan->center_freq - c->center_freq1)/20; + /* n_P40 */ + tmp /= 2; + /* freq_P40 */ + c->center_freq1 = c->center_freq1 - 20 + 40 * tmp; + c->width = NL80211_CHAN_WIDTH_40; + ret = IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_80P80: + c->center_freq2 = 0; + c->width = NL80211_CHAN_WIDTH_80; + ret = IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ; + break; + case NL80211_CHAN_WIDTH_160: + /* n_P20 */ + tmp = (70 + c->chan->center_freq - c->center_freq1)/20; + /* n_P80 */ + tmp /= 4; + c->center_freq1 = c->center_freq1 - 40 + 80 * tmp; + c->width = NL80211_CHAN_WIDTH_80; + ret = IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ; + break; + default: + case NL80211_CHAN_WIDTH_20_NOHT: + WARN_ON_ONCE(1); + c->width = NL80211_CHAN_WIDTH_20_NOHT; + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + WARN_ON_ONCE(1); + /* keep c->width */ + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; + } + + WARN_ON_ONCE(!cfg80211_chandef_valid(c)); + + return ret; +} diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 97c289414e32..de0112785aae 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -185,13 +185,13 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) { vht_cap->cap |= cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | - IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX | IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX); } if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) vht_cap->cap |= cap_info & - IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | + IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX); if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) vht_cap->cap |= cap_info & diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index b7c7f815deae..52ae6646a411 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -174,8 +174,7 @@ ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops) if (!ops || !ops->xmit || !ops->ed || !ops->start || !ops->stop || !ops->set_channel) { - printk(KERN_ERR - "undefined IEEE802.15.4 device operations\n"); + pr_err("undefined IEEE802.15.4 device operations\n"); return NULL; } @@ -201,8 +200,7 @@ ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops) phy = wpan_phy_alloc(priv_size); if (!phy) { - printk(KERN_ERR - "failure to allocate master IEEE802.15.4 device\n"); + pr_err("failure to allocate master IEEE802.15.4 device\n"); return NULL; } diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 2ca2f4dceab7..e24bcf977296 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -208,6 +208,8 @@ static int mac802154_header_create(struct sk_buff *skb, head[1] = fc >> 8; memcpy(skb_push(skb, pos), head, pos); + skb_reset_mac_header(skb); + skb->mac_len = pos; return pos; } diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 1bec1219ab81..851cd880b0c0 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_IPIP | SKB_GSO_MPLS))) goto out; diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index a13e15be7911..f2c7d83dc23f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -198,13 +198,14 @@ mtype_list(const struct ip_set *set, struct mtype *map = set->data; struct nlattr *adt, *nested; void *x; - u32 id, first = cb->args[2]; + u32 id, first = cb->args[IPSET_CB_ARG0]; adt = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!adt) return -EMSGSIZE; - for (; cb->args[2] < map->elements; cb->args[2]++) { - id = cb->args[2]; + for (; cb->args[IPSET_CB_ARG0] < map->elements; + cb->args[IPSET_CB_ARG0]++) { + id = cb->args[IPSET_CB_ARG0]; x = get_ext(set, map, id); if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && @@ -231,14 +232,14 @@ mtype_list(const struct ip_set *set, ipset_nest_end(skb, adt); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(id == first)) { - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, adt); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index e7603c5b53d7..cf99676e69f8 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -254,7 +254,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -ENOMEM; map->elements = last_port - first_port + 1; - map->memsize = map->elements * sizeof(unsigned long); + map->memsize = bitmap_bytes(0, map->elements); set->variant = &bitmap_port; set->dsize = ip_set_elem_len(set, tb, 0); if (!init_map_port(set, map, first_port, last_port)) { diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index dc9284bdd2dd..bac7e01df67f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1182,10 +1182,12 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { - struct ip_set_net *inst = (struct ip_set_net *)cb->data; - if (cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name); - __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]); + struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; + if (cb->args[IPSET_CB_ARG0]) { + pr_debug("release set %s\n", + nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name); + __ip_set_put_byindex(inst, + (ip_set_id_t) cb->args[IPSET_CB_INDEX]); } return 0; } @@ -1203,7 +1205,7 @@ dump_attrs(struct nlmsghdr *nlh) } static int -dump_init(struct netlink_callback *cb) +dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); @@ -1211,15 +1213,15 @@ dump_init(struct netlink_callback *cb) struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; - struct ip_set_net *inst = (struct ip_set_net *)cb->data; /* Second pass, so parser can't fail */ nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); - /* cb->args[0] : dump single set/all sets - * [1] : set index - * [..]: type specific + /* cb->args[IPSET_CB_NET]: net namespace + * [IPSET_CB_DUMP]: dump single set/all sets + * [IPSET_CB_INDEX]: set index + * [IPSET_CB_ARG0]: type specific */ if (cda[IPSET_ATTR_SETNAME]) { @@ -1231,7 +1233,7 @@ dump_init(struct netlink_callback *cb) return -ENOENT; dump_type = DUMP_ONE; - cb->args[1] = index; + cb->args[IPSET_CB_INDEX] = index; } else dump_type = DUMP_ALL; @@ -1239,7 +1241,8 @@ dump_init(struct netlink_callback *cb) u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); dump_type |= (f << 16); } - cb->args[0] = dump_type; + cb->args[IPSET_CB_NET] = (unsigned long)inst; + cb->args[IPSET_CB_DUMP] = dump_type; return 0; } @@ -1251,12 +1254,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; + struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type, dump_flags; int ret = 0; - struct ip_set_net *inst = (struct ip_set_net *)cb->data; - if (!cb->args[0]) { - ret = dump_init(cb); + if (!cb->args[IPSET_CB_DUMP]) { + ret = dump_init(cb, inst); if (ret < 0) { nlh = nlmsg_hdr(cb->skb); /* We have to create and send the error message @@ -1267,17 +1270,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) } } - if (cb->args[1] >= inst->ip_set_max) + if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) goto out; - dump_type = DUMP_TYPE(cb->args[0]); - dump_flags = DUMP_FLAGS(cb->args[0]); - max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max; + dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]); + dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]); + max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1 + : inst->ip_set_max; dump_last: - pr_debug("args[0]: %u %u args[1]: %ld\n", - dump_type, dump_flags, cb->args[1]); - for (; cb->args[1] < max; cb->args[1]++) { - index = (ip_set_id_t) cb->args[1]; + pr_debug("dump type, flag: %u %u index: %ld\n", + dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); + for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { + index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; set = nfnl_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { @@ -1294,7 +1298,7 @@ dump_last: !!(set->type->features & IPSET_DUMP_LAST))) continue; pr_debug("List set: %s\n", set->name); - if (!cb->args[2]) { + if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); __ip_set_get(set); @@ -1311,7 +1315,7 @@ dump_last: goto nla_put_failure; if (dump_flags & IPSET_FLAG_LIST_SETNAME) goto next_set; - switch (cb->args[2]) { + switch (cb->args[IPSET_CB_ARG0]) { case 0: /* Core header data */ if (nla_put_string(skb, IPSET_ATTR_TYPENAME, @@ -1331,7 +1335,7 @@ dump_last: read_lock_bh(&set->lock); ret = set->variant->list(set, skb, cb); read_unlock_bh(&set->lock); - if (!cb->args[2]) + if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; goto release_refcount; @@ -1340,8 +1344,8 @@ dump_last: /* If we dump all sets, continue with dumping last ones */ if (dump_type == DUMP_ALL) { dump_type = DUMP_LAST; - cb->args[0] = dump_type | (dump_flags << 16); - cb->args[1] = 0; + cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); + cb->args[IPSET_CB_INDEX] = 0; goto dump_last; } goto out; @@ -1350,15 +1354,15 @@ nla_put_failure: ret = -EFAULT; next_set: if (dump_type == DUMP_ONE) - cb->args[1] = IPSET_INVALID_ID; + cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID; else - cb->args[1]++; + cb->args[IPSET_CB_INDEX]++; release_refcount: /* If there was an error or set is done, release set */ - if (ret || !cb->args[2]) { + if (ret || !cb->args[IPSET_CB_ARG0]) { pr_debug("release set %s\n", nfnl_set(inst, index)->name); __ip_set_put_byindex(inst, index); - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; } out: if (nlh) { @@ -1375,8 +1379,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); - if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1384,7 +1386,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, struct netlink_dump_control c = { .dump = ip_set_dump_start, .done = ip_set_dump_done, - .data = (void *)inst }; return netlink_dump_start(ctnl, skb, nlh, &c); } @@ -1961,7 +1962,6 @@ static int __net_init ip_set_net_init(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); - struct ip_set **list; inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6a80dbd30df7..be6932ad3a86 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -234,7 +234,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype MTYPE -#define mtype_elem IPSET_TOKEN(MTYPE, _elem) #define mtype_add IPSET_TOKEN(MTYPE, _add) #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) @@ -931,7 +930,7 @@ mtype_list(const struct ip_set *set, struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; - u32 first = cb->args[2]; + u32 first = cb->args[IPSET_CB_ARG0]; /* We assume that one hash bucket fills into one page */ void *incomplete; int i; @@ -940,20 +939,22 @@ mtype_list(const struct ip_set *set, if (!atd) return -EMSGSIZE; pr_debug("list hash set %s\n", set->name); - for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); + cb->args[IPSET_CB_ARG0]++) { incomplete = skb_tail_pointer(skb); - n = hbucket(t, cb->args[2]); - pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); + n = hbucket(t, cb->args[IPSET_CB_ARG0]); + pr_debug("cb->arg bucket: %lu, t %p n %p\n", + cb->args[IPSET_CB_ARG0], t, n); for (i = 0; i < n->pos; i++) { e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", - cb->args[2], n, i, e); + cb->args[IPSET_CB_ARG0], n, i, e); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { - if (cb->args[2] == first) { + if (cb->args[IPSET_CB_ARG0] == first) { nla_nest_cancel(skb, atd); return -EMSGSIZE; } else @@ -968,16 +969,16 @@ mtype_list(const struct ip_set *set, } ipset_nest_end(skb, atd); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nlmsg_trim(skb, incomplete); - if (unlikely(first == cb->args[2])) { + if (unlikely(first == cb->args[IPSET_CB_ARG0])) { pr_warning("Can't list set %s: one bucket does not fit into " "a message. Please report it!\n", set->name); - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, atd); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 426032706ca9..2bc2dec20b00 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -137,12 +137,11 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet4_elem e = { - .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, - .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK, - }; + struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -160,14 +159,14 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet4_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; u8 cidr, cidr2; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || @@ -364,12 +363,11 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet6_elem e = { - .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, - .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK - }; + struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; @@ -386,11 +384,11 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet6_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 363fab933d48..703d1192a6a2 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -147,12 +147,11 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet4_elem e = { - .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), - .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), - }; + struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -174,8 +173,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet4_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; @@ -183,6 +181,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], u8 cidr, cidr2; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -419,12 +418,11 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet6_elem e = { - .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), - .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), - }; + struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; @@ -446,13 +444,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet6_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index ec6f6d15dded..3e2317f3cf68 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -490,14 +490,15 @@ list_set_list(const struct ip_set *set, { const struct list_set *map = set->data; struct nlattr *atd, *nested; - u32 i, first = cb->args[2]; + u32 i, first = cb->args[IPSET_CB_ARG0]; const struct set_elem *e; atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; - for (; cb->args[2] < map->size; cb->args[2]++) { - i = cb->args[2]; + for (; cb->args[IPSET_CB_ARG0] < map->size; + cb->args[IPSET_CB_ARG0]++) { + i = cb->args[IPSET_CB_ARG0]; e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto finish; @@ -522,13 +523,13 @@ list_set_list(const struct ip_set *set, finish: ipset_nest_end(skb, atd); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(i == first)) { - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, atd); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a3df9bddc4f7..62786a495cea 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -704,7 +704,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) __ip_vs_dst_cache_reset(dest); __ip_vs_svc_put(svc, false); free_percpu(dest->stats.cpustats); - kfree(dest); + ip_vs_dest_put_and_free(dest); } /* @@ -3820,10 +3820,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - /* Some dest can be in grace period even before cleanup, we have to - * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. - */ - rcu_barrier(); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index eff13c94498e..ca056a331e60 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -136,7 +136,7 @@ static void ip_vs_lblc_rcu_free(struct rcu_head *head) struct ip_vs_lblc_entry, rcu_head); - ip_vs_dest_put(en->dest); + ip_vs_dest_put_and_free(en->dest); kfree(en); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 0b8550089a2e..3f21a2f47de1 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -130,7 +130,7 @@ static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head) struct ip_vs_dest_set_elem *e; e = container_of(head, struct ip_vs_dest_set_elem, rcu_head); - ip_vs_dest_put(e->dest); + ip_vs_dest_put_and_free(e->dest); kfree(e); } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 23e596e438b3..2f7ea7564044 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -20,13 +20,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, sctp_sctphdr_t *sh, _sctph; sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (sh == NULL) + if (sh == NULL) { + *verdict = NF_DROP; return 0; + } sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), sizeof(_schunkh), &_schunkh); - if (sch == NULL) + if (sch == NULL) { + *verdict = NF_DROP; return 0; + } + net = skb_net(skb); ipvs = net_ipvs(net); rcu_read_lock(); @@ -76,6 +81,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; + bool payload_csum = false; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -87,19 +93,31 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + ret = ip_vs_app_pkt_out(cp, skb); + if (ret == 0) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 2) + payload_csum = true; } sctph = (void *) skb_network_header(skb) + sctphoff; - sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + /* Only update csum if we really have to */ + if (sctph->source != cp->vport || payload_csum || + skb->ip_summed == CHECKSUM_PARTIAL) { + sctph->source = cp->vport; + sctp_nat_csum(skb, sctph, sctphoff); + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } return 1; } @@ -110,6 +128,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; + bool payload_csum = false; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -121,19 +140,32 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_in(cp, skb)) + ret = ip_vs_app_pkt_in(cp, skb); + if (ret == 0) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 2) + payload_csum = true; } sctph = (void *) skb_network_header(skb) + sctphoff; - sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + /* Only update csum if we really have to */ + if (sctph->dest != cp->dport || payload_csum || + (skb->ip_summed == CHECKSUM_PARTIAL && + !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { + sctph->dest = cp->dport; + sctp_nat_csum(skb, sctph, sctphoff); + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } return 1; } diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 3588faebe529..cc65b2f42cd4 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -115,27 +115,46 @@ ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, } -/* As ip_vs_sh_get, but with fallback if selected server is unavailable */ +/* As ip_vs_sh_get, but with fallback if selected server is unavailable + * + * The fallback strategy loops around the table starting from a "random" + * point (in fact, it is chosen to be the original hash value to make the + * algorithm deterministic) to find a new server. + */ static inline struct ip_vs_dest * ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, const union nf_inet_addr *addr, __be16 port) { - unsigned int offset; - unsigned int hash; + unsigned int offset, roffset; + unsigned int hash, ihash; struct ip_vs_dest *dest; + /* first try the dest it's supposed to go to */ + ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0); + dest = rcu_dereference(s->buckets[ihash].dest); + if (!dest) + return NULL; + if (!is_unavailable(dest)) + return dest; + + IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); + + /* if the original dest is unavailable, loop around the table + * starting from ihash to find a new dest + */ for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { - hash = ip_vs_sh_hashkey(svc->af, addr, port, offset); + roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE; + hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset); dest = rcu_dereference(s->buckets[hash].dest); if (!dest) break; - if (is_unavailable(dest)) - IP_VS_DBG_BUF(6, "SH: selected unavailable server " - "%s:%d (offset %d)", - IP_VS_DBG_ADDR(svc->af, &dest->addr), - ntohs(dest->port), offset); - else + if (!is_unavailable(dest)) return dest; + IP_VS_DBG_BUF(6, "SH: selected unavailable " + "server %s:%d (offset %d), reselecting", + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), roffset); } return NULL; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 2d3030ab5b61..a4b5e2a435ac 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -39,21 +39,23 @@ static struct ctl_table acct_sysctl_table[] = { unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; + struct nf_conn_counter *counter; acct = nf_conn_acct_find(ct); if (!acct) return 0; + counter = acct->counter; return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)atomic64_read(&acct[dir].packets), - (unsigned long long)atomic64_read(&acct[dir].bytes)); + (unsigned long long)atomic64_read(&counter[dir].packets), + (unsigned long long)atomic64_read(&counter[dir].bytes)); }; EXPORT_SYMBOL_GPL(seq_print_acct); static struct nf_ct_ext_type acct_extend __read_mostly = { - .len = sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]), - .align = __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]), + .len = sizeof(struct nf_conn_acct), + .align = __alignof__(struct nf_conn_acct), .id = NF_CT_EXT_ACCT, }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5d892febd64c..e22d950c60b3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1109,12 +1109,14 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, acct: if (do_acct) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; acct = nf_conn_acct_find(ct); if (acct) { - atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); - atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes); + struct nf_conn_counter *counter = acct->counter; + + atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); + atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes); } } } @@ -1126,13 +1128,15 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, int do_acct) { if (do_acct) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; acct = nf_conn_acct_find(ct); if (acct) { - atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); + struct nf_conn_counter *counter = acct->counter; + + atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); atomic64_add(skb->len - skb_network_offset(skb), - &acct[CTINFO2DIR(ctinfo)].bytes); + &counter[CTINFO2DIR(ctinfo)].bytes); } } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index bdebd03bc8cd..70866d192efc 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src, flowi6_to_flowi(&fl1), false)) { if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2), false)) { - if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, - sizeof(rt1->rt6i_gateway)) && + if (ipv6_addr_equal(rt6_nexthop(rt1), + rt6_nexthop(rt2)) && rt1->dst.dev == rt2->dst.dev) ret = 1; dst_release(&rt2->dst); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index eea936b70d15..08870b859046 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -211,13 +211,23 @@ nla_put_failure: } static int -dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes, - enum ip_conntrack_dir dir) +dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct, + enum ip_conntrack_dir dir, int type) { - enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + enum ctattr_type attr = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + struct nf_conn_counter *counter = acct->counter; struct nlattr *nest_count; + u64 pkts, bytes; - nest_count = nla_nest_start(skb, type | NLA_F_NESTED); + if (type == IPCTNL_MSG_CT_GET_CTRZERO) { + pkts = atomic64_xchg(&counter[dir].packets, 0); + bytes = atomic64_xchg(&counter[dir].bytes, 0); + } else { + pkts = atomic64_read(&counter[dir].packets); + bytes = atomic64_read(&counter[dir].bytes); + } + + nest_count = nla_nest_start(skb, attr | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; @@ -234,24 +244,19 @@ nla_put_failure: } static int -ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, - enum ip_conntrack_dir dir, int type) +ctnetlink_dump_acct(struct sk_buff *skb, const struct nf_conn *ct, int type) { - struct nf_conn_counter *acct; - u64 pkts, bytes; + struct nf_conn_acct *acct = nf_conn_acct_find(ct); - acct = nf_conn_acct_find(ct); if (!acct) return 0; - if (type == IPCTNL_MSG_CT_GET_CTRZERO) { - pkts = atomic64_xchg(&acct[dir].packets, 0); - bytes = atomic64_xchg(&acct[dir].bytes, 0); - } else { - pkts = atomic64_read(&acct[dir].packets); - bytes = atomic64_read(&acct[dir].bytes); - } - return dump_counters(skb, pkts, bytes, dir); + if (dump_counters(skb, acct, IP_CT_DIR_ORIGINAL, type) < 0) + return -1; + if (dump_counters(skb, acct, IP_CT_DIR_REPLY, type) < 0) + return -1; + + return 0; } static int @@ -488,8 +493,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 || + ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || ctnetlink_dump_protoinfo(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || @@ -530,7 +534,7 @@ ctnetlink_proto_size(const struct nf_conn *ct) } static inline size_t -ctnetlink_counters_size(const struct nf_conn *ct) +ctnetlink_acct_size(const struct nf_conn *ct) { if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT)) return 0; @@ -579,7 +583,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */ + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ - + ctnetlink_counters_size(ct) + + ctnetlink_acct_size(ct) + ctnetlink_timestamp_size(ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ + nla_total_size(0) /* CTA_PROTOINFO */ @@ -673,10 +677,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (events & (1 << IPCT_DESTROY)) { - if (ctnetlink_dump_counters(skb, ct, - IP_CT_DIR_ORIGINAL, type) < 0 || - ctnetlink_dump_counters(skb, ct, - IP_CT_DIR_REPLY, type) < 0 || + if (ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0) goto nla_put_failure; } else { diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 3deec997be89..61a3c927e63c 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -13,26 +13,20 @@ /* core.c */ -extern unsigned int nf_iterate(struct list_head *head, - struct sk_buff *skb, - unsigned int hook, - const struct net_device *indev, - const struct net_device *outdev, - struct nf_hook_ops **elemp, - int (*okfn)(struct sk_buff *), - int hook_thresh); +unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, + unsigned int hook, const struct net_device *indev, + const struct net_device *outdev, + struct nf_hook_ops **elemp, + int (*okfn)(struct sk_buff *), int hook_thresh); /* nf_queue.c */ -extern int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, - u_int8_t pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum); -extern int __init netfilter_queue_init(void); +int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf, + unsigned int hook, struct net_device *indev, + struct net_device *outdev, int (*okfn)(struct sk_buff *), + unsigned int queuenum); +int __init netfilter_queue_init(void); /* nf_log.c */ -extern int __init netfilter_log_init(void); +int __init netfilter_log_init(void); #endif diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8b03028cca69..227aa11e8409 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -845,8 +845,13 @@ xt_replace_table(struct xt_table *table, return NULL; } - table->private = newinfo; newinfo->initial_entries = private->initial_entries; + /* + * Ensure contents of newinfo are visible before assigning to + * private. + */ + smp_wmb(); + table->private = newinfo; /* * Even though table entries have now been swapped, other CPU's diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 1e2fae32f81b..ed00fef58996 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -147,6 +147,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v3 *info = par->targinfo; u32 queue = info->queuenum; + int ret; if (info->queues_total > 1) { if (info->flags & NFQ_FLAG_CPU_FANOUT) { @@ -157,7 +158,11 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) queue = nfqueue_hash(skb, par); } - return NF_QUEUE_NR(queue); + ret = NF_QUEUE_NR(queue); + if (info->flags & NFQ_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + return ret; } static struct xt_target nfqueue_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index e595e07a759b..1e634615ab9d 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -26,16 +26,18 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; u_int64_t pkts = 0; + const struct nf_conn_acct *acct; const struct nf_conn_counter *counters; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; - counters = nf_conn_acct_find(ct); - if (!counters) + acct = nf_conn_acct_find(ct); + if (!acct) return false; + counters = acct->counter; switch (sinfo->what) { case XT_CONNBYTES_PKTS: switch (sinfo->direction) { diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 3dd0e374bc2b..1ba67931eb1b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,15 +35,6 @@ #include <net/netfilter/nf_conntrack.h> #endif -static void -xt_socket_put_sk(struct sock *sk) -{ - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put(inet_twsk(sk)); - else - sock_put(sk); -} - static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, @@ -216,7 +207,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, inet_twsk(sk)->tw_transparent)); if (sk != skb->sk) - xt_socket_put_sk(sk); + sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; @@ -381,7 +372,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) inet_twsk(sk)->tw_transparent)); if (sk != skb->sk) - xt_socket_put_sk(sk); + sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index ea36e99089af..3591cb5dae91 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -9,6 +9,8 @@ openvswitch-y := \ datapath.o \ dp_notify.o \ flow.o \ + flow_netlink.o \ + flow_table.o \ vport.o \ vport-internal_dev.o \ vport-netdev.o diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2aa13bd7f2b2..1408adc2a2a7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -55,14 +55,10 @@ #include "datapath.h" #include "flow.h" +#include "flow_netlink.h" #include "vport-internal_dev.h" #include "vport-netdev.h" - -#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) -static void rehash_flow_table(struct work_struct *work); -static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); - int ovs_net_id __read_mostly; static void ovs_notify(struct sk_buff *skb, struct genl_info *info, @@ -165,7 +161,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp = container_of(rcu, struct datapath, rcu); - ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false); + ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); kfree(dp->ports); @@ -225,6 +221,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) struct dp_stats_percpu *stats; struct sw_flow_key key; u64 *stats_counter; + u32 n_mask_hit; int error; stats = this_cpu_ptr(dp->stats_percpu); @@ -237,7 +234,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) } /* Look up flow. */ - flow = ovs_flow_lookup(rcu_dereference(dp->table), &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -262,6 +259,7 @@ out: /* Update datapath statistics. */ u64_stats_update_begin(&stats->sync); (*stats_counter)++; + stats->n_mask_hit += n_mask_hit; u64_stats_update_end(&stats->sync); } @@ -435,7 +433,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, upcall->dp_ifindex = dp_ifindex; nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb); + ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); nla_nest_end(user_skb, nla); if (upcall_info->userdata) @@ -455,398 +453,6 @@ out: return err; } -/* Called with ovs_mutex. */ -static int flush_flows(struct datapath *dp) -{ - struct flow_table *old_table; - struct flow_table *new_table; - - old_table = ovsl_dereference(dp->table); - new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); - if (!new_table) - return -ENOMEM; - - rcu_assign_pointer(dp->table, new_table); - - ovs_flow_tbl_destroy(old_table, true); - return 0; -} - -static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len) -{ - - struct sw_flow_actions *acts; - int new_acts_size; - int req_size = NLA_ALIGN(attr_len); - int next_offset = offsetof(struct sw_flow_actions, actions) + - (*sfa)->actions_len; - - if (req_size <= (ksize(*sfa) - next_offset)) - goto out; - - new_acts_size = ksize(*sfa) * 2; - - if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) - return ERR_PTR(-EMSGSIZE); - new_acts_size = MAX_ACTIONS_BUFSIZE; - } - - acts = ovs_flow_actions_alloc(new_acts_size); - if (IS_ERR(acts)) - return (void *)acts; - - memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); - acts->actions_len = (*sfa)->actions_len; - kfree(*sfa); - *sfa = acts; - -out: - (*sfa)->actions_len += req_size; - return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); -} - -static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) -{ - struct nlattr *a; - - a = reserve_sfa_size(sfa, nla_attr_size(len)); - if (IS_ERR(a)) - return PTR_ERR(a); - - a->nla_type = attrtype; - a->nla_len = nla_attr_size(len); - - if (data) - memcpy(nla_data(a), data, len); - memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); - - return 0; -} - -static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype) -{ - int used = (*sfa)->actions_len; - int err; - - err = add_action(sfa, attrtype, NULL, 0); - if (err) - return err; - - return used; -} - -static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset) -{ - struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset); - - a->nla_len = sfa->actions_len - st_offset; -} - -static int validate_and_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth, - struct sw_flow_actions **sfa); - -static int validate_and_copy_sample(const struct nlattr *attr, - const struct sw_flow_key *key, int depth, - struct sw_flow_actions **sfa) -{ - const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; - const struct nlattr *probability, *actions; - const struct nlattr *a; - int rem, start, err, st_acts; - - memset(attrs, 0, sizeof(attrs)); - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) - return -EINVAL; - attrs[type] = a; - } - if (rem) - return -EINVAL; - - probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; - if (!probability || nla_len(probability) != sizeof(u32)) - return -EINVAL; - - actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; - if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) - return -EINVAL; - - /* validation done, copy sample action. */ - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); - if (start < 0) - return start; - err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32)); - if (err) - return err; - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); - if (st_acts < 0) - return st_acts; - - err = validate_and_copy_actions(actions, key, depth + 1, sfa); - if (err) - return err; - - add_nested_action_end(*sfa, st_acts); - add_nested_action_end(*sfa, start); - - return 0; -} - -static int validate_tp_port(const struct sw_flow_key *flow_key) -{ - if (flow_key->eth.type == htons(ETH_P_IP)) { - if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) - return 0; - } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { - if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) - return 0; - } - - return -EINVAL; -} - -static int validate_and_copy_set_tun(const struct nlattr *attr, - struct sw_flow_actions **sfa) -{ - struct sw_flow_match match; - struct sw_flow_key key; - int err, start; - - ovs_match_init(&match, &key, NULL); - err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false); - if (err) - return err; - - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); - if (start < 0) - return start; - - err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, - sizeof(match.key->tun_key)); - add_nested_action_end(*sfa, start); - - return err; -} - -static int validate_set(const struct nlattr *a, - const struct sw_flow_key *flow_key, - struct sw_flow_actions **sfa, - bool *set_tun) -{ - const struct nlattr *ovs_key = nla_data(a); - int key_type = nla_type(ovs_key); - - /* There can be only one key in a action */ - if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) - return -EINVAL; - - if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type] != nla_len(ovs_key) && - ovs_key_lens[key_type] != -1)) - return -EINVAL; - - switch (key_type) { - const struct ovs_key_ipv4 *ipv4_key; - const struct ovs_key_ipv6 *ipv6_key; - int err; - - case OVS_KEY_ATTR_PRIORITY: - case OVS_KEY_ATTR_SKB_MARK: - case OVS_KEY_ATTR_ETHERNET: - break; - - case OVS_KEY_ATTR_TUNNEL: - *set_tun = true; - err = validate_and_copy_set_tun(a, sfa); - if (err) - return err; - break; - - case OVS_KEY_ATTR_IPV4: - if (flow_key->eth.type != htons(ETH_P_IP)) - return -EINVAL; - - if (!flow_key->ip.proto) - return -EINVAL; - - ipv4_key = nla_data(ovs_key); - if (ipv4_key->ipv4_proto != flow_key->ip.proto) - return -EINVAL; - - if (ipv4_key->ipv4_frag != flow_key->ip.frag) - return -EINVAL; - - break; - - case OVS_KEY_ATTR_IPV6: - if (flow_key->eth.type != htons(ETH_P_IPV6)) - return -EINVAL; - - if (!flow_key->ip.proto) - return -EINVAL; - - ipv6_key = nla_data(ovs_key); - if (ipv6_key->ipv6_proto != flow_key->ip.proto) - return -EINVAL; - - if (ipv6_key->ipv6_frag != flow_key->ip.frag) - return -EINVAL; - - if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) - return -EINVAL; - - break; - - case OVS_KEY_ATTR_TCP: - if (flow_key->ip.proto != IPPROTO_TCP) - return -EINVAL; - - return validate_tp_port(flow_key); - - case OVS_KEY_ATTR_UDP: - if (flow_key->ip.proto != IPPROTO_UDP) - return -EINVAL; - - return validate_tp_port(flow_key); - - case OVS_KEY_ATTR_SCTP: - if (flow_key->ip.proto != IPPROTO_SCTP) - return -EINVAL; - - return validate_tp_port(flow_key); - - default: - return -EINVAL; - } - - return 0; -} - -static int validate_userspace(const struct nlattr *attr) -{ - static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { - [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, - [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, - }; - struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; - int error; - - error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, - attr, userspace_policy); - if (error) - return error; - - if (!a[OVS_USERSPACE_ATTR_PID] || - !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) - return -EINVAL; - - return 0; -} - -static int copy_action(const struct nlattr *from, - struct sw_flow_actions **sfa) -{ - int totlen = NLA_ALIGN(from->nla_len); - struct nlattr *to; - - to = reserve_sfa_size(sfa, from->nla_len); - if (IS_ERR(to)) - return PTR_ERR(to); - - memcpy(to, from, totlen); - return 0; -} - -static int validate_and_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, - int depth, - struct sw_flow_actions **sfa) -{ - const struct nlattr *a; - int rem, err; - - if (depth >= SAMPLE_ACTION_DEPTH) - return -EOVERFLOW; - - nla_for_each_nested(a, attr, rem) { - /* Expected argument lengths, (u32)-1 for variable length. */ - static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { - [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), - [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, - [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), - [OVS_ACTION_ATTR_POP_VLAN] = 0, - [OVS_ACTION_ATTR_SET] = (u32)-1, - [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 - }; - const struct ovs_action_push_vlan *vlan; - int type = nla_type(a); - bool skip_copy; - - if (type > OVS_ACTION_ATTR_MAX || - (action_lens[type] != nla_len(a) && - action_lens[type] != (u32)-1)) - return -EINVAL; - - skip_copy = false; - switch (type) { - case OVS_ACTION_ATTR_UNSPEC: - return -EINVAL; - - case OVS_ACTION_ATTR_USERSPACE: - err = validate_userspace(a); - if (err) - return err; - break; - - case OVS_ACTION_ATTR_OUTPUT: - if (nla_get_u32(a) >= DP_MAX_PORTS) - return -EINVAL; - break; - - - case OVS_ACTION_ATTR_POP_VLAN: - break; - - case OVS_ACTION_ATTR_PUSH_VLAN: - vlan = nla_data(a); - if (vlan->vlan_tpid != htons(ETH_P_8021Q)) - return -EINVAL; - if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) - return -EINVAL; - break; - - case OVS_ACTION_ATTR_SET: - err = validate_set(a, key, sfa, &skip_copy); - if (err) - return err; - break; - - case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(a, key, depth, sfa); - if (err) - return err; - skip_copy = true; - break; - - default: - return -EINVAL; - } - if (!skip_copy) { - err = copy_action(a, sfa); - if (err) - return err; - } - } - - if (rem > 0) - return -EINVAL; - - return 0; -} - static void clear_stats(struct sw_flow *flow) { flow->used = 0; @@ -902,15 +508,16 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); + err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; - acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); err = PTR_ERR(acts); if (IS_ERR(acts)) goto err_flow_free; - err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts); + err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], + &flow->key, 0, &acts); rcu_assign_pointer(flow->sf_acts, acts); if (err) goto err_flow_free; @@ -958,15 +565,18 @@ static struct genl_ops dp_packet_genl_ops[] = { } }; -static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) +static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, + struct ovs_dp_megaflow_stats *mega_stats) { - struct flow_table *table; int i; - table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held()); - stats->n_flows = ovs_flow_tbl_count(table); + memset(mega_stats, 0, sizeof(*mega_stats)); + + stats->n_flows = ovs_flow_tbl_count(&dp->table); + mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table); stats->n_hit = stats->n_missed = stats->n_lost = 0; + for_each_possible_cpu(i) { const struct dp_stats_percpu *percpu_stats; struct dp_stats_percpu local_stats; @@ -982,6 +592,7 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) stats->n_hit += local_stats.n_hit; stats->n_missed += local_stats.n_missed; stats->n_lost += local_stats.n_lost; + mega_stats->n_mask_hit += local_stats.n_mask_hit; } } @@ -1005,100 +616,6 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP }; -static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb); -static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) -{ - const struct nlattr *a; - struct nlattr *start; - int err = 0, rem; - - start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); - if (!start) - return -EMSGSIZE; - - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - struct nlattr *st_sample; - - switch (type) { - case OVS_SAMPLE_ATTR_PROBABILITY: - if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a))) - return -EMSGSIZE; - break; - case OVS_SAMPLE_ATTR_ACTIONS: - st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); - if (!st_sample) - return -EMSGSIZE; - err = actions_to_attr(nla_data(a), nla_len(a), skb); - if (err) - return err; - nla_nest_end(skb, st_sample); - break; - } - } - - nla_nest_end(skb, start); - return err; -} - -static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) -{ - const struct nlattr *ovs_key = nla_data(a); - int key_type = nla_type(ovs_key); - struct nlattr *start; - int err; - - switch (key_type) { - case OVS_KEY_ATTR_IPV4_TUNNEL: - start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); - if (!start) - return -EMSGSIZE; - - err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key), - nla_data(ovs_key)); - if (err) - return err; - nla_nest_end(skb, start); - break; - default: - if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) - return -EMSGSIZE; - break; - } - - return 0; -} - -static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) -{ - const struct nlattr *a; - int rem, err; - - nla_for_each_attr(a, attr, len, rem) { - int type = nla_type(a); - - switch (type) { - case OVS_ACTION_ATTR_SET: - err = set_action_to_attr(a, skb); - if (err) - return err; - break; - - case OVS_ACTION_ATTR_SAMPLE: - err = sample_action_to_attr(a, skb); - if (err) - return err; - break; - default: - if (nla_put(skb, type, nla_len(a), nla_data(a))) - return -EMSGSIZE; - break; - } - } - - return 0; -} - static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -1135,8 +652,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (!nla) goto nla_put_failure; - err = ovs_flow_to_nlattrs(&flow->unmasked_key, - &flow->unmasked_key, skb); + err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); if (err) goto error; nla_nest_end(skb, nla); @@ -1145,7 +661,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (!nla) goto nla_put_failure; - err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb); + err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); if (err) goto error; @@ -1155,7 +671,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, used = flow->used; stats.n_packets = flow->packet_count; stats.n_bytes = flow->byte_count; - tcp_flags = flow->tcp_flags; + tcp_flags = (u8)ntohs(flow->tcp_flags); spin_unlock_bh(&flow->lock); if (used && @@ -1188,7 +704,8 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, sf_acts = rcu_dereference_check(flow->sf_acts, lockdep_ovsl_is_held()); - err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); + err = ovs_nla_put_actions(sf_acts->actions, + sf_acts->actions_len, skb); if (!err) nla_nest_end(skb, start); else { @@ -1234,6 +751,14 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, return skb; } +static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key) +{ + u32 __always_unused n_mask_hit; + + return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit); +} + static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -1243,7 +768,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct flow_table *table; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; int error; @@ -1254,21 +778,21 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; ovs_match_init(&match, &key, &mask); - error = ovs_match_from_nlattrs(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + error = ovs_nla_get_match(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) goto error; /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); error = PTR_ERR(acts); if (IS_ERR(acts)) goto error; - ovs_flow_key_mask(&masked_key, &key, &mask); - error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], - &masked_key, 0, &acts); + ovs_flow_mask_key(&masked_key, &key, &mask); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); goto err_kfree; @@ -1284,29 +808,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!dp) goto err_unlock_ovs; - table = ovsl_dereference(dp->table); - /* Check if this is a duplicate flow */ - flow = ovs_flow_lookup(table, &key); + flow = __ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { - struct sw_flow_mask *mask_p; /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) goto err_unlock_ovs; - /* Expand table, if necessary, to make room. */ - if (ovs_flow_tbl_need_to_expand(table)) { - struct flow_table *new_table; - - new_table = ovs_flow_tbl_expand(table); - if (!IS_ERR(new_table)) { - rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_destroy(table, true); - table = ovsl_dereference(dp->table); - } - } - /* Allocate flow. */ flow = ovs_flow_alloc(); if (IS_ERR(flow)) { @@ -1317,25 +826,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->key = masked_key; flow->unmasked_key = key; - - /* Make sure mask is unique in the system */ - mask_p = ovs_sw_flow_mask_find(table, &mask); - if (!mask_p) { - /* Allocate a new mask if none exsits. */ - mask_p = ovs_sw_flow_mask_alloc(); - if (!mask_p) - goto err_flow_free; - mask_p->key = mask.key; - mask_p->range = mask.range; - ovs_sw_flow_mask_insert(table, mask_p); - } - - ovs_sw_flow_mask_add_ref(mask_p); - flow->mask = mask_p; rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - ovs_flow_insert(table, flow); + error = ovs_flow_tbl_insert(&dp->table, flow, &mask); + if (error) { + acts = NULL; + goto err_flow_free; + } reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -1356,7 +854,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* The unmasked key has to be the same for flow updates. */ error = -EINVAL; - if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) { + if (!ovs_flow_cmp_unmasked_key(flow, &match)) { OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n"); goto err_unlock_ovs; } @@ -1364,7 +862,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); - ovs_flow_deferred_free_acts(old_acts); + ovs_nla_free_flow_actions(old_acts); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -1403,7 +901,6 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; - struct flow_table *table; struct sw_flow_match match; int err; @@ -1413,7 +910,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -1424,9 +921,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - table = ovsl_dereference(dp->table); - flow = ovs_flow_lookup_unmasked_key(table, &match); - if (!flow) { + flow = __ovs_flow_tbl_lookup(&dp->table, &key); + if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } @@ -1453,7 +949,6 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; - struct flow_table *table; struct sw_flow_match match; int err; @@ -1465,18 +960,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } if (!a[OVS_FLOW_ATTR_KEY]) { - err = flush_flows(dp); + err = ovs_flow_tbl_flush(&dp->table); goto unlock; } ovs_match_init(&match, &key, NULL); - err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) goto unlock; - table = ovsl_dereference(dp->table); - flow = ovs_flow_lookup_unmasked_key(table, &match); - if (!flow) { + flow = __ovs_flow_tbl_lookup(&dp->table, &key); + if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } @@ -1487,7 +981,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - ovs_flow_remove(table, flow); + ovs_flow_tbl_remove(&dp->table, flow); err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); @@ -1506,8 +1000,8 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); + struct table_instance *ti; struct datapath *dp; - struct flow_table *table; rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1516,14 +1010,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return -ENODEV; } - table = rcu_dereference(dp->table); + ti = rcu_dereference(dp->table.ti); for (;;) { struct sw_flow *flow; u32 bucket, obj; bucket = cb->args[0]; obj = cb->args[1]; - flow = ovs_flow_dump_next(table, &bucket, &obj); + flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj); if (!flow) break; @@ -1589,6 +1083,7 @@ static size_t ovs_dp_cmd_msg_size(void) msgsize += nla_total_size(IFNAMSIZ); msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); + msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats)); return msgsize; } @@ -1598,6 +1093,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, { struct ovs_header *ovs_header; struct ovs_dp_stats dp_stats; + struct ovs_dp_megaflow_stats dp_megaflow_stats; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, @@ -1613,8 +1109,14 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (err) goto nla_put_failure; - get_dp_stats(dp, &dp_stats); - if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats)) + get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); + if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), + &dp_stats)) + goto nla_put_failure; + + if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS, + sizeof(struct ovs_dp_megaflow_stats), + &dp_megaflow_stats)) goto nla_put_failure; return genlmsg_end(skb, ovs_header); @@ -1687,9 +1189,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); /* Allocate table. */ - err = -ENOMEM; - rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS)); - if (!dp->table) + err = ovs_flow_tbl_init(&dp->table); + if (err) goto err_free_dp; dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); @@ -1699,7 +1200,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) } dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), - GFP_KERNEL); + GFP_KERNEL); if (!dp->ports) { err = -ENOMEM; goto err_destroy_percpu; @@ -1746,7 +1247,7 @@ err_destroy_ports_array: err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false); + ovs_flow_tbl_destroy(&dp->table); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); @@ -2336,32 +1837,6 @@ error: return err; } -static void rehash_flow_table(struct work_struct *work) -{ - struct datapath *dp; - struct net *net; - - ovs_lock(); - rtnl_lock(); - for_each_net(net) { - struct ovs_net *ovs_net = net_generic(net, ovs_net_id); - - list_for_each_entry(dp, &ovs_net->dps, list_node) { - struct flow_table *old_table = ovsl_dereference(dp->table); - struct flow_table *new_table; - - new_table = ovs_flow_tbl_rehash(old_table); - if (!IS_ERR(new_table)) { - rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_destroy(old_table, true); - } - } - } - rtnl_unlock(); - ovs_unlock(); - schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); -} - static int __net_init ovs_init_net(struct net *net) { struct ovs_net *ovs_net = net_generic(net, ovs_net_id); @@ -2419,8 +1894,6 @@ static int __init dp_init(void) if (err < 0) goto error_unreg_notifier; - schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); - return 0; error_unreg_notifier: @@ -2437,7 +1910,6 @@ error: static void dp_cleanup(void) { - cancel_delayed_work_sync(&rehash_flow_wq); dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); unregister_netdevice_notifier(&ovs_dp_device_notifier); unregister_pernet_device(&ovs_net_ops); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4d109c176ef3..d3d14a58aa91 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -27,6 +27,7 @@ #include <linux/u64_stats_sync.h> #include "flow.h" +#include "flow_table.h" #include "vport.h" #define DP_MAX_PORTS USHRT_MAX @@ -45,11 +46,15 @@ * @n_lost: Number of received packets that had no matching flow in the flow * table that could not be sent to userspace (normally due to an overflow in * one of the datapath's queues). + * @n_mask_hit: Number of masks looked up for flow match. + * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked + * up per packet. */ struct dp_stats_percpu { u64 n_hit; u64 n_missed; u64 n_lost; + u64 n_mask_hit; struct u64_stats_sync sync; }; @@ -57,7 +62,7 @@ struct dp_stats_percpu { * struct datapath - datapath for flow-based packet switching * @rcu: RCU callback head for deferred destruction. * @list_node: Element in global 'dps' list. - * @table: Current flow table. Protected by ovs_mutex and RCU. + * @table: flow table. * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by * ovs_mutex and RCU. * @stats_percpu: Per-CPU datapath statistics. @@ -71,7 +76,7 @@ struct datapath { struct list_head list_node; /* Flow table. */ - struct flow_table __rcu *table; + struct flow_table table; /* Switch ports. */ struct hlist_head *ports; diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index c3235675f359..5c2dab276109 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -65,8 +65,7 @@ void ovs_dp_notify_wq(struct work_struct *work) continue; netdev_vport = netdev_vport_priv(vport); - if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED || - netdev_vport->dev->reg_state == NETREG_UNREGISTERING) + if (!(netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH)) dp_detach_port_notify(vport); } } @@ -88,6 +87,10 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, return NOTIFY_DONE; if (event == NETDEV_UNREGISTER) { + /* upper_dev_unlink and decrement promisc immediately */ + ovs_netdev_detach_dev(vport); + + /* schedule vport destroy, dev_put and genl notification */ ovs_net = net_generic(dev_net(dev), ovs_net_id); queue_work(system_wq, &ovs_net->dp_notify_work); } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 410db90db73d..b409f5279601 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -45,202 +45,38 @@ #include <net/ipv6.h> #include <net/ndisc.h> -static struct kmem_cache *flow_cache; - -static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, - struct sw_flow_key_range *range, u8 val); - -static void update_range__(struct sw_flow_match *match, - size_t offset, size_t size, bool is_mask) +u64 ovs_flow_used_time(unsigned long flow_jiffies) { - struct sw_flow_key_range *range = NULL; - size_t start = rounddown(offset, sizeof(long)); - size_t end = roundup(offset + size, sizeof(long)); - - if (!is_mask) - range = &match->range; - else if (match->mask) - range = &match->mask->range; - - if (!range) - return; - - if (range->start == range->end) { - range->start = start; - range->end = end; - return; - } - - if (range->start > start) - range->start = start; + struct timespec cur_ts; + u64 cur_ms, idle_ms; - if (range->end < end) - range->end = end; -} + ktime_get_ts(&cur_ts); + idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); + cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + + cur_ts.tv_nsec / NSEC_PER_MSEC; -#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ - do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - sizeof((match)->key->field), is_mask); \ - if (is_mask) { \ - if ((match)->mask) \ - (match)->mask->key.field = value; \ - } else { \ - (match)->key->field = value; \ - } \ - } while (0) - -#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ - do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - len, is_mask); \ - if (is_mask) { \ - if ((match)->mask) \ - memcpy(&(match)->mask->key.field, value_p, len);\ - } else { \ - memcpy(&(match)->key->field, value_p, len); \ - } \ - } while (0) - -static u16 range_n_bytes(const struct sw_flow_key_range *range) -{ - return range->end - range->start; + return cur_ms - idle_ms; } -void ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, - struct sw_flow_mask *mask) -{ - memset(match, 0, sizeof(*match)); - match->key = key; - match->mask = mask; +#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) - memset(key, 0, sizeof(*key)); - - if (mask) { - memset(&mask->key, 0, sizeof(mask->key)); - mask->range.start = mask->range.end = 0; - } -} - -static bool ovs_match_validate(const struct sw_flow_match *match, - u64 key_attrs, u64 mask_attrs) +void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) { - u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; - u64 mask_allowed = key_attrs; /* At most allow all key attributes */ - - /* The following mask attributes allowed only if they - * pass the validation tests. */ - mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) - | (1 << OVS_KEY_ATTR_IPV6) - | (1 << OVS_KEY_ATTR_TCP) - | (1 << OVS_KEY_ATTR_UDP) - | (1 << OVS_KEY_ATTR_SCTP) - | (1 << OVS_KEY_ATTR_ICMP) - | (1 << OVS_KEY_ATTR_ICMPV6) - | (1 << OVS_KEY_ATTR_ARP) - | (1 << OVS_KEY_ATTR_ND)); - - /* Always allowed mask fields. */ - mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) - | (1 << OVS_KEY_ATTR_IN_PORT) - | (1 << OVS_KEY_ATTR_ETHERTYPE)); - - /* Check key attributes. */ - if (match->key->eth.type == htons(ETH_P_ARP) - || match->key->eth.type == htons(ETH_P_RARP)) { - key_expected |= 1 << OVS_KEY_ATTR_ARP; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) - mask_allowed |= 1 << OVS_KEY_ATTR_ARP; - } + __be16 tcp_flags = 0; - if (match->key->eth.type == htons(ETH_P_IP)) { - key_expected |= 1 << OVS_KEY_ATTR_IPV4; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) - mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; - - if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { - if (match->key->ip.proto == IPPROTO_UDP) { - key_expected |= 1 << OVS_KEY_ATTR_UDP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_UDP; - } - - if (match->key->ip.proto == IPPROTO_SCTP) { - key_expected |= 1 << OVS_KEY_ATTR_SCTP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; - } - - if (match->key->ip.proto == IPPROTO_TCP) { - key_expected |= 1 << OVS_KEY_ATTR_TCP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_TCP; - } - - if (match->key->ip.proto == IPPROTO_ICMP) { - key_expected |= 1 << OVS_KEY_ATTR_ICMP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; - } - } - } - - if (match->key->eth.type == htons(ETH_P_IPV6)) { - key_expected |= 1 << OVS_KEY_ATTR_IPV6; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) - mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; - - if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { - if (match->key->ip.proto == IPPROTO_UDP) { - key_expected |= 1 << OVS_KEY_ATTR_UDP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_UDP; - } - - if (match->key->ip.proto == IPPROTO_SCTP) { - key_expected |= 1 << OVS_KEY_ATTR_SCTP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; - } - - if (match->key->ip.proto == IPPROTO_TCP) { - key_expected |= 1 << OVS_KEY_ATTR_TCP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_TCP; - } - - if (match->key->ip.proto == IPPROTO_ICMPV6) { - key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; - - if (match->key->ipv6.tp.src == - htons(NDISC_NEIGHBOUR_SOLICITATION) || - match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { - key_expected |= 1 << OVS_KEY_ATTR_ND; - if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) - mask_allowed |= 1 << OVS_KEY_ATTR_ND; - } - } - } - } - - if ((key_attrs & key_expected) != key_expected) { - /* Key attributes check failed. */ - OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", - key_attrs, key_expected); - return false; - } - - if ((mask_attrs & mask_allowed) != mask_attrs) { - /* Mask attributes check failed. */ - OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", - mask_attrs, mask_allowed); - return false; + if ((flow->key.eth.type == htons(ETH_P_IP) || + flow->key.eth.type == htons(ETH_P_IPV6)) && + flow->key.ip.proto == IPPROTO_TCP && + likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { + tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); } - return true; + spin_lock(&flow->lock); + flow->used = jiffies; + flow->packet_count++; + flow->byte_count += skb->len; + flow->tcp_flags |= tcp_flags; + spin_unlock(&flow->lock); } static int check_header(struct sk_buff *skb, int len) @@ -311,19 +147,6 @@ static bool icmphdr_ok(struct sk_buff *skb) sizeof(struct icmphdr)); } -u64 ovs_flow_used_time(unsigned long flow_jiffies) -{ - struct timespec cur_ts; - u64 cur_ms, idle_ms; - - ktime_get_ts(&cur_ts); - idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); - cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + - cur_ts.tv_nsec / NSEC_PER_MSEC; - - return cur_ms - idle_ms; -} - static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) { unsigned int nh_ofs = skb_network_offset(skb); @@ -372,311 +195,6 @@ static bool icmp6hdr_ok(struct sk_buff *skb) sizeof(struct icmp6hdr)); } -void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask) -{ - const long *m = (long *)((u8 *)&mask->key + mask->range.start); - const long *s = (long *)((u8 *)src + mask->range.start); - long *d = (long *)((u8 *)dst + mask->range.start); - int i; - - /* The memory outside of the 'mask->range' are not set since - * further operations on 'dst' only uses contents within - * 'mask->range'. - */ - for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) - *d++ = *s++ & *m++; -} - -#define TCP_FLAGS_OFFSET 13 -#define TCP_FLAG_MASK 0x3f - -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) -{ - u8 tcp_flags = 0; - - if ((flow->key.eth.type == htons(ETH_P_IP) || - flow->key.eth.type == htons(ETH_P_IPV6)) && - flow->key.ip.proto == IPPROTO_TCP && - likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { - u8 *tcp = (u8 *)tcp_hdr(skb); - tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; - } - - spin_lock(&flow->lock); - flow->used = jiffies; - flow->packet_count++; - flow->byte_count += skb->len; - flow->tcp_flags |= tcp_flags; - spin_unlock(&flow->lock); -} - -struct sw_flow_actions *ovs_flow_actions_alloc(int size) -{ - struct sw_flow_actions *sfa; - - if (size > MAX_ACTIONS_BUFSIZE) - return ERR_PTR(-EINVAL); - - sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); - if (!sfa) - return ERR_PTR(-ENOMEM); - - sfa->actions_len = 0; - return sfa; -} - -struct sw_flow *ovs_flow_alloc(void) -{ - struct sw_flow *flow; - - flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); - if (!flow) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&flow->lock); - flow->sf_acts = NULL; - flow->mask = NULL; - - return flow; -} - -static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) -{ - hash = jhash_1word(hash, table->hash_seed); - return flex_array_get(table->buckets, - (hash & (table->n_buckets - 1))); -} - -static struct flex_array *alloc_buckets(unsigned int n_buckets) -{ - struct flex_array *buckets; - int i, err; - - buckets = flex_array_alloc(sizeof(struct hlist_head), - n_buckets, GFP_KERNEL); - if (!buckets) - return NULL; - - err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); - if (err) { - flex_array_free(buckets); - return NULL; - } - - for (i = 0; i < n_buckets; i++) - INIT_HLIST_HEAD((struct hlist_head *) - flex_array_get(buckets, i)); - - return buckets; -} - -static void free_buckets(struct flex_array *buckets) -{ - flex_array_free(buckets); -} - -static struct flow_table *__flow_tbl_alloc(int new_size) -{ - struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); - - if (!table) - return NULL; - - table->buckets = alloc_buckets(new_size); - - if (!table->buckets) { - kfree(table); - return NULL; - } - table->n_buckets = new_size; - table->count = 0; - table->node_ver = 0; - table->keep_flows = false; - get_random_bytes(&table->hash_seed, sizeof(u32)); - table->mask_list = NULL; - - return table; -} - -static void __flow_tbl_destroy(struct flow_table *table) -{ - int i; - - if (table->keep_flows) - goto skip_flows; - - for (i = 0; i < table->n_buckets; i++) { - struct sw_flow *flow; - struct hlist_head *head = flex_array_get(table->buckets, i); - struct hlist_node *n; - int ver = table->node_ver; - - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del(&flow->hash_node[ver]); - ovs_flow_free(flow, false); - } - } - - BUG_ON(!list_empty(table->mask_list)); - kfree(table->mask_list); - -skip_flows: - free_buckets(table->buckets); - kfree(table); -} - -struct flow_table *ovs_flow_tbl_alloc(int new_size) -{ - struct flow_table *table = __flow_tbl_alloc(new_size); - - if (!table) - return NULL; - - table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL); - if (!table->mask_list) { - table->keep_flows = true; - __flow_tbl_destroy(table); - return NULL; - } - INIT_LIST_HEAD(table->mask_list); - - return table; -} - -static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) -{ - struct flow_table *table = container_of(rcu, struct flow_table, rcu); - - __flow_tbl_destroy(table); -} - -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) -{ - if (!table) - return; - - if (deferred) - call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); - else - __flow_tbl_destroy(table); -} - -struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last) -{ - struct sw_flow *flow; - struct hlist_head *head; - int ver; - int i; - - ver = table->node_ver; - while (*bucket < table->n_buckets) { - i = 0; - head = flex_array_get(table->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { - if (i < *last) { - i++; - continue; - } - *last = i + 1; - return flow; - } - (*bucket)++; - *last = 0; - } - - return NULL; -} - -static void __tbl_insert(struct flow_table *table, struct sw_flow *flow) -{ - struct hlist_head *head; - - head = find_bucket(table, flow->hash); - hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); - - table->count++; -} - -static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) -{ - int old_ver; - int i; - - old_ver = old->node_ver; - new->node_ver = !old_ver; - - /* Insert in new table. */ - for (i = 0; i < old->n_buckets; i++) { - struct sw_flow *flow; - struct hlist_head *head; - - head = flex_array_get(old->buckets, i); - - hlist_for_each_entry(flow, head, hash_node[old_ver]) - __tbl_insert(new, flow); - } - - new->mask_list = old->mask_list; - old->keep_flows = true; -} - -static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets) -{ - struct flow_table *new_table; - - new_table = __flow_tbl_alloc(n_buckets); - if (!new_table) - return ERR_PTR(-ENOMEM); - - flow_table_copy_flows(table, new_table); - - return new_table; -} - -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) -{ - return __flow_tbl_rehash(table, table->n_buckets); -} - -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) -{ - return __flow_tbl_rehash(table, table->n_buckets * 2); -} - -static void __flow_free(struct sw_flow *flow) -{ - kfree((struct sf_flow_acts __force *)flow->sf_acts); - kmem_cache_free(flow_cache, flow); -} - -static void rcu_free_flow_callback(struct rcu_head *rcu) -{ - struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); - - __flow_free(flow); -} - -void ovs_flow_free(struct sw_flow *flow, bool deferred) -{ - if (!flow) - return; - - ovs_sw_flow_mask_del_ref(flow->mask, deferred); - - if (deferred) - call_rcu(&flow->rcu, rcu_free_flow_callback); - else - __flow_free(flow); -} - -/* Schedules 'sf_acts' to be freed after the next RCU grace period. - * The caller must hold rcu_read_lock for this to be sensible. */ -void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) -{ - kfree_rcu(sf_acts, rcu); -} - static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) { struct qtag_prefix { @@ -910,6 +428,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) struct tcphdr *tcp = tcp_hdr(skb); key->ipv4.tp.src = tcp->source; key->ipv4.tp.dst = tcp->dest; + key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == IPPROTO_UDP) { if (udphdr_ok(skb)) { @@ -978,6 +497,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) struct tcphdr *tcp = tcp_hdr(skb); key->ipv6.tp.src = tcp->source; key->ipv6.tp.dst = tcp->dest; + key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == NEXTHDR_UDP) { if (udphdr_ok(skb)) { @@ -1002,1080 +522,3 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) return 0; } - -static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, - int key_end) -{ - u32 *hash_key = (u32 *)((u8 *)key + key_start); - int hash_u32s = (key_end - key_start) >> 2; - - /* Make sure number of hash bytes are multiple of u32. */ - BUILD_BUG_ON(sizeof(long) % sizeof(u32)); - - return jhash2(hash_key, hash_u32s, 0); -} - -static int flow_key_start(const struct sw_flow_key *key) -{ - if (key->tun_key.ipv4_dst) - return 0; - else - return rounddown(offsetof(struct sw_flow_key, phy), - sizeof(long)); -} - -static bool __cmp_key(const struct sw_flow_key *key1, - const struct sw_flow_key *key2, int key_start, int key_end) -{ - const long *cp1 = (long *)((u8 *)key1 + key_start); - const long *cp2 = (long *)((u8 *)key2 + key_start); - long diffs = 0; - int i; - - for (i = key_start; i < key_end; i += sizeof(long)) - diffs |= *cp1++ ^ *cp2++; - - return diffs == 0; -} - -static bool __flow_cmp_masked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_start, int key_end) -{ - return __cmp_key(&flow->key, key, key_start, key_end); -} - -static bool __flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_start, int key_end) -{ - return __cmp_key(&flow->unmasked_key, key, key_start, key_end); -} - -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_end) -{ - int key_start; - key_start = flow_key_start(key); - - return __flow_cmp_unmasked_key(flow, key, key_start, key_end); - -} - -struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, - struct sw_flow_match *match) -{ - struct sw_flow_key *unmasked = match->key; - int key_end = match->range.end; - struct sw_flow *flow; - - flow = ovs_flow_lookup(table, unmasked); - if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end))) - flow = NULL; - - return flow; -} - -static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table, - const struct sw_flow_key *unmasked, - struct sw_flow_mask *mask) -{ - struct sw_flow *flow; - struct hlist_head *head; - int key_start = mask->range.start; - int key_end = mask->range.end; - u32 hash; - struct sw_flow_key masked_key; - - ovs_flow_key_mask(&masked_key, unmasked, mask); - hash = ovs_flow_hash(&masked_key, key_start, key_end); - head = find_bucket(table, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { - if (flow->mask == mask && - __flow_cmp_masked_key(flow, &masked_key, - key_start, key_end)) - return flow; - } - return NULL; -} - -struct sw_flow *ovs_flow_lookup(struct flow_table *tbl, - const struct sw_flow_key *key) -{ - struct sw_flow *flow = NULL; - struct sw_flow_mask *mask; - - list_for_each_entry_rcu(mask, tbl->mask_list, list) { - flow = ovs_masked_flow_lookup(tbl, key, mask); - if (flow) /* Found */ - break; - } - - return flow; -} - - -void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow) -{ - flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start, - flow->mask->range.end); - __tbl_insert(table, flow); -} - -void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow) -{ - BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[table->node_ver]); - table->count--; -} - -/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ -const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { - [OVS_KEY_ATTR_ENCAP] = -1, - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), - [OVS_KEY_ATTR_TUNNEL] = -1, -}; - -static bool is_all_zero(const u8 *fp, size_t size) -{ - int i; - - if (!fp) - return false; - - for (i = 0; i < size; i++) - if (fp[i]) - return false; - - return true; -} - -static int __parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], - u64 *attrsp, bool nz) -{ - const struct nlattr *nla; - u32 attrs; - int rem; - - attrs = *attrsp; - nla_for_each_nested(nla, attr, rem) { - u16 type = nla_type(nla); - int expected_len; - - if (type > OVS_KEY_ATTR_MAX) { - OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", - type, OVS_KEY_ATTR_MAX); - return -EINVAL; - } - - if (attrs & (1 << type)) { - OVS_NLERR("Duplicate key attribute (type %d).\n", type); - return -EINVAL; - } - - expected_len = ovs_key_lens[type]; - if (nla_len(nla) != expected_len && expected_len != -1) { - OVS_NLERR("Key attribute has unexpected length (type=%d" - ", length=%d, expected=%d).\n", type, - nla_len(nla), expected_len); - return -EINVAL; - } - - if (!nz || !is_all_zero(nla_data(nla), expected_len)) { - attrs |= 1 << type; - a[type] = nla; - } - } - if (rem) { - OVS_NLERR("Message has %d unknown bytes.\n", rem); - return -EINVAL; - } - - *attrsp = attrs; - return 0; -} - -static int parse_flow_mask_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) -{ - return __parse_flow_nlattrs(attr, a, attrsp, true); -} - -static int parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) -{ - return __parse_flow_nlattrs(attr, a, attrsp, false); -} - -int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask) -{ - struct nlattr *a; - int rem; - bool ttl = false; - __be16 tun_flags = 0; - - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, - }; - - if (type > OVS_TUNNEL_KEY_ATTR_MAX) { - OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", - type, OVS_TUNNEL_KEY_ATTR_MAX); - return -EINVAL; - } - - if (ovs_tunnel_key_lens[type] != nla_len(a)) { - OVS_NLERR("IPv4 tunnel attribute type has unexpected " - " length (type=%d, length=%d, expected=%d).\n", - type, nla_len(a), ovs_tunnel_key_lens[type]); - return -EINVAL; - } - - switch (type) { - case OVS_TUNNEL_KEY_ATTR_ID: - SW_FLOW_KEY_PUT(match, tun_key.tun_id, - nla_get_be64(a), is_mask); - tun_flags |= TUNNEL_KEY; - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, - nla_get_be32(a), is_mask); - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_DST: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, - nla_get_be32(a), is_mask); - break; - case OVS_TUNNEL_KEY_ATTR_TOS: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, - nla_get_u8(a), is_mask); - break; - case OVS_TUNNEL_KEY_ATTR_TTL: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, - nla_get_u8(a), is_mask); - ttl = true; - break; - case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: - tun_flags |= TUNNEL_DONT_FRAGMENT; - break; - case OVS_TUNNEL_KEY_ATTR_CSUM: - tun_flags |= TUNNEL_CSUM; - break; - default: - return -EINVAL; - } - } - - SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); - - if (rem > 0) { - OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); - return -EINVAL; - } - - if (!is_mask) { - if (!match->key->tun_key.ipv4_dst) { - OVS_NLERR("IPv4 tunnel destination address is zero.\n"); - return -EINVAL; - } - - if (!ttl) { - OVS_NLERR("IPv4 tunnel TTL not specified.\n"); - return -EINVAL; - } - } - - return 0; -} - -int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ovs_key_ipv4_tunnel *tun_key, - const struct ovs_key_ipv4_tunnel *output) -{ - struct nlattr *nla; - - nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); - if (!nla) - return -EMSGSIZE; - - if (output->tun_flags & TUNNEL_KEY && - nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) - return -EMSGSIZE; - if (output->ipv4_src && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) - return -EMSGSIZE; - if (output->ipv4_dst && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) - return -EMSGSIZE; - if (output->ipv4_tos && - nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) - return -EMSGSIZE; - if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) - return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) - return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_CSUM) && - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) - return -EMSGSIZE; - - nla_nest_end(skb, nla); - return 0; -} - -static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, - const struct nlattr **a, bool is_mask) -{ - if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { - SW_FLOW_KEY_PUT(match, phy.priority, - nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); - *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); - } - - if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { - u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); - - if (is_mask) - in_port = 0xffffffff; /* Always exact match in_port. */ - else if (in_port >= DP_MAX_PORTS) - return -EINVAL; - - SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); - *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); - } else if (!is_mask) { - SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); - } - - if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { - uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); - - SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); - *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); - } - if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { - if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask)) - return -EINVAL; - *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); - } - return 0; -} - -static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask) -{ - int err; - u64 orig_attrs = attrs; - - err = metadata_from_nlattrs(match, &attrs, a, is_mask); - if (err) - return err; - - if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { - const struct ovs_key_ethernet *eth_key; - - eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); - SW_FLOW_KEY_MEMCPY(match, eth.src, - eth_key->eth_src, ETH_ALEN, is_mask); - SW_FLOW_KEY_MEMCPY(match, eth.dst, - eth_key->eth_dst, ETH_ALEN, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); - } - - if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { - __be16 tci; - - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - if (!(tci & htons(VLAN_TAG_PRESENT))) { - if (is_mask) - OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); - else - OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); - - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_VLAN); - } else if (!is_mask) - SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); - - if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { - __be16 eth_type; - - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (is_mask) { - /* Always exact match EtherType. */ - eth_type = htons(0xffff); - } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { - OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", - ntohs(eth_type), ETH_P_802_3_MIN); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - } else if (!is_mask) { - SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); - } - - if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { - const struct ovs_key_ipv4 *ipv4_key; - - ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); - if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", - ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); - return -EINVAL; - } - SW_FLOW_KEY_PUT(match, ip.proto, - ipv4_key->ipv4_proto, is_mask); - SW_FLOW_KEY_PUT(match, ip.tos, - ipv4_key->ipv4_tos, is_mask); - SW_FLOW_KEY_PUT(match, ip.ttl, - ipv4_key->ipv4_ttl, is_mask); - SW_FLOW_KEY_PUT(match, ip.frag, - ipv4_key->ipv4_frag, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.addr.src, - ipv4_key->ipv4_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.addr.dst, - ipv4_key->ipv4_dst, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_IPV4); - } - - if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { - const struct ovs_key_ipv6 *ipv6_key; - - ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); - if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", - ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); - return -EINVAL; - } - SW_FLOW_KEY_PUT(match, ipv6.label, - ipv6_key->ipv6_label, is_mask); - SW_FLOW_KEY_PUT(match, ip.proto, - ipv6_key->ipv6_proto, is_mask); - SW_FLOW_KEY_PUT(match, ip.tos, - ipv6_key->ipv6_tclass, is_mask); - SW_FLOW_KEY_PUT(match, ip.ttl, - ipv6_key->ipv6_hlimit, is_mask); - SW_FLOW_KEY_PUT(match, ip.frag, - ipv6_key->ipv6_frag, is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, - ipv6_key->ipv6_src, - sizeof(match->key->ipv6.addr.src), - is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, - ipv6_key->ipv6_dst, - sizeof(match->key->ipv6.addr.dst), - is_mask); - - attrs &= ~(1 << OVS_KEY_ATTR_IPV6); - } - - if (attrs & (1 << OVS_KEY_ATTR_ARP)) { - const struct ovs_key_arp *arp_key; - - arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); - if (!is_mask && (arp_key->arp_op & htons(0xff00))) { - OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", - arp_key->arp_op); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, ipv4.addr.src, - arp_key->arp_sip, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.addr.dst, - arp_key->arp_tip, is_mask); - SW_FLOW_KEY_PUT(match, ip.proto, - ntohs(arp_key->arp_op), is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, - arp_key->arp_sha, ETH_ALEN, is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, - arp_key->arp_tha, ETH_ALEN, is_mask); - - attrs &= ~(1 << OVS_KEY_ATTR_ARP); - } - - if (attrs & (1 << OVS_KEY_ATTR_TCP)) { - const struct ovs_key_tcp *tcp_key; - - tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - tcp_key->tcp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - tcp_key->tcp_dst, is_mask); - } - attrs &= ~(1 << OVS_KEY_ATTR_TCP); - } - - if (attrs & (1 << OVS_KEY_ATTR_UDP)) { - const struct ovs_key_udp *udp_key; - - udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - udp_key->udp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - udp_key->udp_dst, is_mask); - } - attrs &= ~(1 << OVS_KEY_ATTR_UDP); - } - - if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { - const struct ovs_key_sctp *sctp_key; - - sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - sctp_key->sctp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - sctp_key->sctp_dst, is_mask); - } - attrs &= ~(1 << OVS_KEY_ATTR_SCTP); - } - - if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { - const struct ovs_key_icmp *icmp_key; - - icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - htons(icmp_key->icmp_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - htons(icmp_key->icmp_code), is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ICMP); - } - - if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { - const struct ovs_key_icmpv6 *icmpv6_key; - - icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - htons(icmpv6_key->icmpv6_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - htons(icmpv6_key->icmpv6_code), is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); - } - - if (attrs & (1 << OVS_KEY_ATTR_ND)) { - const struct ovs_key_nd *nd_key; - - nd_key = nla_data(a[OVS_KEY_ATTR_ND]); - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, - nd_key->nd_target, - sizeof(match->key->ipv6.nd.target), - is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, - nd_key->nd_sll, ETH_ALEN, is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, - nd_key->nd_tll, ETH_ALEN, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ND); - } - - if (attrs != 0) - return -EINVAL; - - return 0; -} - -/** - * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and - * mask. In case the 'mask' is NULL, the flow is treated as exact match - * flow. Otherwise, it is treated as a wildcarded flow, except the mask - * does not include any don't care bit. - * @match: receives the extracted flow match information. - * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute - * sequence. The fields should of the packet that triggered the creation - * of this flow. - * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink - * attribute specifies the mask field of the wildcarded flow. - */ -int ovs_match_from_nlattrs(struct sw_flow_match *match, - const struct nlattr *key, - const struct nlattr *mask) -{ - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - const struct nlattr *encap; - u64 key_attrs = 0; - u64 mask_attrs = 0; - bool encap_valid = false; - int err; - - err = parse_flow_nlattrs(key, a, &key_attrs); - if (err) - return err; - - if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && - (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && - (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { - __be16 tci; - - if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && - (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { - OVS_NLERR("Invalid Vlan frame.\n"); - return -EINVAL; - } - - key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - encap = a[OVS_KEY_ATTR_ENCAP]; - key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); - encap_valid = true; - - if (tci & htons(VLAN_TAG_PRESENT)) { - err = parse_flow_nlattrs(encap, a, &key_attrs); - if (err) - return err; - } else if (!tci) { - /* Corner case for truncated 802.1Q header. */ - if (nla_len(encap)) { - OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); - return -EINVAL; - } - } else { - OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); - return -EINVAL; - } - } - - err = ovs_key_from_nlattrs(match, key_attrs, a, false); - if (err) - return err; - - if (mask) { - err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); - if (err) - return err; - - if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) { - __be16 eth_type = 0; - __be16 tci = 0; - - if (!encap_valid) { - OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); - return -EINVAL; - } - - mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); - if (a[OVS_KEY_ATTR_ETHERTYPE]) - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - - if (eth_type == htons(0xffff)) { - mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - encap = a[OVS_KEY_ATTR_ENCAP]; - err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); - } else { - OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", - ntohs(eth_type)); - return -EINVAL; - } - - if (a[OVS_KEY_ATTR_VLAN]) - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - - if (!(tci & htons(VLAN_TAG_PRESENT))) { - OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); - return -EINVAL; - } - } - - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); - if (err) - return err; - } else { - /* Populate exact match flow's key mask. */ - if (match->mask) - ovs_sw_flow_mask_set(match->mask, &match->range, 0xff); - } - - if (!ovs_match_validate(match, key_attrs, mask_attrs)) - return -EINVAL; - - return 0; -} - -/** - * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. - * @flow: Receives extracted in_port, priority, tun_key and skb_mark. - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute - * sequence. - * - * This parses a series of Netlink attributes that form a flow key, which must - * take the same form accepted by flow_from_nlattrs(), but only enough of it to - * get the metadata, that is, the parts of the flow key that cannot be - * extracted from the packet itself. - */ - -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, - const struct nlattr *attr) -{ - struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - u64 attrs = 0; - int err; - struct sw_flow_match match; - - flow->key.phy.in_port = DP_MAX_PORTS; - flow->key.phy.priority = 0; - flow->key.phy.skb_mark = 0; - memset(tun_key, 0, sizeof(flow->key.tun_key)); - - err = parse_flow_nlattrs(attr, a, &attrs); - if (err) - return -EINVAL; - - memset(&match, 0, sizeof(match)); - match.key = &flow->key; - - err = metadata_from_nlattrs(&match, &attrs, a, false); - if (err) - return err; - - return 0; -} - -int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, - const struct sw_flow_key *output, struct sk_buff *skb) -{ - struct ovs_key_ethernet *eth_key; - struct nlattr *nla, *encap; - bool is_mask = (swkey != output); - - if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) - goto nla_put_failure; - - if ((swkey->tun_key.ipv4_dst || is_mask) && - ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) - goto nla_put_failure; - - if (swkey->phy.in_port == DP_MAX_PORTS) { - if (is_mask && (output->phy.in_port == 0xffff)) - if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) - goto nla_put_failure; - } else { - u16 upper_u16; - upper_u16 = !is_mask ? 0 : 0xffff; - - if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, - (upper_u16 << 16) | output->phy.in_port)) - goto nla_put_failure; - } - - if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) - goto nla_put_failure; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); - if (!nla) - goto nla_put_failure; - - eth_key = nla_data(nla); - memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); - memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); - - if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { - __be16 eth_type; - eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || - nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) - goto nla_put_failure; - encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.tci) - goto unencap; - } else - encap = NULL; - - if (swkey->eth.type == htons(ETH_P_802_2)) { - /* - * Ethertype 802.2 is represented in the netlink with omitted - * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and - * 0xffff in the mask attribute. Ethertype can also - * be wildcarded. - */ - if (is_mask && output->eth.type) - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, - output->eth.type)) - goto nla_put_failure; - goto unencap; - } - - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) - goto nla_put_failure; - - if (swkey->eth.type == htons(ETH_P_IP)) { - struct ovs_key_ipv4 *ipv4_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); - if (!nla) - goto nla_put_failure; - ipv4_key = nla_data(nla); - ipv4_key->ipv4_src = output->ipv4.addr.src; - ipv4_key->ipv4_dst = output->ipv4.addr.dst; - ipv4_key->ipv4_proto = output->ip.proto; - ipv4_key->ipv4_tos = output->ip.tos; - ipv4_key->ipv4_ttl = output->ip.ttl; - ipv4_key->ipv4_frag = output->ip.frag; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - struct ovs_key_ipv6 *ipv6_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); - if (!nla) - goto nla_put_failure; - ipv6_key = nla_data(nla); - memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, - sizeof(ipv6_key->ipv6_src)); - memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, - sizeof(ipv6_key->ipv6_dst)); - ipv6_key->ipv6_label = output->ipv6.label; - ipv6_key->ipv6_proto = output->ip.proto; - ipv6_key->ipv6_tclass = output->ip.tos; - ipv6_key->ipv6_hlimit = output->ip.ttl; - ipv6_key->ipv6_frag = output->ip.frag; - } else if (swkey->eth.type == htons(ETH_P_ARP) || - swkey->eth.type == htons(ETH_P_RARP)) { - struct ovs_key_arp *arp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); - if (!nla) - goto nla_put_failure; - arp_key = nla_data(nla); - memset(arp_key, 0, sizeof(struct ovs_key_arp)); - arp_key->arp_sip = output->ipv4.addr.src; - arp_key->arp_tip = output->ipv4.addr.dst; - arp_key->arp_op = htons(output->ip.proto); - memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); - memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); - } - - if ((swkey->eth.type == htons(ETH_P_IP) || - swkey->eth.type == htons(ETH_P_IPV6)) && - swkey->ip.frag != OVS_FRAG_TYPE_LATER) { - - if (swkey->ip.proto == IPPROTO_TCP) { - struct ovs_key_tcp *tcp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); - if (!nla) - goto nla_put_failure; - tcp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - tcp_key->tcp_src = output->ipv4.tp.src; - tcp_key->tcp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - tcp_key->tcp_src = output->ipv6.tp.src; - tcp_key->tcp_dst = output->ipv6.tp.dst; - } - } else if (swkey->ip.proto == IPPROTO_UDP) { - struct ovs_key_udp *udp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); - if (!nla) - goto nla_put_failure; - udp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - udp_key->udp_src = output->ipv4.tp.src; - udp_key->udp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - udp_key->udp_src = output->ipv6.tp.src; - udp_key->udp_dst = output->ipv6.tp.dst; - } - } else if (swkey->ip.proto == IPPROTO_SCTP) { - struct ovs_key_sctp *sctp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); - if (!nla) - goto nla_put_failure; - sctp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - sctp_key->sctp_src = swkey->ipv4.tp.src; - sctp_key->sctp_dst = swkey->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - sctp_key->sctp_src = swkey->ipv6.tp.src; - sctp_key->sctp_dst = swkey->ipv6.tp.dst; - } - } else if (swkey->eth.type == htons(ETH_P_IP) && - swkey->ip.proto == IPPROTO_ICMP) { - struct ovs_key_icmp *icmp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); - if (!nla) - goto nla_put_failure; - icmp_key = nla_data(nla); - icmp_key->icmp_type = ntohs(output->ipv4.tp.src); - icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); - } else if (swkey->eth.type == htons(ETH_P_IPV6) && - swkey->ip.proto == IPPROTO_ICMPV6) { - struct ovs_key_icmpv6 *icmpv6_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, - sizeof(*icmpv6_key)); - if (!nla) - goto nla_put_failure; - icmpv6_key = nla_data(nla); - icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); - icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); - - if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || - icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { - struct ovs_key_nd *nd_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); - if (!nla) - goto nla_put_failure; - nd_key = nla_data(nla); - memcpy(nd_key->nd_target, &output->ipv6.nd.target, - sizeof(nd_key->nd_target)); - memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); - memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); - } - } - } - -unencap: - if (encap) - nla_nest_end(skb, encap); - - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -/* Initializes the flow module. - * Returns zero if successful or a negative error code. */ -int ovs_flow_init(void) -{ - BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); - BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); - - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, - 0, NULL); - if (flow_cache == NULL) - return -ENOMEM; - - return 0; -} - -/* Uninitializes the flow module. */ -void ovs_flow_exit(void) -{ - kmem_cache_destroy(flow_cache); -} - -struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) -{ - struct sw_flow_mask *mask; - - mask = kmalloc(sizeof(*mask), GFP_KERNEL); - if (mask) - mask->ref_count = 0; - - return mask; -} - -void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask) -{ - mask->ref_count++; -} - -void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) -{ - if (!mask) - return; - - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) { - list_del_rcu(&mask->list); - if (deferred) - kfree_rcu(mask, rcu); - else - kfree(mask); - } -} - -static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a, - const struct sw_flow_mask *b) -{ - u8 *a_ = (u8 *)&a->key + a->range.start; - u8 *b_ = (u8 *)&b->key + b->range.start; - - return (a->range.end == b->range.end) - && (a->range.start == b->range.start) - && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); -} - -struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, - const struct sw_flow_mask *mask) -{ - struct list_head *ml; - - list_for_each(ml, tbl->mask_list) { - struct sw_flow_mask *m; - m = container_of(ml, struct sw_flow_mask, list); - if (ovs_sw_flow_mask_equal(mask, m)) - return m; - } - - return NULL; -} - -/** - * add a new mask into the mask list. - * The caller needs to make sure that 'mask' is not the same - * as any masks that are already on the list. - */ -void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) -{ - list_add_rcu(&mask->list, tbl->mask_list); -} - -/** - * Set 'range' fields in the mask to the value of 'val'. - */ -static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, - struct sw_flow_key_range *range, u8 val) -{ - u8 *m = (u8 *)&mask->key + range->start; - - mask->range = *range; - memset(m, val, range_n_bytes(range)); -} diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 212fbf7510c4..1510f51dbf74 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -33,14 +33,6 @@ #include <net/inet_ecn.h> struct sk_buff; -struct sw_flow_mask; -struct flow_table; - -struct sw_flow_actions { - struct rcu_head rcu; - u32 actions_len; - struct nlattr actions[]; -}; /* Used to memset ovs_key_ipv4_tunnel padding. */ #define OVS_TUNNEL_KEY_SIZE \ @@ -101,6 +93,7 @@ struct sw_flow_key { struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ + __be16 flags; /* TCP flags. */ } tp; struct { u8 sha[ETH_ALEN]; /* ARP source hardware address. */ @@ -117,6 +110,7 @@ struct sw_flow_key { struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ + __be16 flags; /* TCP flags. */ } tp; struct { struct in6_addr target; /* ND target address. */ @@ -127,6 +121,31 @@ struct sw_flow_key { }; } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ +struct sw_flow_key_range { + size_t start; + size_t end; +}; + +struct sw_flow_mask { + int ref_count; + struct rcu_head rcu; + struct list_head list; + struct sw_flow_key_range range; + struct sw_flow_key key; +}; + +struct sw_flow_match { + struct sw_flow_key *key; + struct sw_flow_key_range range; + struct sw_flow_mask *mask; +}; + +struct sw_flow_actions { + struct rcu_head rcu; + u32 actions_len; + struct nlattr actions[]; +}; + struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; @@ -141,23 +160,9 @@ struct sw_flow { unsigned long used; /* Last used time (in jiffies). */ u64 packet_count; /* Number of packets matched. */ u64 byte_count; /* Number of bytes matched. */ - u8 tcp_flags; /* Union of seen TCP flags. */ -}; - -struct sw_flow_key_range { - size_t start; - size_t end; + __be16 tcp_flags; /* Union of seen TCP flags. */ }; -struct sw_flow_match { - struct sw_flow_key *key; - struct sw_flow_key_range range; - struct sw_flow_mask *mask; -}; - -void ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, struct sw_flow_mask *mask); - struct arp_eth_header { __be16 ar_hrd; /* format of hardware address */ __be16 ar_pro; /* format of protocol address */ @@ -172,88 +177,9 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -int ovs_flow_init(void); -void ovs_flow_exit(void); - -struct sw_flow *ovs_flow_alloc(void); -void ovs_flow_deferred_free(struct sw_flow *); -void ovs_flow_free(struct sw_flow *, bool deferred); - -struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); -void ovs_flow_deferred_free_acts(struct sw_flow_actions *); - -int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); void ovs_flow_used(struct sw_flow *, struct sk_buff *); u64 ovs_flow_used_time(unsigned long flow_jiffies); -int ovs_flow_to_nlattrs(const struct sw_flow_key *, - const struct sw_flow_key *, struct sk_buff *); -int ovs_match_from_nlattrs(struct sw_flow_match *match, - const struct nlattr *, - const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, - const struct nlattr *attr); -#define MAX_ACTIONS_BUFSIZE (32 * 1024) -#define TBL_MIN_BUCKETS 1024 - -struct flow_table { - struct flex_array *buckets; - unsigned int count, n_buckets; - struct rcu_head rcu; - struct list_head *mask_list; - int node_ver; - u32 hash_seed; - bool keep_flows; -}; - -static inline int ovs_flow_tbl_count(struct flow_table *table) -{ - return table->count; -} - -static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table) -{ - return (table->count > table->n_buckets); -} - -struct sw_flow *ovs_flow_lookup(struct flow_table *, - const struct sw_flow_key *); -struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, - struct sw_flow_match *match); - -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); -struct flow_table *ovs_flow_tbl_alloc(int new_size); -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); - -void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow); -void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow); - -struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx); -extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; -int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask); -int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ovs_key_ipv4_tunnel *tun_key, - const struct ovs_key_ipv4_tunnel *output); - -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_end); - -struct sw_flow_mask { - int ref_count; - struct rcu_head rcu; - struct list_head list; - struct sw_flow_key_range range; - struct sw_flow_key key; -}; +int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); -struct sw_flow_mask *ovs_sw_flow_mask_alloc(void); -void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *); -void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred); -void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *); -struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *, - const struct sw_flow_mask *); -void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask); #endif /* flow.h */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c new file mode 100644 index 000000000000..2bc1bc1aca3b --- /dev/null +++ b/net/openvswitch/flow_netlink.c @@ -0,0 +1,1630 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include "flow.h" +#include "datapath.h" +#include <linux/uaccess.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/llc_pdu.h> +#include <linux/kernel.h> +#include <linux/jhash.h> +#include <linux/jiffies.h> +#include <linux/llc.h> +#include <linux/module.h> +#include <linux/in.h> +#include <linux/rcupdate.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/sctp.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/rculist.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/ndisc.h> + +#include "flow_netlink.h" + +static void update_range__(struct sw_flow_match *match, + size_t offset, size_t size, bool is_mask) +{ + struct sw_flow_key_range *range = NULL; + size_t start = rounddown(offset, sizeof(long)); + size_t end = roundup(offset + size, sizeof(long)); + + if (!is_mask) + range = &match->range; + else if (match->mask) + range = &match->mask->range; + + if (!range) + return; + + if (range->start == range->end) { + range->start = start; + range->end = end; + return; + } + + if (range->start > start) + range->start = start; + + if (range->end < end) + range->end = end; +} + +#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ + do { \ + update_range__(match, offsetof(struct sw_flow_key, field), \ + sizeof((match)->key->field), is_mask); \ + if (is_mask) { \ + if ((match)->mask) \ + (match)->mask->key.field = value; \ + } else { \ + (match)->key->field = value; \ + } \ + } while (0) + +#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ + do { \ + update_range__(match, offsetof(struct sw_flow_key, field), \ + len, is_mask); \ + if (is_mask) { \ + if ((match)->mask) \ + memcpy(&(match)->mask->key.field, value_p, len);\ + } else { \ + memcpy(&(match)->key->field, value_p, len); \ + } \ + } while (0) + +static u16 range_n_bytes(const struct sw_flow_key_range *range) +{ + return range->end - range->start; +} + +static bool match_validate(const struct sw_flow_match *match, + u64 key_attrs, u64 mask_attrs) +{ + u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; + u64 mask_allowed = key_attrs; /* At most allow all key attributes */ + + /* The following mask attributes allowed only if they + * pass the validation tests. */ + mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) + | (1 << OVS_KEY_ATTR_IPV6) + | (1 << OVS_KEY_ATTR_TCP) + | (1 << OVS_KEY_ATTR_TCP_FLAGS) + | (1 << OVS_KEY_ATTR_UDP) + | (1 << OVS_KEY_ATTR_SCTP) + | (1 << OVS_KEY_ATTR_ICMP) + | (1 << OVS_KEY_ATTR_ICMPV6) + | (1 << OVS_KEY_ATTR_ARP) + | (1 << OVS_KEY_ATTR_ND)); + + /* Always allowed mask fields. */ + mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) + | (1 << OVS_KEY_ATTR_IN_PORT) + | (1 << OVS_KEY_ATTR_ETHERTYPE)); + + /* Check key attributes. */ + if (match->key->eth.type == htons(ETH_P_ARP) + || match->key->eth.type == htons(ETH_P_RARP)) { + key_expected |= 1 << OVS_KEY_ATTR_ARP; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_ARP; + } + + if (match->key->eth.type == htons(ETH_P_IP)) { + key_expected |= 1 << OVS_KEY_ATTR_IPV4; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; + + if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { + if (match->key->ip.proto == IPPROTO_UDP) { + key_expected |= 1 << OVS_KEY_ATTR_UDP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_UDP; + } + + if (match->key->ip.proto == IPPROTO_SCTP) { + key_expected |= 1 << OVS_KEY_ATTR_SCTP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; + } + + if (match->key->ip.proto == IPPROTO_TCP) { + key_expected |= 1 << OVS_KEY_ATTR_TCP; + key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; + if (match->mask && (match->mask->key.ip.proto == 0xff)) { + mask_allowed |= 1 << OVS_KEY_ATTR_TCP; + mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; + } + } + + if (match->key->ip.proto == IPPROTO_ICMP) { + key_expected |= 1 << OVS_KEY_ATTR_ICMP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; + } + } + } + + if (match->key->eth.type == htons(ETH_P_IPV6)) { + key_expected |= 1 << OVS_KEY_ATTR_IPV6; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; + + if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { + if (match->key->ip.proto == IPPROTO_UDP) { + key_expected |= 1 << OVS_KEY_ATTR_UDP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_UDP; + } + + if (match->key->ip.proto == IPPROTO_SCTP) { + key_expected |= 1 << OVS_KEY_ATTR_SCTP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; + } + + if (match->key->ip.proto == IPPROTO_TCP) { + key_expected |= 1 << OVS_KEY_ATTR_TCP; + key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; + if (match->mask && (match->mask->key.ip.proto == 0xff)) { + mask_allowed |= 1 << OVS_KEY_ATTR_TCP; + mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; + } + } + + if (match->key->ip.proto == IPPROTO_ICMPV6) { + key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; + + if (match->key->ipv6.tp.src == + htons(NDISC_NEIGHBOUR_SOLICITATION) || + match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { + key_expected |= 1 << OVS_KEY_ATTR_ND; + if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_ND; + } + } + } + } + + if ((key_attrs & key_expected) != key_expected) { + /* Key attributes check failed. */ + OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", + key_attrs, key_expected); + return false; + } + + if ((mask_attrs & mask_allowed) != mask_attrs) { + /* Mask attributes check failed. */ + OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", + mask_attrs, mask_allowed); + return false; + } + + return true; +} + +/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ +static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = -1, + [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), + [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), + [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), + [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), + [OVS_KEY_ATTR_VLAN] = sizeof(__be16), + [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), + [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), + [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), + [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), + [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), + [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), + [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), + [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), + [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), + [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), + [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), + [OVS_KEY_ATTR_TUNNEL] = -1, +}; + +static bool is_all_zero(const u8 *fp, size_t size) +{ + int i; + + if (!fp) + return false; + + for (i = 0; i < size; i++) + if (fp[i]) + return false; + + return true; +} + +static int __parse_flow_nlattrs(const struct nlattr *attr, + const struct nlattr *a[], + u64 *attrsp, bool nz) +{ + const struct nlattr *nla; + u64 attrs; + int rem; + + attrs = *attrsp; + nla_for_each_nested(nla, attr, rem) { + u16 type = nla_type(nla); + int expected_len; + + if (type > OVS_KEY_ATTR_MAX) { + OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", + type, OVS_KEY_ATTR_MAX); + return -EINVAL; + } + + if (attrs & (1 << type)) { + OVS_NLERR("Duplicate key attribute (type %d).\n", type); + return -EINVAL; + } + + expected_len = ovs_key_lens[type]; + if (nla_len(nla) != expected_len && expected_len != -1) { + OVS_NLERR("Key attribute has unexpected length (type=%d" + ", length=%d, expected=%d).\n", type, + nla_len(nla), expected_len); + return -EINVAL; + } + + if (!nz || !is_all_zero(nla_data(nla), expected_len)) { + attrs |= 1 << type; + a[type] = nla; + } + } + if (rem) { + OVS_NLERR("Message has %d unknown bytes.\n", rem); + return -EINVAL; + } + + *attrsp = attrs; + return 0; +} + +static int parse_flow_mask_nlattrs(const struct nlattr *attr, + const struct nlattr *a[], u64 *attrsp) +{ + return __parse_flow_nlattrs(attr, a, attrsp, true); +} + +static int parse_flow_nlattrs(const struct nlattr *attr, + const struct nlattr *a[], u64 *attrsp) +{ + return __parse_flow_nlattrs(attr, a, attrsp, false); +} + +static int ipv4_tun_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask) +{ + struct nlattr *a; + int rem; + bool ttl = false; + __be16 tun_flags = 0; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_TOS] = 1, + [OVS_TUNNEL_KEY_ATTR_TTL] = 1, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, + [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + }; + + if (type > OVS_TUNNEL_KEY_ATTR_MAX) { + OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", + type, OVS_TUNNEL_KEY_ATTR_MAX); + return -EINVAL; + } + + if (ovs_tunnel_key_lens[type] != nla_len(a)) { + OVS_NLERR("IPv4 tunnel attribute type has unexpected " + " length (type=%d, length=%d, expected=%d).\n", + type, nla_len(a), ovs_tunnel_key_lens[type]); + return -EINVAL; + } + + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: + SW_FLOW_KEY_PUT(match, tun_key.tun_id, + nla_get_be64(a), is_mask); + tun_flags |= TUNNEL_KEY; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, + nla_get_be32(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, + nla_get_be32(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, + nla_get_u8(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, + nla_get_u8(a), is_mask); + ttl = true; + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tun_flags |= TUNNEL_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tun_flags |= TUNNEL_CSUM; + break; + default: + return -EINVAL; + } + } + + SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); + + if (rem > 0) { + OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); + return -EINVAL; + } + + if (!is_mask) { + if (!match->key->tun_key.ipv4_dst) { + OVS_NLERR("IPv4 tunnel destination address is zero.\n"); + return -EINVAL; + } + + if (!ttl) { + OVS_NLERR("IPv4 tunnel TTL not specified.\n"); + return -EINVAL; + } + } + + return 0; +} + +static int ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key, + const struct ovs_key_ipv4_tunnel *output) +{ + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + if (output->tun_flags & TUNNEL_KEY && + nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) + return -EMSGSIZE; + if (output->ipv4_src && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) + return -EMSGSIZE; + if (output->ipv4_dst && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) + return -EMSGSIZE; + if (output->ipv4_tos && + nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) + return -EMSGSIZE; + if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) + return -EMSGSIZE; + if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + return -EMSGSIZE; + if ((output->tun_flags & TUNNEL_CSUM) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + + nla_nest_end(skb, nla); + return 0; +} + + +static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, + const struct nlattr **a, bool is_mask) +{ + if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { + SW_FLOW_KEY_PUT(match, phy.priority, + nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); + *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); + } + + if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { + u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); + + if (is_mask) + in_port = 0xffffffff; /* Always exact match in_port. */ + else if (in_port >= DP_MAX_PORTS) + return -EINVAL; + + SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); + *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); + } else if (!is_mask) { + SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); + } + + if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { + uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); + + SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); + *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); + } + if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { + if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, + is_mask)) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); + } + return 0; +} + +static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, + const struct nlattr **a, bool is_mask) +{ + int err; + u64 orig_attrs = attrs; + + err = metadata_from_nlattrs(match, &attrs, a, is_mask); + if (err) + return err; + + if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { + const struct ovs_key_ethernet *eth_key; + + eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); + SW_FLOW_KEY_MEMCPY(match, eth.src, + eth_key->eth_src, ETH_ALEN, is_mask); + SW_FLOW_KEY_MEMCPY(match, eth.dst, + eth_key->eth_dst, ETH_ALEN, is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); + } + + if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { + __be16 tci; + + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + if (!(tci & htons(VLAN_TAG_PRESENT))) { + if (is_mask) + OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); + else + OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); + + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_VLAN); + } else if (!is_mask) + SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); + + if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { + __be16 eth_type; + + eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + if (is_mask) { + /* Always exact match EtherType. */ + eth_type = htons(0xffff); + } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { + OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", + ntohs(eth_type), ETH_P_802_3_MIN); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + } else if (!is_mask) { + SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); + } + + if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { + const struct ovs_key_ipv4 *ipv4_key; + + ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); + if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { + OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", + ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ip.proto, + ipv4_key->ipv4_proto, is_mask); + SW_FLOW_KEY_PUT(match, ip.tos, + ipv4_key->ipv4_tos, is_mask); + SW_FLOW_KEY_PUT(match, ip.ttl, + ipv4_key->ipv4_ttl, is_mask); + SW_FLOW_KEY_PUT(match, ip.frag, + ipv4_key->ipv4_frag, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.addr.src, + ipv4_key->ipv4_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.addr.dst, + ipv4_key->ipv4_dst, is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_IPV4); + } + + if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { + const struct ovs_key_ipv6 *ipv6_key; + + ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); + if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { + OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", + ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ipv6.label, + ipv6_key->ipv6_label, is_mask); + SW_FLOW_KEY_PUT(match, ip.proto, + ipv6_key->ipv6_proto, is_mask); + SW_FLOW_KEY_PUT(match, ip.tos, + ipv6_key->ipv6_tclass, is_mask); + SW_FLOW_KEY_PUT(match, ip.ttl, + ipv6_key->ipv6_hlimit, is_mask); + SW_FLOW_KEY_PUT(match, ip.frag, + ipv6_key->ipv6_frag, is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, + ipv6_key->ipv6_src, + sizeof(match->key->ipv6.addr.src), + is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, + ipv6_key->ipv6_dst, + sizeof(match->key->ipv6.addr.dst), + is_mask); + + attrs &= ~(1 << OVS_KEY_ATTR_IPV6); + } + + if (attrs & (1 << OVS_KEY_ATTR_ARP)) { + const struct ovs_key_arp *arp_key; + + arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); + if (!is_mask && (arp_key->arp_op & htons(0xff00))) { + OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", + arp_key->arp_op); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, ipv4.addr.src, + arp_key->arp_sip, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.addr.dst, + arp_key->arp_tip, is_mask); + SW_FLOW_KEY_PUT(match, ip.proto, + ntohs(arp_key->arp_op), is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, + arp_key->arp_sha, ETH_ALEN, is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, + arp_key->arp_tha, ETH_ALEN, is_mask); + + attrs &= ~(1 << OVS_KEY_ATTR_ARP); + } + + if (attrs & (1 << OVS_KEY_ATTR_TCP)) { + const struct ovs_key_tcp *tcp_key; + + tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); + if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + tcp_key->tcp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + tcp_key->tcp_dst, is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + tcp_key->tcp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + tcp_key->tcp_dst, is_mask); + } + attrs &= ~(1 << OVS_KEY_ATTR_TCP); + } + + if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { + if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.flags, + nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), + is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.flags, + nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), + is_mask); + } + attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); + } + + if (attrs & (1 << OVS_KEY_ATTR_UDP)) { + const struct ovs_key_udp *udp_key; + + udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); + if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + udp_key->udp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + udp_key->udp_dst, is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + udp_key->udp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + udp_key->udp_dst, is_mask); + } + attrs &= ~(1 << OVS_KEY_ATTR_UDP); + } + + if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { + const struct ovs_key_sctp *sctp_key; + + sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); + if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + sctp_key->sctp_dst, is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + sctp_key->sctp_dst, is_mask); + } + attrs &= ~(1 << OVS_KEY_ATTR_SCTP); + } + + if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { + const struct ovs_key_icmp *icmp_key; + + icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + htons(icmp_key->icmp_type), is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + htons(icmp_key->icmp_code), is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_ICMP); + } + + if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { + const struct ovs_key_icmpv6 *icmpv6_key; + + icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + htons(icmpv6_key->icmpv6_type), is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + htons(icmpv6_key->icmpv6_code), is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); + } + + if (attrs & (1 << OVS_KEY_ATTR_ND)) { + const struct ovs_key_nd *nd_key; + + nd_key = nla_data(a[OVS_KEY_ATTR_ND]); + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, + nd_key->nd_target, + sizeof(match->key->ipv6.nd.target), + is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, + nd_key->nd_sll, ETH_ALEN, is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, + nd_key->nd_tll, ETH_ALEN, is_mask); + attrs &= ~(1 << OVS_KEY_ATTR_ND); + } + + if (attrs != 0) + return -EINVAL; + + return 0; +} + +static void sw_flow_mask_set(struct sw_flow_mask *mask, + struct sw_flow_key_range *range, u8 val) +{ + u8 *m = (u8 *)&mask->key + range->start; + + mask->range = *range; + memset(m, val, range_n_bytes(range)); +} + +/** + * ovs_nla_get_match - parses Netlink attributes into a flow key and + * mask. In case the 'mask' is NULL, the flow is treated as exact match + * flow. Otherwise, it is treated as a wildcarded flow, except the mask + * does not include any don't care bit. + * @match: receives the extracted flow match information. + * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * sequence. The fields should of the packet that triggered the creation + * of this flow. + * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink + * attribute specifies the mask field of the wildcarded flow. + */ +int ovs_nla_get_match(struct sw_flow_match *match, + const struct nlattr *key, + const struct nlattr *mask) +{ + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; + const struct nlattr *encap; + u64 key_attrs = 0; + u64 mask_attrs = 0; + bool encap_valid = false; + int err; + + err = parse_flow_nlattrs(key, a, &key_attrs); + if (err) + return err; + + if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && + (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && + (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { + __be16 tci; + + if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && + (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { + OVS_NLERR("Invalid Vlan frame.\n"); + return -EINVAL; + } + + key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + encap = a[OVS_KEY_ATTR_ENCAP]; + key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); + encap_valid = true; + + if (tci & htons(VLAN_TAG_PRESENT)) { + err = parse_flow_nlattrs(encap, a, &key_attrs); + if (err) + return err; + } else if (!tci) { + /* Corner case for truncated 802.1Q header. */ + if (nla_len(encap)) { + OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); + return -EINVAL; + } + } else { + OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); + return -EINVAL; + } + } + + err = ovs_key_from_nlattrs(match, key_attrs, a, false); + if (err) + return err; + + if (mask) { + err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); + if (err) + return err; + + if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { + __be16 eth_type = 0; + __be16 tci = 0; + + if (!encap_valid) { + OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); + return -EINVAL; + } + + mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); + if (a[OVS_KEY_ATTR_ETHERTYPE]) + eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + + if (eth_type == htons(0xffff)) { + mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + encap = a[OVS_KEY_ATTR_ENCAP]; + err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); + } else { + OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", + ntohs(eth_type)); + return -EINVAL; + } + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (!(tci & htons(VLAN_TAG_PRESENT))) { + OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); + return -EINVAL; + } + } + + err = ovs_key_from_nlattrs(match, mask_attrs, a, true); + if (err) + return err; + } else { + /* Populate exact match flow's key mask. */ + if (match->mask) + sw_flow_mask_set(match->mask, &match->range, 0xff); + } + + if (!match_validate(match, key_attrs, mask_attrs)) + return -EINVAL; + + return 0; +} + +/** + * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. + * @flow: Receives extracted in_port, priority, tun_key and skb_mark. + * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * sequence. + * + * This parses a series of Netlink attributes that form a flow key, which must + * take the same form accepted by flow_from_nlattrs(), but only enough of it to + * get the metadata, that is, the parts of the flow key that cannot be + * extracted from the packet itself. + */ + +int ovs_nla_get_flow_metadata(struct sw_flow *flow, + const struct nlattr *attr) +{ + struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; + u64 attrs = 0; + int err; + struct sw_flow_match match; + + flow->key.phy.in_port = DP_MAX_PORTS; + flow->key.phy.priority = 0; + flow->key.phy.skb_mark = 0; + memset(tun_key, 0, sizeof(flow->key.tun_key)); + + err = parse_flow_nlattrs(attr, a, &attrs); + if (err) + return -EINVAL; + + memset(&match, 0, sizeof(match)); + match.key = &flow->key; + + err = metadata_from_nlattrs(&match, &attrs, a, false); + if (err) + return err; + + return 0; +} + +int ovs_nla_put_flow(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, struct sk_buff *skb) +{ + struct ovs_key_ethernet *eth_key; + struct nlattr *nla, *encap; + bool is_mask = (swkey != output); + + if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) + goto nla_put_failure; + + if ((swkey->tun_key.ipv4_dst || is_mask) && + ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) + goto nla_put_failure; + + if (swkey->phy.in_port == DP_MAX_PORTS) { + if (is_mask && (output->phy.in_port == 0xffff)) + if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) + goto nla_put_failure; + } else { + u16 upper_u16; + upper_u16 = !is_mask ? 0 : 0xffff; + + if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, + (upper_u16 << 16) | output->phy.in_port)) + goto nla_put_failure; + } + + if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) + goto nla_put_failure; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); + if (!nla) + goto nla_put_failure; + + eth_key = nla_data(nla); + memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); + memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); + + if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { + __be16 eth_type; + eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || + nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) + goto nla_put_failure; + encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.tci) + goto unencap; + } else + encap = NULL; + + if (swkey->eth.type == htons(ETH_P_802_2)) { + /* + * Ethertype 802.2 is represented in the netlink with omitted + * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and + * 0xffff in the mask attribute. Ethertype can also + * be wildcarded. + */ + if (is_mask && output->eth.type) + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, + output->eth.type)) + goto nla_put_failure; + goto unencap; + } + + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) + goto nla_put_failure; + + if (swkey->eth.type == htons(ETH_P_IP)) { + struct ovs_key_ipv4 *ipv4_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); + if (!nla) + goto nla_put_failure; + ipv4_key = nla_data(nla); + ipv4_key->ipv4_src = output->ipv4.addr.src; + ipv4_key->ipv4_dst = output->ipv4.addr.dst; + ipv4_key->ipv4_proto = output->ip.proto; + ipv4_key->ipv4_tos = output->ip.tos; + ipv4_key->ipv4_ttl = output->ip.ttl; + ipv4_key->ipv4_frag = output->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + struct ovs_key_ipv6 *ipv6_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); + if (!nla) + goto nla_put_failure; + ipv6_key = nla_data(nla); + memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, + sizeof(ipv6_key->ipv6_src)); + memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, + sizeof(ipv6_key->ipv6_dst)); + ipv6_key->ipv6_label = output->ipv6.label; + ipv6_key->ipv6_proto = output->ip.proto; + ipv6_key->ipv6_tclass = output->ip.tos; + ipv6_key->ipv6_hlimit = output->ip.ttl; + ipv6_key->ipv6_frag = output->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_ARP) || + swkey->eth.type == htons(ETH_P_RARP)) { + struct ovs_key_arp *arp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); + if (!nla) + goto nla_put_failure; + arp_key = nla_data(nla); + memset(arp_key, 0, sizeof(struct ovs_key_arp)); + arp_key->arp_sip = output->ipv4.addr.src; + arp_key->arp_tip = output->ipv4.addr.dst; + arp_key->arp_op = htons(output->ip.proto); + memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); + memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); + } + + if ((swkey->eth.type == htons(ETH_P_IP) || + swkey->eth.type == htons(ETH_P_IPV6)) && + swkey->ip.frag != OVS_FRAG_TYPE_LATER) { + + if (swkey->ip.proto == IPPROTO_TCP) { + struct ovs_key_tcp *tcp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); + if (!nla) + goto nla_put_failure; + tcp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + tcp_key->tcp_src = output->ipv4.tp.src; + tcp_key->tcp_dst = output->ipv4.tp.dst; + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, + output->ipv4.tp.flags)) + goto nla_put_failure; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + tcp_key->tcp_src = output->ipv6.tp.src; + tcp_key->tcp_dst = output->ipv6.tp.dst; + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, + output->ipv6.tp.flags)) + goto nla_put_failure; + } + } else if (swkey->ip.proto == IPPROTO_UDP) { + struct ovs_key_udp *udp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); + if (!nla) + goto nla_put_failure; + udp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + udp_key->udp_src = output->ipv4.tp.src; + udp_key->udp_dst = output->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + udp_key->udp_src = output->ipv6.tp.src; + udp_key->udp_dst = output->ipv6.tp.dst; + } + } else if (swkey->ip.proto == IPPROTO_SCTP) { + struct ovs_key_sctp *sctp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); + if (!nla) + goto nla_put_failure; + sctp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + sctp_key->sctp_src = swkey->ipv4.tp.src; + sctp_key->sctp_dst = swkey->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + sctp_key->sctp_src = swkey->ipv6.tp.src; + sctp_key->sctp_dst = swkey->ipv6.tp.dst; + } + } else if (swkey->eth.type == htons(ETH_P_IP) && + swkey->ip.proto == IPPROTO_ICMP) { + struct ovs_key_icmp *icmp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); + if (!nla) + goto nla_put_failure; + icmp_key = nla_data(nla); + icmp_key->icmp_type = ntohs(output->ipv4.tp.src); + icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); + } else if (swkey->eth.type == htons(ETH_P_IPV6) && + swkey->ip.proto == IPPROTO_ICMPV6) { + struct ovs_key_icmpv6 *icmpv6_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, + sizeof(*icmpv6_key)); + if (!nla) + goto nla_put_failure; + icmpv6_key = nla_data(nla); + icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); + icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); + + if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || + icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { + struct ovs_key_nd *nd_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); + if (!nla) + goto nla_put_failure; + nd_key = nla_data(nla); + memcpy(nd_key->nd_target, &output->ipv6.nd.target, + sizeof(nd_key->nd_target)); + memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); + memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); + } + } + } + +unencap: + if (encap) + nla_nest_end(skb, encap); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +#define MAX_ACTIONS_BUFSIZE (32 * 1024) + +struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) +{ + struct sw_flow_actions *sfa; + + if (size > MAX_ACTIONS_BUFSIZE) + return ERR_PTR(-EINVAL); + + sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); + if (!sfa) + return ERR_PTR(-ENOMEM); + + sfa->actions_len = 0; + return sfa; +} + +/* RCU callback used by ovs_nla_free_flow_actions. */ +static void rcu_free_acts_callback(struct rcu_head *rcu) +{ + struct sw_flow_actions *sf_acts = container_of(rcu, + struct sw_flow_actions, rcu); + kfree(sf_acts); +} + +/* Schedules 'sf_acts' to be freed after the next RCU grace period. + * The caller must hold rcu_read_lock for this to be sensible. */ +void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +{ + call_rcu(&sf_acts->rcu, rcu_free_acts_callback); +} + +static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, + int attr_len) +{ + + struct sw_flow_actions *acts; + int new_acts_size; + int req_size = NLA_ALIGN(attr_len); + int next_offset = offsetof(struct sw_flow_actions, actions) + + (*sfa)->actions_len; + + if (req_size <= (ksize(*sfa) - next_offset)) + goto out; + + new_acts_size = ksize(*sfa) * 2; + + if (new_acts_size > MAX_ACTIONS_BUFSIZE) { + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + return ERR_PTR(-EMSGSIZE); + new_acts_size = MAX_ACTIONS_BUFSIZE; + } + + acts = ovs_nla_alloc_flow_actions(new_acts_size); + if (IS_ERR(acts)) + return (void *)acts; + + memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); + acts->actions_len = (*sfa)->actions_len; + kfree(*sfa); + *sfa = acts; + +out: + (*sfa)->actions_len += req_size; + return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); +} + +static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) +{ + struct nlattr *a; + + a = reserve_sfa_size(sfa, nla_attr_size(len)); + if (IS_ERR(a)) + return PTR_ERR(a); + + a->nla_type = attrtype; + a->nla_len = nla_attr_size(len); + + if (data) + memcpy(nla_data(a), data, len); + memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); + + return 0; +} + +static inline int add_nested_action_start(struct sw_flow_actions **sfa, + int attrtype) +{ + int used = (*sfa)->actions_len; + int err; + + err = add_action(sfa, attrtype, NULL, 0); + if (err) + return err; + + return used; +} + +static inline void add_nested_action_end(struct sw_flow_actions *sfa, + int st_offset) +{ + struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + + st_offset); + + a->nla_len = sfa->actions_len - st_offset; +} + +static int validate_and_copy_sample(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa) +{ + const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; + const struct nlattr *probability, *actions; + const struct nlattr *a; + int rem, start, err, st_acts; + + memset(attrs, 0, sizeof(attrs)); + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) + return -EINVAL; + attrs[type] = a; + } + if (rem) + return -EINVAL; + + probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; + if (!probability || nla_len(probability) != sizeof(u32)) + return -EINVAL; + + actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; + if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) + return -EINVAL; + + /* validation done, copy sample action. */ + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); + if (start < 0) + return start; + err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, + nla_data(probability), sizeof(u32)); + if (err) + return err; + st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); + if (st_acts < 0) + return st_acts; + + err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); + if (err) + return err; + + add_nested_action_end(*sfa, st_acts); + add_nested_action_end(*sfa, start); + + return 0; +} + +static int validate_tp_port(const struct sw_flow_key *flow_key) +{ + if (flow_key->eth.type == htons(ETH_P_IP)) { + if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) + return 0; + } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { + if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) + return 0; + } + + return -EINVAL; +} + +void ovs_match_init(struct sw_flow_match *match, + struct sw_flow_key *key, + struct sw_flow_mask *mask) +{ + memset(match, 0, sizeof(*match)); + match->key = key; + match->mask = mask; + + memset(key, 0, sizeof(*key)); + + if (mask) { + memset(&mask->key, 0, sizeof(mask->key)); + mask->range.start = mask->range.end = 0; + } +} + +static int validate_and_copy_set_tun(const struct nlattr *attr, + struct sw_flow_actions **sfa) +{ + struct sw_flow_match match; + struct sw_flow_key key; + int err, start; + + ovs_match_init(&match, &key, NULL); + err = ipv4_tun_from_nlattr(nla_data(attr), &match, false); + if (err) + return err; + + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); + if (start < 0) + return start; + + err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, + sizeof(match.key->tun_key)); + add_nested_action_end(*sfa, start); + + return err; +} + +static int validate_set(const struct nlattr *a, + const struct sw_flow_key *flow_key, + struct sw_flow_actions **sfa, + bool *set_tun) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + + /* There can be only one key in a action */ + if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) + return -EINVAL; + + if (key_type > OVS_KEY_ATTR_MAX || + (ovs_key_lens[key_type] != nla_len(ovs_key) && + ovs_key_lens[key_type] != -1)) + return -EINVAL; + + switch (key_type) { + const struct ovs_key_ipv4 *ipv4_key; + const struct ovs_key_ipv6 *ipv6_key; + int err; + + case OVS_KEY_ATTR_PRIORITY: + case OVS_KEY_ATTR_SKB_MARK: + case OVS_KEY_ATTR_ETHERNET: + break; + + case OVS_KEY_ATTR_TUNNEL: + *set_tun = true; + err = validate_and_copy_set_tun(a, sfa); + if (err) + return err; + break; + + case OVS_KEY_ATTR_IPV4: + if (flow_key->eth.type != htons(ETH_P_IP)) + return -EINVAL; + + if (!flow_key->ip.proto) + return -EINVAL; + + ipv4_key = nla_data(ovs_key); + if (ipv4_key->ipv4_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv4_key->ipv4_frag != flow_key->ip.frag) + return -EINVAL; + + break; + + case OVS_KEY_ATTR_IPV6: + if (flow_key->eth.type != htons(ETH_P_IPV6)) + return -EINVAL; + + if (!flow_key->ip.proto) + return -EINVAL; + + ipv6_key = nla_data(ovs_key); + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + + if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) + return -EINVAL; + + break; + + case OVS_KEY_ATTR_TCP: + if (flow_key->ip.proto != IPPROTO_TCP) + return -EINVAL; + + return validate_tp_port(flow_key); + + case OVS_KEY_ATTR_UDP: + if (flow_key->ip.proto != IPPROTO_UDP) + return -EINVAL; + + return validate_tp_port(flow_key); + + case OVS_KEY_ATTR_SCTP: + if (flow_key->ip.proto != IPPROTO_SCTP) + return -EINVAL; + + return validate_tp_port(flow_key); + + default: + return -EINVAL; + } + + return 0; +} + +static int validate_userspace(const struct nlattr *attr) +{ + static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { + [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, + }; + struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; + int error; + + error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, + attr, userspace_policy); + if (error) + return error; + + if (!a[OVS_USERSPACE_ATTR_PID] || + !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) + return -EINVAL; + + return 0; +} + +static int copy_action(const struct nlattr *from, + struct sw_flow_actions **sfa) +{ + int totlen = NLA_ALIGN(from->nla_len); + struct nlattr *to; + + to = reserve_sfa_size(sfa, from->nla_len); + if (IS_ERR(to)) + return PTR_ERR(to); + + memcpy(to, from, totlen); + return 0; +} + +int ovs_nla_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, + struct sw_flow_actions **sfa) +{ + const struct nlattr *a; + int rem, err; + + if (depth >= SAMPLE_ACTION_DEPTH) + return -EOVERFLOW; + + nla_for_each_nested(a, attr, rem) { + /* Expected argument lengths, (u32)-1 for variable length. */ + static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { + [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), + [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, + [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), + [OVS_ACTION_ATTR_POP_VLAN] = 0, + [OVS_ACTION_ATTR_SET] = (u32)-1, + [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 + }; + const struct ovs_action_push_vlan *vlan; + int type = nla_type(a); + bool skip_copy; + + if (type > OVS_ACTION_ATTR_MAX || + (action_lens[type] != nla_len(a) && + action_lens[type] != (u32)-1)) + return -EINVAL; + + skip_copy = false; + switch (type) { + case OVS_ACTION_ATTR_UNSPEC: + return -EINVAL; + + case OVS_ACTION_ATTR_USERSPACE: + err = validate_userspace(a); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_OUTPUT: + if (nla_get_u32(a) >= DP_MAX_PORTS) + return -EINVAL; + break; + + + case OVS_ACTION_ATTR_POP_VLAN: + break; + + case OVS_ACTION_ATTR_PUSH_VLAN: + vlan = nla_data(a); + if (vlan->vlan_tpid != htons(ETH_P_8021Q)) + return -EINVAL; + if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) + return -EINVAL; + break; + + case OVS_ACTION_ATTR_SET: + err = validate_set(a, key, sfa, &skip_copy); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SAMPLE: + err = validate_and_copy_sample(a, key, depth, sfa); + if (err) + return err; + skip_copy = true; + break; + + default: + return -EINVAL; + } + if (!skip_copy) { + err = copy_action(a, sfa); + if (err) + return err; + } + } + + if (rem > 0) + return -EINVAL; + + return 0; +} + +static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +{ + const struct nlattr *a; + struct nlattr *start; + int err = 0, rem; + + start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); + if (!start) + return -EMSGSIZE; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + struct nlattr *st_sample; + + switch (type) { + case OVS_SAMPLE_ATTR_PROBABILITY: + if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, + sizeof(u32), nla_data(a))) + return -EMSGSIZE; + break; + case OVS_SAMPLE_ATTR_ACTIONS: + st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + if (!st_sample) + return -EMSGSIZE; + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); + if (err) + return err; + nla_nest_end(skb, st_sample); + break; + } + } + + nla_nest_end(skb, start); + return err; +} + +static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + struct nlattr *start; + int err; + + switch (key_type) { + case OVS_KEY_ATTR_IPV4_TUNNEL: + start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + if (!start) + return -EMSGSIZE; + + err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key), + nla_data(ovs_key)); + if (err) + return err; + nla_nest_end(skb, start); + break; + default: + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) + return -EMSGSIZE; + break; + } + + return 0; +} + +int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) +{ + const struct nlattr *a; + int rem, err; + + nla_for_each_attr(a, attr, len, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_ACTION_ATTR_SET: + err = set_action_to_attr(a, skb); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SAMPLE: + err = sample_action_to_attr(a, skb); + if (err) + return err; + break; + default: + if (nla_put(skb, type, nla_len(a), nla_data(a))) + return -EMSGSIZE; + break; + } + } + + return 0; +} diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h new file mode 100644 index 000000000000..440151045d39 --- /dev/null +++ b/net/openvswitch/flow_netlink.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + + +#ifndef FLOW_NETLINK_H +#define FLOW_NETLINK_H 1 + +#include <linux/kernel.h> +#include <linux/netlink.h> +#include <linux/openvswitch.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/if_ether.h> +#include <linux/in6.h> +#include <linux/jiffies.h> +#include <linux/time.h> +#include <linux/flex_array.h> + +#include <net/inet_ecn.h> +#include <net/ip_tunnels.h> + +#include "flow.h" + +void ovs_match_init(struct sw_flow_match *match, + struct sw_flow_key *key, struct sw_flow_mask *mask); + +int ovs_nla_put_flow(const struct sw_flow_key *, + const struct sw_flow_key *, struct sk_buff *); +int ovs_nla_get_flow_metadata(struct sw_flow *flow, + const struct nlattr *attr); +int ovs_nla_get_match(struct sw_flow_match *match, + const struct nlattr *, + const struct nlattr *); + +int ovs_nla_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa); +int ovs_nla_put_actions(const struct nlattr *attr, + int len, struct sk_buff *skb); + +struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len); +void ovs_nla_free_flow_actions(struct sw_flow_actions *); + +#endif /* flow_netlink.h */ diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c new file mode 100644 index 000000000000..e42542706087 --- /dev/null +++ b/net/openvswitch/flow_table.c @@ -0,0 +1,592 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include "flow.h" +#include "datapath.h" +#include <linux/uaccess.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/llc_pdu.h> +#include <linux/kernel.h> +#include <linux/jhash.h> +#include <linux/jiffies.h> +#include <linux/llc.h> +#include <linux/module.h> +#include <linux/in.h> +#include <linux/rcupdate.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/sctp.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/rculist.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/ndisc.h> + +#include "datapath.h" + +#define TBL_MIN_BUCKETS 1024 +#define REHASH_INTERVAL (10 * 60 * HZ) + +static struct kmem_cache *flow_cache; + +static u16 range_n_bytes(const struct sw_flow_key_range *range) +{ + return range->end - range->start; +} + +void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, + const struct sw_flow_mask *mask) +{ + const long *m = (long *)((u8 *)&mask->key + mask->range.start); + const long *s = (long *)((u8 *)src + mask->range.start); + long *d = (long *)((u8 *)dst + mask->range.start); + int i; + + /* The memory outside of the 'mask->range' are not set since + * further operations on 'dst' only uses contents within + * 'mask->range'. + */ + for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) + *d++ = *s++ & *m++; +} + +struct sw_flow *ovs_flow_alloc(void) +{ + struct sw_flow *flow; + + flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&flow->lock); + flow->sf_acts = NULL; + flow->mask = NULL; + + return flow; +} + +int ovs_flow_tbl_count(struct flow_table *table) +{ + return table->count; +} + +static struct flex_array *alloc_buckets(unsigned int n_buckets) +{ + struct flex_array *buckets; + int i, err; + + buckets = flex_array_alloc(sizeof(struct hlist_head), + n_buckets, GFP_KERNEL); + if (!buckets) + return NULL; + + err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); + if (err) { + flex_array_free(buckets); + return NULL; + } + + for (i = 0; i < n_buckets; i++) + INIT_HLIST_HEAD((struct hlist_head *) + flex_array_get(buckets, i)); + + return buckets; +} + +static void flow_free(struct sw_flow *flow) +{ + kfree((struct sf_flow_acts __force *)flow->sf_acts); + kmem_cache_free(flow_cache, flow); +} + +static void rcu_free_flow_callback(struct rcu_head *rcu) +{ + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); + + flow_free(flow); +} + +static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) +{ + struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); + + kfree(mask); +} + +static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) +{ + if (!mask) + return; + + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) { + list_del_rcu(&mask->list); + if (deferred) + call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); + else + kfree(mask); + } +} + +void ovs_flow_free(struct sw_flow *flow, bool deferred) +{ + if (!flow) + return; + + flow_mask_del_ref(flow->mask, deferred); + + if (deferred) + call_rcu(&flow->rcu, rcu_free_flow_callback); + else + flow_free(flow); +} + +static void free_buckets(struct flex_array *buckets) +{ + flex_array_free(buckets); +} + +static void __table_instance_destroy(struct table_instance *ti) +{ + int i; + + if (ti->keep_flows) + goto skip_flows; + + for (i = 0; i < ti->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head = flex_array_get(ti->buckets, i); + struct hlist_node *n; + int ver = ti->node_ver; + + hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { + hlist_del(&flow->hash_node[ver]); + ovs_flow_free(flow, false); + } + } + +skip_flows: + free_buckets(ti->buckets); + kfree(ti); +} + +static struct table_instance *table_instance_alloc(int new_size) +{ + struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL); + + if (!ti) + return NULL; + + ti->buckets = alloc_buckets(new_size); + + if (!ti->buckets) { + kfree(ti); + return NULL; + } + ti->n_buckets = new_size; + ti->node_ver = 0; + ti->keep_flows = false; + get_random_bytes(&ti->hash_seed, sizeof(u32)); + + return ti; +} + +int ovs_flow_tbl_init(struct flow_table *table) +{ + struct table_instance *ti; + + ti = table_instance_alloc(TBL_MIN_BUCKETS); + + if (!ti) + return -ENOMEM; + + rcu_assign_pointer(table->ti, ti); + INIT_LIST_HEAD(&table->mask_list); + table->last_rehash = jiffies; + table->count = 0; + return 0; +} + +static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) +{ + struct table_instance *ti = container_of(rcu, struct table_instance, rcu); + + __table_instance_destroy(ti); +} + +static void table_instance_destroy(struct table_instance *ti, bool deferred) +{ + if (!ti) + return; + + if (deferred) + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + else + __table_instance_destroy(ti); +} + +void ovs_flow_tbl_destroy(struct flow_table *table) +{ + struct table_instance *ti = ovsl_dereference(table->ti); + + table_instance_destroy(ti, false); +} + +struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, + u32 *bucket, u32 *last) +{ + struct sw_flow *flow; + struct hlist_head *head; + int ver; + int i; + + ver = ti->node_ver; + while (*bucket < ti->n_buckets) { + i = 0; + head = flex_array_get(ti->buckets, *bucket); + hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + if (i < *last) { + i++; + continue; + } + *last = i + 1; + return flow; + } + (*bucket)++; + *last = 0; + } + + return NULL; +} + +static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) +{ + hash = jhash_1word(hash, ti->hash_seed); + return flex_array_get(ti->buckets, + (hash & (ti->n_buckets - 1))); +} + +static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(ti, flow->hash); + hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); +} + +static void flow_table_copy_flows(struct table_instance *old, + struct table_instance *new) +{ + int old_ver; + int i; + + old_ver = old->node_ver; + new->node_ver = !old_ver; + + /* Insert in new table. */ + for (i = 0; i < old->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head; + + head = flex_array_get(old->buckets, i); + + hlist_for_each_entry(flow, head, hash_node[old_ver]) + table_instance_insert(new, flow); + } + + old->keep_flows = true; +} + +static struct table_instance *table_instance_rehash(struct table_instance *ti, + int n_buckets) +{ + struct table_instance *new_ti; + + new_ti = table_instance_alloc(n_buckets); + if (!new_ti) + return NULL; + + flow_table_copy_flows(ti, new_ti); + + return new_ti; +} + +int ovs_flow_tbl_flush(struct flow_table *flow_table) +{ + struct table_instance *old_ti; + struct table_instance *new_ti; + + old_ti = ovsl_dereference(flow_table->ti); + new_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!new_ti) + return -ENOMEM; + + rcu_assign_pointer(flow_table->ti, new_ti); + flow_table->last_rehash = jiffies; + flow_table->count = 0; + + table_instance_destroy(old_ti, true); + return 0; +} + +static u32 flow_hash(const struct sw_flow_key *key, int key_start, + int key_end) +{ + u32 *hash_key = (u32 *)((u8 *)key + key_start); + int hash_u32s = (key_end - key_start) >> 2; + + /* Make sure number of hash bytes are multiple of u32. */ + BUILD_BUG_ON(sizeof(long) % sizeof(u32)); + + return jhash2(hash_key, hash_u32s, 0); +} + +static int flow_key_start(const struct sw_flow_key *key) +{ + if (key->tun_key.ipv4_dst) + return 0; + else + return rounddown(offsetof(struct sw_flow_key, phy), + sizeof(long)); +} + +static bool cmp_key(const struct sw_flow_key *key1, + const struct sw_flow_key *key2, + int key_start, int key_end) +{ + const long *cp1 = (long *)((u8 *)key1 + key_start); + const long *cp2 = (long *)((u8 *)key2 + key_start); + long diffs = 0; + int i; + + for (i = key_start; i < key_end; i += sizeof(long)) + diffs |= *cp1++ ^ *cp2++; + + return diffs == 0; +} + +static bool flow_cmp_masked_key(const struct sw_flow *flow, + const struct sw_flow_key *key, + int key_start, int key_end) +{ + return cmp_key(&flow->key, key, key_start, key_end); +} + +bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + struct sw_flow_match *match) +{ + struct sw_flow_key *key = match->key; + int key_start = flow_key_start(key); + int key_end = match->range.end; + + return cmp_key(&flow->unmasked_key, key, key_start, key_end); +} + +static struct sw_flow *masked_flow_lookup(struct table_instance *ti, + const struct sw_flow_key *unmasked, + struct sw_flow_mask *mask) +{ + struct sw_flow *flow; + struct hlist_head *head; + int key_start = mask->range.start; + int key_end = mask->range.end; + u32 hash; + struct sw_flow_key masked_key; + + ovs_flow_mask_key(&masked_key, unmasked, mask); + hash = flow_hash(&masked_key, key_start, key_end); + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { + if (flow->mask == mask && flow->hash == hash && + flow_cmp_masked_key(flow, &masked_key, + key_start, key_end)) + return flow; + } + return NULL; +} + +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key, + u32 *n_mask_hit) +{ + struct table_instance *ti = rcu_dereference(tbl->ti); + struct sw_flow_mask *mask; + struct sw_flow *flow; + + *n_mask_hit = 0; + list_for_each_entry_rcu(mask, &tbl->mask_list, list) { + (*n_mask_hit)++; + flow = masked_flow_lookup(ti, key, mask); + if (flow) /* Found */ + return flow; + } + return NULL; +} + +int ovs_flow_tbl_num_masks(const struct flow_table *table) +{ + struct sw_flow_mask *mask; + int num = 0; + + list_for_each_entry(mask, &table->mask_list, list) + num++; + + return num; +} + +static struct table_instance *table_instance_expand(struct table_instance *ti) +{ + return table_instance_rehash(ti, ti->n_buckets * 2); +} + +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) +{ + struct table_instance *ti = ovsl_dereference(table->ti); + + BUG_ON(table->count == 0); + hlist_del_rcu(&flow->hash_node[ti->node_ver]); + table->count--; +} + +static struct sw_flow_mask *mask_alloc(void) +{ + struct sw_flow_mask *mask; + + mask = kmalloc(sizeof(*mask), GFP_KERNEL); + if (mask) + mask->ref_count = 0; + + return mask; +} + +static void mask_add_ref(struct sw_flow_mask *mask) +{ + mask->ref_count++; +} + +static bool mask_equal(const struct sw_flow_mask *a, + const struct sw_flow_mask *b) +{ + u8 *a_ = (u8 *)&a->key + a->range.start; + u8 *b_ = (u8 *)&b->key + b->range.start; + + return (a->range.end == b->range.end) + && (a->range.start == b->range.start) + && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); +} + +static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, + const struct sw_flow_mask *mask) +{ + struct list_head *ml; + + list_for_each(ml, &tbl->mask_list) { + struct sw_flow_mask *m; + m = container_of(ml, struct sw_flow_mask, list); + if (mask_equal(mask, m)) + return m; + } + + return NULL; +} + +/** + * add a new mask into the mask list. + * The caller needs to make sure that 'mask' is not the same + * as any masks that are already on the list. + */ +static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, + struct sw_flow_mask *new) +{ + struct sw_flow_mask *mask; + mask = flow_mask_find(tbl, new); + if (!mask) { + /* Allocate a new mask if none exsits. */ + mask = mask_alloc(); + if (!mask) + return -ENOMEM; + mask->key = new->key; + mask->range = new->range; + list_add_rcu(&mask->list, &tbl->mask_list); + } + + mask_add_ref(mask); + flow->mask = mask; + return 0; +} + +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_mask *mask) +{ + struct table_instance *new_ti = NULL; + struct table_instance *ti; + int err; + + err = flow_mask_insert(table, flow, mask); + if (err) + return err; + + flow->hash = flow_hash(&flow->key, flow->mask->range.start, + flow->mask->range.end); + ti = ovsl_dereference(table->ti); + table_instance_insert(ti, flow); + table->count++; + + /* Expand table, if necessary, to make room. */ + if (table->count > ti->n_buckets) + new_ti = table_instance_expand(ti); + else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) + new_ti = table_instance_rehash(ti, ti->n_buckets); + + if (new_ti) { + rcu_assign_pointer(table->ti, new_ti); + table_instance_destroy(ti, true); + table->last_rehash = jiffies; + } + return 0; +} + +/* Initializes the flow module. + * Returns zero if successful or a negative error code. */ +int ovs_flow_init(void) +{ + BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); + BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); + + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, + 0, NULL); + if (flow_cache == NULL) + return -ENOMEM; + + return 0; +} + +/* Uninitializes the flow module. */ +void ovs_flow_exit(void) +{ + kmem_cache_destroy(flow_cache); +} diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h new file mode 100644 index 000000000000..fbe45d5ad07d --- /dev/null +++ b/net/openvswitch/flow_table.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef FLOW_TABLE_H +#define FLOW_TABLE_H 1 + +#include <linux/kernel.h> +#include <linux/netlink.h> +#include <linux/openvswitch.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/if_ether.h> +#include <linux/in6.h> +#include <linux/jiffies.h> +#include <linux/time.h> +#include <linux/flex_array.h> + +#include <net/inet_ecn.h> +#include <net/ip_tunnels.h> + +#include "flow.h" + +struct table_instance { + struct flex_array *buckets; + unsigned int n_buckets; + struct rcu_head rcu; + int node_ver; + u32 hash_seed; + bool keep_flows; +}; + +struct flow_table { + struct table_instance __rcu *ti; + struct list_head mask_list; + unsigned long last_rehash; + unsigned int count; +}; + +int ovs_flow_init(void); +void ovs_flow_exit(void); + +struct sw_flow *ovs_flow_alloc(void); +void ovs_flow_free(struct sw_flow *, bool deferred); + +int ovs_flow_tbl_init(struct flow_table *); +int ovs_flow_tbl_count(struct flow_table *table); +void ovs_flow_tbl_destroy(struct flow_table *table); +int ovs_flow_tbl_flush(struct flow_table *flow_table); + +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_mask *mask); +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); +int ovs_flow_tbl_num_masks(const struct flow_table *table); +struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, + u32 *bucket, u32 *idx); +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, + const struct sw_flow_key *, + u32 *n_mask_hit); + +bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + struct sw_flow_match *match); + +void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, + const struct sw_flow_mask *mask); +#endif /* flow_table.h */ diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index c99dea543d64..a3d6951602db 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -24,8 +24,6 @@ #include <linux/if_tunnel.h> #include <linux/if_vlan.h> #include <linux/in.h> -#include <linux/if_vlan.h> -#include <linux/in.h> #include <linux/in_route.h> #include <linux/inetdevice.h> #include <linux/jhash.h> diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 98d3edbbc235..729c68763fe7 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -134,7 +134,7 @@ static void do_setup(struct net_device *netdev) netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | - NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; + NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; netdev->vlan_features = netdev->features; netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 09d93c13cfd6..d21f77d875ba 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -150,15 +150,25 @@ static void free_port_rcu(struct rcu_head *rcu) ovs_vport_free(vport_from_priv(netdev_vport)); } -static void netdev_destroy(struct vport *vport) +void ovs_netdev_detach_dev(struct vport *vport) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); - rtnl_lock(); + ASSERT_RTNL(); netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(netdev_vport->dev); - netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp)); + netdev_upper_dev_unlink(netdev_vport->dev, + netdev_master_upper_dev_get(netdev_vport->dev)); dev_set_promiscuity(netdev_vport->dev, -1); +} + +static void netdev_destroy(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + rtnl_lock(); + if (netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH) + ovs_netdev_detach_dev(vport); rtnl_unlock(); call_rcu(&netdev_vport->rcu, free_port_rcu); diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index dd298b5c5cdb..8df01c1127e5 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -39,5 +39,6 @@ netdev_vport_priv(const struct vport *vport) } const char *ovs_netdev_get_name(const struct vport *); +void ovs_netdev_detach_dev(struct vport *); #endif /* vport_netdev.h */ diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 56e22b74cf96..e797a50ac2be 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -29,7 +29,6 @@ #include <net/ip.h> #include <net/udp.h> #include <net/ip_tunnels.h> -#include <net/udp.h> #include <net/rtnetlink.h> #include <net/route.h> #include <net/dsfield.h> diff --git a/net/rds/connection.c b/net/rds/connection.c index 642ad42c416b..378c3a6acf84 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -51,10 +51,16 @@ static struct kmem_cache *rds_conn_slab; static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) { + static u32 rds_hash_secret __read_mostly; + + unsigned long hash; + + net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret)); + /* Pass NULL, don't need struct net for hash */ - unsigned long hash = inet_ehashfn(NULL, - be32_to_cpu(laddr), 0, - be32_to_cpu(faddr), 0); + hash = __inet_ehashfn(be32_to_cpu(laddr), 0, + be32_to_cpu(faddr), 0, + rds_hash_secret); return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; } diff --git a/net/rds/rds.h b/net/rds/rds.h index ec1d731ecff0..48f8ffc60f8f 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -749,7 +749,7 @@ void rds_atomic_send_complete(struct rds_message *rm, int wc_status); int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); -extern void __rds_put_mr_final(struct rds_mr *mr); +void __rds_put_mr_final(struct rds_mr *mr); static inline void rds_mr_put(struct rds_mr *mr) { if (atomic_dec_and_test(&mr->r_refcount)) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a693aca2ae2e..5f43675ee1df 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -426,17 +426,16 @@ extern struct workqueue_struct *rxrpc_workqueue; /* * ar-accept.c */ -extern void rxrpc_accept_incoming_calls(struct work_struct *); -extern struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, - unsigned long); -extern int rxrpc_reject_call(struct rxrpc_sock *); +void rxrpc_accept_incoming_calls(struct work_struct *); +struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long); +int rxrpc_reject_call(struct rxrpc_sock *); /* * ar-ack.c */ -extern void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); -extern void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); -extern void rxrpc_process_call(struct work_struct *); +void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); +void rxrpc_process_call(struct work_struct *); /* * ar-call.c @@ -445,19 +444,18 @@ extern struct kmem_cache *rxrpc_call_jar; extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; -extern struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *, - struct rxrpc_transport *, - struct rxrpc_conn_bundle *, - unsigned long, int, gfp_t); -extern struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, - struct rxrpc_connection *, - struct rxrpc_header *, gfp_t); -extern struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, - unsigned long); -extern void rxrpc_release_call(struct rxrpc_call *); -extern void rxrpc_release_calls_on_socket(struct rxrpc_sock *); -extern void __rxrpc_put_call(struct rxrpc_call *); -extern void __exit rxrpc_destroy_all_calls(void); +struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *, + struct rxrpc_transport *, + struct rxrpc_conn_bundle *, + unsigned long, int, gfp_t); +struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, + struct rxrpc_connection *, + struct rxrpc_header *, gfp_t); +struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long); +void rxrpc_release_call(struct rxrpc_call *); +void rxrpc_release_calls_on_socket(struct rxrpc_sock *); +void __rxrpc_put_call(struct rxrpc_call *); +void __exit rxrpc_destroy_all_calls(void); /* * ar-connection.c @@ -465,19 +463,16 @@ extern void __exit rxrpc_destroy_all_calls(void); extern struct list_head rxrpc_connections; extern rwlock_t rxrpc_connection_lock; -extern struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *, - struct rxrpc_transport *, - struct key *, - __be16, gfp_t); -extern void rxrpc_put_bundle(struct rxrpc_transport *, - struct rxrpc_conn_bundle *); -extern int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *, - struct rxrpc_conn_bundle *, struct rxrpc_call *, - gfp_t); -extern void rxrpc_put_connection(struct rxrpc_connection *); -extern void __exit rxrpc_destroy_all_connections(void); -extern struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *, - struct rxrpc_header *); +struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *, + struct rxrpc_transport *, + struct key *, __be16, gfp_t); +void rxrpc_put_bundle(struct rxrpc_transport *, struct rxrpc_conn_bundle *); +int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *, + struct rxrpc_conn_bundle *, struct rxrpc_call *, gfp_t); +void rxrpc_put_connection(struct rxrpc_connection *); +void __exit rxrpc_destroy_all_connections(void); +struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *, + struct rxrpc_header *); extern struct rxrpc_connection * rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *, gfp_t); @@ -485,15 +480,15 @@ rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *, /* * ar-connevent.c */ -extern void rxrpc_process_connection(struct work_struct *); -extern void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *); -extern void rxrpc_reject_packets(struct work_struct *); +void rxrpc_process_connection(struct work_struct *); +void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *); +void rxrpc_reject_packets(struct work_struct *); /* * ar-error.c */ -extern void rxrpc_UDP_error_report(struct sock *); -extern void rxrpc_UDP_error_handler(struct work_struct *); +void rxrpc_UDP_error_report(struct sock *); +void rxrpc_UDP_error_handler(struct work_struct *); /* * ar-input.c @@ -501,18 +496,17 @@ extern void rxrpc_UDP_error_handler(struct work_struct *); extern unsigned long rxrpc_ack_timeout; extern const char *rxrpc_pkts[]; -extern void rxrpc_data_ready(struct sock *, int); -extern int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, - bool); -extern void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); +void rxrpc_data_ready(struct sock *, int); +int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool); +void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); /* * ar-local.c */ extern rwlock_t rxrpc_local_lock; -extern struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *); -extern void rxrpc_put_local(struct rxrpc_local *); -extern void __exit rxrpc_destroy_all_locals(void); +struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *); +void rxrpc_put_local(struct rxrpc_local *); +void __exit rxrpc_destroy_all_locals(void); /* * ar-key.c @@ -520,31 +514,29 @@ extern void __exit rxrpc_destroy_all_locals(void); extern struct key_type key_type_rxrpc; extern struct key_type key_type_rxrpc_s; -extern int rxrpc_request_key(struct rxrpc_sock *, char __user *, int); -extern int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int); -extern int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, - time_t, u32); +int rxrpc_request_key(struct rxrpc_sock *, char __user *, int); +int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int); +int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t, + u32); /* * ar-output.c */ extern int rxrpc_resend_timeout; -extern int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); -extern int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *, - struct rxrpc_transport *, struct msghdr *, - size_t); -extern int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, - struct msghdr *, size_t); +int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); +int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *, + struct rxrpc_transport *, struct msghdr *, size_t); +int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, struct msghdr *, + size_t); /* * ar-peer.c */ -extern struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t); -extern void rxrpc_put_peer(struct rxrpc_peer *); -extern struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *, - __be32, __be16); -extern void __exit rxrpc_destroy_all_peers(void); +struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t); +void rxrpc_put_peer(struct rxrpc_peer *); +struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *, __be32, __be16); +void __exit rxrpc_destroy_all_peers(void); /* * ar-proc.c @@ -556,38 +548,36 @@ extern const struct file_operations rxrpc_connection_seq_fops; /* * ar-recvmsg.c */ -extern void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); -extern int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, - size_t, int); +void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); +int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, + int); /* * ar-security.c */ -extern int rxrpc_register_security(struct rxrpc_security *); -extern void rxrpc_unregister_security(struct rxrpc_security *); -extern int rxrpc_init_client_conn_security(struct rxrpc_connection *); -extern int rxrpc_init_server_conn_security(struct rxrpc_connection *); -extern int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *, - size_t, void *); -extern int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *, - u32 *); -extern void rxrpc_clear_conn_security(struct rxrpc_connection *); +int rxrpc_register_security(struct rxrpc_security *); +void rxrpc_unregister_security(struct rxrpc_security *); +int rxrpc_init_client_conn_security(struct rxrpc_connection *); +int rxrpc_init_server_conn_security(struct rxrpc_connection *); +int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *, size_t, + void *); +int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *, u32 *); +void rxrpc_clear_conn_security(struct rxrpc_connection *); /* * ar-skbuff.c */ -extern void rxrpc_packet_destructor(struct sk_buff *); +void rxrpc_packet_destructor(struct sk_buff *); /* * ar-transport.c */ -extern struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *, - struct rxrpc_peer *, - gfp_t); -extern void rxrpc_put_transport(struct rxrpc_transport *); -extern void __exit rxrpc_destroy_all_transports(void); -extern struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *, - struct rxrpc_peer *); +struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *, + struct rxrpc_peer *, gfp_t); +void rxrpc_put_transport(struct rxrpc_transport *); +void __exit rxrpc_destroy_all_transports(void); +struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *, + struct rxrpc_peer *); /* * debug tracing diff --git a/net/sched/Kconfig b/net/sched/Kconfig index c03a32a0418e..ad1f1d819203 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -443,6 +443,16 @@ config NET_CLS_CGROUP To compile this code as a module, choose M here: the module will be called cls_cgroup. +config NET_CLS_BPF + tristate "BPF-based classifier" + select NET_CLS + ---help--- + If you say Y here, you will be able to classify packets based on + programmable BPF (JIT'ed) filters as an alternative to ematches. + + To compile this code as a module, choose M here: the module will + be called cls_bpf. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index e5f9abe9a5db..35fa47a494ab 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -50,6 +50,7 @@ obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o +obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c new file mode 100644 index 000000000000..1002a8226281 --- /dev/null +++ b/net/sched/cls_bpf.c @@ -0,0 +1,385 @@ +/* + * Berkeley Packet Filter based traffic classifier + * + * Might be used to classify traffic through flexible, user-defined and + * possibly JIT-ed BPF filters for traffic control as an alternative to + * ematches. + * + * (C) 2013 Daniel Borkmann <dborkman@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/filter.h> +#include <net/rtnetlink.h> +#include <net/pkt_cls.h> +#include <net/sock.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); +MODULE_DESCRIPTION("TC BPF based classifier"); + +struct cls_bpf_head { + struct list_head plist; + u32 hgen; +}; + +struct cls_bpf_prog { + struct sk_filter *filter; + struct sock_filter *bpf_ops; + struct tcf_exts exts; + struct tcf_result res; + struct list_head link; + u32 handle; + u16 bpf_len; +}; + +static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { + [TCA_BPF_CLASSID] = { .type = NLA_U32 }, + [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, + [TCA_BPF_OPS] = { .type = NLA_BINARY, + .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, +}; + +static const struct tcf_ext_map bpf_ext_map = { + .action = TCA_BPF_ACT, + .police = TCA_BPF_POLICE, +}; + +static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) +{ + struct cls_bpf_head *head = tp->root; + struct cls_bpf_prog *prog; + int ret; + + list_for_each_entry(prog, &head->plist, link) { + int filter_res = SK_RUN_FILTER(prog->filter, skb); + + if (filter_res == 0) + continue; + + *res = prog->res; + if (filter_res != -1) + res->classid = filter_res; + + ret = tcf_exts_exec(skb, &prog->exts, res); + if (ret < 0) + continue; + + return ret; + } + + return -1; +} + +static int cls_bpf_init(struct tcf_proto *tp) +{ + struct cls_bpf_head *head; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + + INIT_LIST_HEAD(&head->plist); + tp->root = head; + + return 0; +} + +static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) +{ + tcf_unbind_filter(tp, &prog->res); + tcf_exts_destroy(tp, &prog->exts); + + sk_unattached_filter_destroy(prog->filter); + + kfree(prog->bpf_ops); + kfree(prog); +} + +static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) +{ + struct cls_bpf_head *head = tp->root; + struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg; + + list_for_each_entry(prog, &head->plist, link) { + if (prog == todel) { + tcf_tree_lock(tp); + list_del(&prog->link); + tcf_tree_unlock(tp); + + cls_bpf_delete_prog(tp, prog); + return 0; + } + } + + return -ENOENT; +} + +static void cls_bpf_destroy(struct tcf_proto *tp) +{ + struct cls_bpf_head *head = tp->root; + struct cls_bpf_prog *prog, *tmp; + + list_for_each_entry_safe(prog, tmp, &head->plist, link) { + list_del(&prog->link); + cls_bpf_delete_prog(tp, prog); + } + + kfree(head); +} + +static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) +{ + struct cls_bpf_head *head = tp->root; + struct cls_bpf_prog *prog; + unsigned long ret = 0UL; + + if (head == NULL) + return 0UL; + + list_for_each_entry(prog, &head->plist, link) { + if (prog->handle == handle) { + ret = (unsigned long) prog; + break; + } + } + + return ret; +} + +static void cls_bpf_put(struct tcf_proto *tp, unsigned long f) +{ +} + +static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, + struct cls_bpf_prog *prog, + unsigned long base, struct nlattr **tb, + struct nlattr *est) +{ + struct sock_filter *bpf_ops, *bpf_old; + struct tcf_exts exts; + struct sock_fprog tmp; + struct sk_filter *fp, *fp_old; + u16 bpf_size, bpf_len; + u32 classid; + int ret; + + if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID]) + return -EINVAL; + + ret = tcf_exts_validate(net, tp, tb, est, &exts, &bpf_ext_map); + if (ret < 0) + return ret; + + classid = nla_get_u32(tb[TCA_BPF_CLASSID]); + bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]); + if (bpf_len > BPF_MAXINSNS || bpf_len == 0) { + ret = -EINVAL; + goto errout; + } + + bpf_size = bpf_len * sizeof(*bpf_ops); + bpf_ops = kzalloc(bpf_size, GFP_KERNEL); + if (bpf_ops == NULL) { + ret = -ENOMEM; + goto errout; + } + + memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); + + tmp.len = bpf_len; + tmp.filter = (struct sock_filter __user *) bpf_ops; + + ret = sk_unattached_filter_create(&fp, &tmp); + if (ret) + goto errout_free; + + tcf_tree_lock(tp); + fp_old = prog->filter; + bpf_old = prog->bpf_ops; + + prog->bpf_len = bpf_len; + prog->bpf_ops = bpf_ops; + prog->filter = fp; + prog->res.classid = classid; + tcf_tree_unlock(tp); + + tcf_bind_filter(tp, &prog->res, base); + tcf_exts_change(tp, &prog->exts, &exts); + + if (fp_old) + sk_unattached_filter_destroy(fp_old); + if (bpf_old) + kfree(bpf_old); + + return 0; + +errout_free: + kfree(bpf_ops); +errout: + tcf_exts_destroy(tp, &exts); + return ret; +} + +static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, + struct cls_bpf_head *head) +{ + unsigned int i = 0x80000000; + + do { + if (++head->hgen == 0x7FFFFFFF) + head->hgen = 1; + } while (--i > 0 && cls_bpf_get(tp, head->hgen)); + if (i == 0) + pr_err("Insufficient number of handles\n"); + + return i; +} + +static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg) +{ + struct cls_bpf_head *head = tp->root; + struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg; + struct nlattr *tb[TCA_BPF_MAX + 1]; + int ret; + + if (tca[TCA_OPTIONS] == NULL) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy); + if (ret < 0) + return ret; + + if (prog != NULL) { + if (handle && prog->handle != handle) + return -EINVAL; + return cls_bpf_modify_existing(net, tp, prog, base, tb, + tca[TCA_RATE]); + } + + prog = kzalloc(sizeof(*prog), GFP_KERNEL); + if (prog == NULL) + return -ENOBUFS; + + if (handle == 0) + prog->handle = cls_bpf_grab_new_handle(tp, head); + else + prog->handle = handle; + if (prog->handle == 0) { + ret = -EINVAL; + goto errout; + } + + ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]); + if (ret < 0) + goto errout; + + tcf_tree_lock(tp); + list_add(&prog->link, &head->plist); + tcf_tree_unlock(tp); + + *arg = (unsigned long) prog; + + return 0; +errout: + if (*arg == 0UL && prog) + kfree(prog); + + return ret; +} + +static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *tm) +{ + struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; + struct nlattr *nest, *nla; + + if (prog == NULL) + return skb->len; + + tm->tcm_handle = prog->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) + goto nla_put_failure; + if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len)) + goto nla_put_failure; + + nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len * + sizeof(struct sock_filter)); + if (nla == NULL) + goto nla_put_failure; + + memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); + + if (tcf_exts_dump(skb, &prog->exts, &bpf_ext_map) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &prog->exts, &bpf_ext_map) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + +static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct cls_bpf_head *head = tp->root; + struct cls_bpf_prog *prog; + + list_for_each_entry(prog, &head->plist, link) { + if (arg->count < arg->skip) + goto skip; + if (arg->fn(tp, (unsigned long) prog, arg) < 0) { + arg->stop = 1; + break; + } +skip: + arg->count++; + } +} + +static struct tcf_proto_ops cls_bpf_ops __read_mostly = { + .kind = "bpf", + .owner = THIS_MODULE, + .classify = cls_bpf_classify, + .init = cls_bpf_init, + .destroy = cls_bpf_destroy, + .get = cls_bpf_get, + .put = cls_bpf_put, + .change = cls_bpf_change, + .delete = cls_bpf_delete, + .walk = cls_bpf_walk, + .dump = cls_bpf_dump, +}; + +static int __init cls_bpf_init_mod(void) +{ + return register_tcf_proto_ops(&cls_bpf_ops); +} + +static void __exit cls_bpf_exit_mod(void) +{ + unregister_tcf_proto_ops(&cls_bpf_ops); +} + +module_init(cls_bpf_init_mod); +module_exit(cls_bpf_exit_mod); diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 1ac41d3de5c3..527aeb7a3ff0 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -24,7 +24,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, { struct xt_set_info *set = data; ip_set_id_t index; - struct net *net = qdisc_dev(tp->q)->nd_net; + struct net *net = dev_net(qdisc_dev(tp->q)); if (data_len != sizeof(*set)) return -EINVAL; @@ -46,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em) { const struct xt_set_info *set = (const void *) em->data; if (set) { - ip_set_nfnl_put(qdisc_dev(p->q)->nd_net, set->index); + ip_set_nfnl_put(dev_net(qdisc_dev(p->q)), set->index); kfree((void *) em->data); } } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a9dfdda9ed1d..fdc041c57853 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -255,6 +255,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; + f->time_next_packet = 0ULL; } return f; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a6d788d45216..75c94e59a3bd 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -235,7 +235,6 @@ static bool loss_4state(struct netem_sched_data *q) clg->state = 2; else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { clg->state = 1; - return true; } else if (clg->a2 + clg->a3 < rnd) { clg->state = 3; return true; @@ -358,6 +357,21 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche return PSCHED_NS2TICKS(ticks); } +static void tfifo_reset(struct Qdisc *sch) +{ + struct netem_sched_data *q = qdisc_priv(sch); + struct rb_node *p; + + while ((p = rb_first(&q->t_root))) { + struct sk_buff *skb = netem_rb_to_skb(p); + + rb_erase(p, &q->t_root); + skb->next = NULL; + skb->prev = NULL; + kfree_skb(skb); + } +} + static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); @@ -520,6 +534,7 @@ static unsigned int netem_drop(struct Qdisc *sch) skb->next = NULL; skb->prev = NULL; len = qdisc_pkt_len(skb); + sch->qstats.backlog -= len; kfree_skb(skb); } } @@ -609,6 +624,7 @@ static void netem_reset(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); qdisc_reset_queue(sch); + tfifo_reset(sch); if (q->qdisc) qdisc_reset(q->qdisc); qdisc_watchdog_cancel(&q->watchdog); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index cef509985192..c9b91cb1cb0d 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -602,7 +602,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, /* Start a T3 timer here in case it wasn't running so * that these migrated packets have a chance to get - * retrnasmitted. + * retransmitted. */ if (!timer_pending(&active->T3_rtx_timer)) if (!mod_timer(&active->T3_rtx_timer, @@ -665,7 +665,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Set the path max_retrans. */ peer->pathmaxrxt = asoc->pathmaxrxt; - /* And the partial failure retrnas threshold */ + /* And the partial failure retrans threshold */ peer->pf_retrans = asoc->pf_retrans; /* Initialize the peer's SACK delay timeout based on the diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 8c4fa5dec824..46b5977978a1 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -539,18 +539,14 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc) for (i = 0; i < n_elt; i++) { id = ntohs(hmacs->hmac_ids[i]); - /* Check the id is in the supported range */ - if (id > SCTP_AUTH_HMAC_ID_MAX) { - id = 0; - continue; - } - - /* See is we support the id. Supported IDs have name and - * length fields set, so that we can allocated and use + /* Check the id is in the supported range. And + * see if we support the id. Supported IDs have name and + * length fields set, so that we can allocate and use * them. We can safely just check for name, for without the * name, we can't allocate the TFM. */ - if (!sctp_hmac_list[id].hmac_name) { + if (id > SCTP_AUTH_HMAC_ID_MAX || + !sctp_hmac_list[id].hmac_name) { id = 0; continue; } diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7bd5ed4a8657..f2044fcb9dd1 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -201,7 +201,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, max = asoc->frag_point; /* If the the peer requested that we authenticate DATA chunks - * we need to accound for bundling of the AUTH chunks along with + * we need to account for bundling of the AUTH chunks along with * DATA. */ if (sctp_auth_send_cid(SCTP_CID_DATA, asoc)) { diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f6334aa19151..7567e6f1a920 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -279,7 +279,9 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, sctp_v6_to_addr(&dst_saddr, &fl6->saddr, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) + if (!laddr->valid || laddr->state == SCTP_ADDR_DEL || + (laddr->state != SCTP_ADDR_SRC && + !asoc->src_out_of_asoc_ok)) continue; /* Do not compare against v4 addrs */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 0ac3a65daccb..e650978daf27 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -390,7 +390,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) __u8 has_data = 0; struct dst_entry *dst = tp->dst; unsigned char *auth = NULL; /* pointer to auth in skb data */ - __u32 cksum_buf_len = sizeof(struct sctphdr); pr_debug("%s: packet:%p\n", __func__, packet); @@ -493,7 +492,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (chunk == packet->auth) auth = skb_tail_pointer(nskb); - cksum_buf_len += chunk->skb->len; memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, chunk->skb->len); @@ -536,13 +534,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. */ if (!sctp_checksum_disable) { - if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) { - __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); - - /* 3) Put the resultant value into the checksum field in the - * common header, and leave the rest of the bits unchanged. - */ - sh->checksum = sctp_end_cksum(crc32); + if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || + (dst_xfrm(dst) != NULL) || packet->ipfragok) { + sh->checksum = sctp_compute_cksum(nskb, 0); } else { /* no need to seed pseudo checksum for SCTP */ nskb->ip_summed = CHECKSUM_PARTIAL; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index d244a23ab8d3..fe690320b1e4 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1297,6 +1297,13 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) /* Turn an skb into a chunk. * FIXME: Eventually move the structure directly inside the skb->cb[]. + * + * sctpimpguide-05.txt Section 2.8.2 + * M1) Each time a new DATA chunk is transmitted + * set the 'TSN.Missing.Report' count for that TSN to 0. The + * 'TSN.Missing.Report' count will be used to determine missing chunks + * and when to fast retransmit. + * */ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, const struct sctp_association *asoc, @@ -1314,29 +1321,9 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, INIT_LIST_HEAD(&retval->list); retval->skb = skb; retval->asoc = (struct sctp_association *)asoc; - retval->has_tsn = 0; - retval->has_ssn = 0; - retval->rtt_in_progress = 0; - retval->sent_at = 0; retval->singleton = 1; - retval->end_of_packet = 0; - retval->ecn_ce_done = 0; - retval->pdiscard = 0; - - /* sctpimpguide-05.txt Section 2.8.2 - * M1) Each time a new DATA chunk is transmitted - * set the 'TSN.Missing.Report' count for that TSN to 0. The - * 'TSN.Missing.Report' count will be used to determine missing chunks - * and when to fast retransmit. - */ - retval->tsn_missing_report = 0; - retval->tsn_gap_acked = 0; - retval->fast_retransmit = SCTP_CAN_FRTX; - /* If this is a fragmented message, track all fragments - * of the message (for SEND_FAILED). - */ - retval->msg = NULL; + retval->fast_retransmit = SCTP_CAN_FRTX; /* Polish the bead hole. */ INIT_LIST_HEAD(&retval->transmitted_list); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 666c66842799..1a6eef39ab2f 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -860,7 +860,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; - BUG_ON(asoc->peer.primary_path == NULL); sctp_unhash_established(asoc); sctp_association_free(asoc); } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 609c30c80816..3f9707a16d06 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -387,7 +387,7 @@ restart: b_ptr = &tipc_bearers[bearer_id]; strcpy(b_ptr->name, name); - res = m_ptr->enable_bearer(b_ptr); + res = m_ptr->enable_media(b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); @@ -420,23 +420,15 @@ exit: } /** - * tipc_block_bearer - Block the bearer with the given name, and reset all its links + * tipc_block_bearer - Block the bearer, and reset all its links */ -int tipc_block_bearer(const char *name) +int tipc_block_bearer(struct tipc_bearer *b_ptr) { - struct tipc_bearer *b_ptr = NULL; struct tipc_link *l_ptr; struct tipc_link *temp_l_ptr; read_lock_bh(&tipc_net_lock); - b_ptr = tipc_bearer_find(name); - if (!b_ptr) { - pr_warn("Attempt to block unknown bearer <%s>\n", name); - read_unlock_bh(&tipc_net_lock); - return -EINVAL; - } - - pr_info("Blocking bearer <%s>\n", name); + pr_info("Blocking bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); b_ptr->blocked = 1; list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { @@ -465,7 +457,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr) pr_info("Disabling bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); b_ptr->blocked = 1; - b_ptr->media->disable_bearer(b_ptr); + b_ptr->media->disable_media(b_ptr); list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { tipc_link_delete(l_ptr); } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 09c869adcfcf..e5e04be6fffa 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -75,8 +75,8 @@ struct tipc_bearer; /** * struct tipc_media - TIPC media information available to internal users * @send_msg: routine which handles buffer transmission - * @enable_bearer: routine which enables a bearer - * @disable_bearer: routine which disables a bearer + * @enable_media: routine which enables a media + * @disable_media: routine which disables a media * @addr2str: routine which converts media address to string * @addr2msg: routine which converts media address to protocol message area * @msg2addr: routine which converts media address from protocol message area @@ -91,8 +91,8 @@ struct tipc_media { int (*send_msg)(struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); - int (*enable_bearer)(struct tipc_bearer *b_ptr); - void (*disable_bearer)(struct tipc_bearer *b_ptr); + int (*enable_media)(struct tipc_bearer *b_ptr); + void (*disable_media)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); int (*msg2addr)(const struct tipc_bearer *b_ptr, @@ -163,7 +163,7 @@ int tipc_register_media(struct tipc_media *m_ptr); void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); -int tipc_block_bearer(const char *name); +int tipc_block_bearer(struct tipc_bearer *b_ptr); void tipc_continue(struct tipc_bearer *tb_ptr); int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); diff --git a/net/tipc/core.h b/net/tipc/core.h index be72f8cebc53..94895d4e86ab 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -90,21 +90,21 @@ extern int tipc_random __read_mostly; /* * Routines available to privileged subsystems */ -extern int tipc_core_start_net(unsigned long); -extern int tipc_handler_start(void); -extern void tipc_handler_stop(void); -extern int tipc_netlink_start(void); -extern void tipc_netlink_stop(void); -extern int tipc_socket_init(void); -extern void tipc_socket_stop(void); -extern int tipc_sock_create_local(int type, struct socket **res); -extern void tipc_sock_release_local(struct socket *sock); -extern int tipc_sock_accept_local(struct socket *sock, - struct socket **newsock, int flags); +int tipc_core_start_net(unsigned long); +int tipc_handler_start(void); +void tipc_handler_stop(void); +int tipc_netlink_start(void); +void tipc_netlink_stop(void); +int tipc_socket_init(void); +void tipc_socket_stop(void); +int tipc_sock_create_local(int type, struct socket **res); +void tipc_sock_release_local(struct socket *sock); +int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, + int flags); #ifdef CONFIG_SYSCTL -extern int tipc_register_sysctl(void); -extern void tipc_unregister_sysctl(void); +int tipc_register_sysctl(void); +void tipc_unregister_sysctl(void); #else #define tipc_register_sysctl() 0 #define tipc_unregister_sysctl() @@ -201,6 +201,6 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb) return (struct tipc_msg *)skb->data; } -extern struct sk_buff *tipc_buf_acquire(u32 size); +struct sk_buff *tipc_buf_acquire(u32 size); #endif diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 40ea40cf6204..f80d59f5a161 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -2,7 +2,7 @@ * net/tipc/eth_media.c: Ethernet bearer support for TIPC * * Copyright (c) 2001-2007, Ericsson AB - * Copyright (c) 2005-2008, 2011, Wind River Systems + * Copyright (c) 2005-2008, 2011-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,19 +37,19 @@ #include "core.h" #include "bearer.h" -#define MAX_ETH_BEARERS MAX_BEARERS +#define MAX_ETH_MEDIA MAX_BEARERS #define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */ /** - * struct eth_bearer - Ethernet bearer data structure + * struct eth_media - Ethernet bearer data structure * @bearer: ptr to associated "generic" bearer structure * @dev: ptr to associated Ethernet network device * @tipc_packet_type: used in binding TIPC to Ethernet driver * @setup: work item used when enabling bearer * @cleanup: work item used when disabling bearer */ -struct eth_bearer { +struct eth_media { struct tipc_bearer *bearer; struct net_device *dev; struct packet_type tipc_packet_type; @@ -58,7 +58,7 @@ struct eth_bearer { }; static struct tipc_media eth_media_info; -static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; +static struct eth_media eth_media_array[MAX_ETH_MEDIA]; static int eth_started; static int recv_notification(struct notifier_block *nb, unsigned long evt, @@ -100,7 +100,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, if (!clone) return 0; - dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev; + dev = ((struct eth_media *)(tb_ptr->usr_handle))->dev; delta = dev->hard_header_len - skb_headroom(buf); if ((delta > 0) && @@ -128,43 +128,43 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, static int recv_msg(struct sk_buff *buf, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; + struct eth_media *eb_ptr = (struct eth_media *)pt->af_packet_priv; if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(buf); - return 0; + return NET_RX_DROP; } if (likely(eb_ptr->bearer)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; tipc_recv_msg(buf, eb_ptr->bearer); - return 0; + return NET_RX_SUCCESS; } } kfree_skb(buf); - return 0; + return NET_RX_DROP; } /** - * setup_bearer - setup association between Ethernet bearer and interface + * setup_media - setup association between Ethernet bearer and interface */ -static void setup_bearer(struct work_struct *work) +static void setup_media(struct work_struct *work) { - struct eth_bearer *eb_ptr = - container_of(work, struct eth_bearer, setup); + struct eth_media *eb_ptr = + container_of(work, struct eth_media, setup); dev_add_pack(&eb_ptr->tipc_packet_type); } /** - * enable_bearer - attach TIPC bearer to an Ethernet interface + * enable_media - attach TIPC bearer to an Ethernet interface */ -static int enable_bearer(struct tipc_bearer *tb_ptr) +static int enable_media(struct tipc_bearer *tb_ptr) { struct net_device *dev; - struct eth_bearer *eb_ptr = ð_bearers[0]; - struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; + struct eth_media *eb_ptr = ð_media_array[0]; + struct eth_media *stop = ð_media_array[MAX_ETH_MEDIA]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; int pending_dev = 0; @@ -188,7 +188,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) eb_ptr->tipc_packet_type.func = recv_msg; eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); - INIT_WORK(&eb_ptr->setup, setup_bearer); + INIT_WORK(&eb_ptr->setup, setup_media); schedule_work(&eb_ptr->setup); /* Associate TIPC bearer with Ethernet bearer */ @@ -205,14 +205,14 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) } /** - * cleanup_bearer - break association between Ethernet bearer and interface + * cleanup_media - break association between Ethernet bearer and interface * * This routine must be invoked from a work queue because it can sleep. */ -static void cleanup_bearer(struct work_struct *work) +static void cleanup_media(struct work_struct *work) { - struct eth_bearer *eb_ptr = - container_of(work, struct eth_bearer, cleanup); + struct eth_media *eb_ptr = + container_of(work, struct eth_media, cleanup); dev_remove_pack(&eb_ptr->tipc_packet_type); dev_put(eb_ptr->dev); @@ -220,18 +220,18 @@ static void cleanup_bearer(struct work_struct *work) } /** - * disable_bearer - detach TIPC bearer from an Ethernet interface + * disable_media - detach TIPC bearer from an Ethernet interface * * Mark Ethernet bearer as inactive so that incoming buffers are thrown away, * then get worker thread to complete bearer cleanup. (Can't do cleanup * here because cleanup code needs to sleep and caller holds spinlocks.) */ -static void disable_bearer(struct tipc_bearer *tb_ptr) +static void disable_media(struct tipc_bearer *tb_ptr) { - struct eth_bearer *eb_ptr = (struct eth_bearer *)tb_ptr->usr_handle; + struct eth_media *eb_ptr = (struct eth_media *)tb_ptr->usr_handle; eb_ptr->bearer = NULL; - INIT_WORK(&eb_ptr->cleanup, cleanup_bearer); + INIT_WORK(&eb_ptr->cleanup, cleanup_media); schedule_work(&eb_ptr->cleanup); } @@ -245,8 +245,8 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct eth_bearer *eb_ptr = ð_bearers[0]; - struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; + struct eth_media *eb_ptr = ð_media_array[0]; + struct eth_media *stop = ð_media_array[MAX_ETH_MEDIA]; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -265,17 +265,17 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, if (netif_carrier_ok(dev)) tipc_continue(eb_ptr->bearer); else - tipc_block_bearer(eb_ptr->bearer->name); + tipc_block_bearer(eb_ptr->bearer); break; case NETDEV_UP: tipc_continue(eb_ptr->bearer); break; case NETDEV_DOWN: - tipc_block_bearer(eb_ptr->bearer->name); + tipc_block_bearer(eb_ptr->bearer); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: - tipc_block_bearer(eb_ptr->bearer->name); + tipc_block_bearer(eb_ptr->bearer); tipc_continue(eb_ptr->bearer); break; case NETDEV_UNREGISTER: @@ -327,8 +327,8 @@ static int eth_msg2addr(const struct tipc_bearer *tb_ptr, */ static struct tipc_media eth_media_info = { .send_msg = send_msg, - .enable_bearer = enable_bearer, - .disable_bearer = disable_bearer, + .enable_media = enable_media, + .disable_media = disable_media, .addr2str = eth_addr2str, .addr2msg = eth_addr2msg, .msg2addr = eth_msg2addr, diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 9934a32bfa87..c13989297464 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -42,17 +42,17 @@ #include "core.h" #include "bearer.h" -#define MAX_IB_BEARERS MAX_BEARERS +#define MAX_IB_MEDIA MAX_BEARERS /** - * struct ib_bearer - Infiniband bearer data structure + * struct ib_media - Infiniband media data structure * @bearer: ptr to associated "generic" bearer structure * @dev: ptr to associated Infiniband network device * @tipc_packet_type: used in binding TIPC to Infiniband driver * @cleanup: work item used when disabling bearer */ -struct ib_bearer { +struct ib_media { struct tipc_bearer *bearer; struct net_device *dev; struct packet_type tipc_packet_type; @@ -61,7 +61,7 @@ struct ib_bearer { }; static struct tipc_media ib_media_info; -static struct ib_bearer ib_bearers[MAX_IB_BEARERS]; +static struct ib_media ib_media_array[MAX_IB_MEDIA]; static int ib_started; /** @@ -93,7 +93,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, if (!clone) return 0; - dev = ((struct ib_bearer *)(tb_ptr->usr_handle))->dev; + dev = ((struct ib_media *)(tb_ptr->usr_handle))->dev; delta = dev->hard_header_len - skb_headroom(buf); if ((delta > 0) && @@ -121,43 +121,43 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, static int recv_msg(struct sk_buff *buf, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct ib_bearer *ib_ptr = (struct ib_bearer *)pt->af_packet_priv; + struct ib_media *ib_ptr = (struct ib_media *)pt->af_packet_priv; if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(buf); - return 0; + return NET_RX_DROP; } if (likely(ib_ptr->bearer)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; tipc_recv_msg(buf, ib_ptr->bearer); - return 0; + return NET_RX_SUCCESS; } } kfree_skb(buf); - return 0; + return NET_RX_DROP; } /** * setup_bearer - setup association between InfiniBand bearer and interface */ -static void setup_bearer(struct work_struct *work) +static void setup_media(struct work_struct *work) { - struct ib_bearer *ib_ptr = - container_of(work, struct ib_bearer, setup); + struct ib_media *ib_ptr = + container_of(work, struct ib_media, setup); dev_add_pack(&ib_ptr->tipc_packet_type); } /** - * enable_bearer - attach TIPC bearer to an InfiniBand interface + * enable_media - attach TIPC bearer to an InfiniBand interface */ -static int enable_bearer(struct tipc_bearer *tb_ptr) +static int enable_media(struct tipc_bearer *tb_ptr) { struct net_device *dev; - struct ib_bearer *ib_ptr = &ib_bearers[0]; - struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; + struct ib_media *ib_ptr = &ib_media_array[0]; + struct ib_media *stop = &ib_media_array[MAX_IB_MEDIA]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; int pending_dev = 0; @@ -181,7 +181,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) ib_ptr->tipc_packet_type.func = recv_msg; ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr; INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list)); - INIT_WORK(&ib_ptr->setup, setup_bearer); + INIT_WORK(&ib_ptr->setup, setup_media); schedule_work(&ib_ptr->setup); /* Associate TIPC bearer with InfiniBand bearer */ @@ -204,8 +204,8 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) */ static void cleanup_bearer(struct work_struct *work) { - struct ib_bearer *ib_ptr = - container_of(work, struct ib_bearer, cleanup); + struct ib_media *ib_ptr = + container_of(work, struct ib_media, cleanup); dev_remove_pack(&ib_ptr->tipc_packet_type); dev_put(ib_ptr->dev); @@ -213,15 +213,15 @@ static void cleanup_bearer(struct work_struct *work) } /** - * disable_bearer - detach TIPC bearer from an InfiniBand interface + * disable_media - detach TIPC bearer from an InfiniBand interface * * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away, * then get worker thread to complete bearer cleanup. (Can't do cleanup * here because cleanup code needs to sleep and caller holds spinlocks.) */ -static void disable_bearer(struct tipc_bearer *tb_ptr) +static void disable_media(struct tipc_bearer *tb_ptr) { - struct ib_bearer *ib_ptr = (struct ib_bearer *)tb_ptr->usr_handle; + struct ib_media *ib_ptr = (struct ib_media *)tb_ptr->usr_handle; ib_ptr->bearer = NULL; INIT_WORK(&ib_ptr->cleanup, cleanup_bearer); @@ -238,8 +238,8 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ib_bearer *ib_ptr = &ib_bearers[0]; - struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; + struct ib_media *ib_ptr = &ib_media_array[0]; + struct ib_media *stop = &ib_media_array[MAX_IB_MEDIA]; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -258,17 +258,17 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, if (netif_carrier_ok(dev)) tipc_continue(ib_ptr->bearer); else - tipc_block_bearer(ib_ptr->bearer->name); + tipc_block_bearer(ib_ptr->bearer); break; case NETDEV_UP: tipc_continue(ib_ptr->bearer); break; case NETDEV_DOWN: - tipc_block_bearer(ib_ptr->bearer->name); + tipc_block_bearer(ib_ptr->bearer); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: - tipc_block_bearer(ib_ptr->bearer->name); + tipc_block_bearer(ib_ptr->bearer); tipc_continue(ib_ptr->bearer); break; case NETDEV_UNREGISTER: @@ -323,8 +323,8 @@ static int ib_msg2addr(const struct tipc_bearer *tb_ptr, */ static struct tipc_media ib_media_info = { .send_msg = send_msg, - .enable_bearer = enable_bearer, - .disable_bearer = disable_bearer, + .enable_media = enable_media, + .disable_media = disable_media, .addr2str = ib_addr2str, .addr2msg = ib_addr2msg, .msg2addr = ib_msg2addr, diff --git a/net/tipc/link.c b/net/tipc/link.c index 0cc3d9015c5d..54163f91b8ae 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -75,20 +75,6 @@ static const char *link_unk_evt = "Unknown link event "; */ #define START_CHANGEOVER 100000u -/** - * struct tipc_link_name - deconstructed link name - * @addr_local: network address of node at this end - * @if_local: name of interface at this end - * @addr_peer: network address of node at far end - * @if_peer: name of interface at far end - */ -struct tipc_link_name { - u32 addr_local; - char if_local[TIPC_MAX_IF_NAME]; - u32 addr_peer; - char if_peer[TIPC_MAX_IF_NAME]; -}; - static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, struct sk_buff *buf); static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf); @@ -97,8 +83,7 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr, static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - u32 destnode); + unsigned int len, u32 destnode); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); @@ -161,72 +146,6 @@ int tipc_link_is_active(struct tipc_link *l_ptr) } /** - * link_name_validate - validate & (optionally) deconstruct tipc_link name - * @name: ptr to link name string - * @name_parts: ptr to area for link name components (or NULL if not needed) - * - * Returns 1 if link name is valid, otherwise 0. - */ -static int link_name_validate(const char *name, - struct tipc_link_name *name_parts) -{ - char name_copy[TIPC_MAX_LINK_NAME]; - char *addr_local; - char *if_local; - char *addr_peer; - char *if_peer; - char dummy; - u32 z_local, c_local, n_local; - u32 z_peer, c_peer, n_peer; - u32 if_local_len; - u32 if_peer_len; - - /* copy link name & ensure length is OK */ - name_copy[TIPC_MAX_LINK_NAME - 1] = 0; - /* need above in case non-Posix strncpy() doesn't pad with nulls */ - strncpy(name_copy, name, TIPC_MAX_LINK_NAME); - if (name_copy[TIPC_MAX_LINK_NAME - 1] != 0) - return 0; - - /* ensure all component parts of link name are present */ - addr_local = name_copy; - if_local = strchr(addr_local, ':'); - if (if_local == NULL) - return 0; - *(if_local++) = 0; - addr_peer = strchr(if_local, '-'); - if (addr_peer == NULL) - return 0; - *(addr_peer++) = 0; - if_local_len = addr_peer - if_local; - if_peer = strchr(addr_peer, ':'); - if (if_peer == NULL) - return 0; - *(if_peer++) = 0; - if_peer_len = strlen(if_peer) + 1; - - /* validate component parts of link name */ - if ((sscanf(addr_local, "%u.%u.%u%c", - &z_local, &c_local, &n_local, &dummy) != 3) || - (sscanf(addr_peer, "%u.%u.%u%c", - &z_peer, &c_peer, &n_peer, &dummy) != 3) || - (z_local > 255) || (c_local > 4095) || (n_local > 4095) || - (z_peer > 255) || (c_peer > 4095) || (n_peer > 4095) || - (if_local_len <= 1) || (if_local_len > TIPC_MAX_IF_NAME) || - (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME)) - return 0; - - /* return link name components, if necessary */ - if (name_parts) { - name_parts->addr_local = tipc_addr(z_local, c_local, n_local); - strcpy(name_parts->if_local, if_local); - name_parts->addr_peer = tipc_addr(z_peer, c_peer, n_peer); - strcpy(name_parts->if_peer, if_peer); - } - return 1; -} - -/** * link_timeout - handle expiration of link timer * @l_ptr: pointer to link * @@ -1065,8 +984,7 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, */ int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, - const u32 num_sect, unsigned int total_len, - u32 destaddr) + unsigned int len, u32 destaddr) { struct tipc_msg *hdr = &sender->phdr; struct tipc_link *l_ptr; @@ -1080,8 +998,7 @@ again: * Try building message using port's max_pkt hint. * (Must not hold any locks while building message.) */ - res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, - sender->max_pkt, &buf); + res = tipc_msg_build(hdr, msg_sect, len, sender->max_pkt, &buf); /* Exit if build request was invalid */ if (unlikely(res < 0)) return res; @@ -1121,8 +1038,7 @@ exit: if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) goto again; - return link_send_sections_long(sender, msg_sect, - num_sect, total_len, + return link_send_sections_long(sender, msg_sect, len, destaddr); } tipc_node_unlock(node); @@ -1133,8 +1049,8 @@ exit: if (buf) return tipc_reject_msg(buf, TIPC_ERR_NO_NODE); if (res >= 0) - return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect, - total_len, TIPC_ERR_NO_NODE); + return tipc_port_reject_sections(sender, hdr, msg_sect, + len, TIPC_ERR_NO_NODE); return res; } @@ -1154,18 +1070,17 @@ exit: */ static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - u32 destaddr) + unsigned int len, u32 destaddr) { struct tipc_link *l_ptr; struct tipc_node *node; struct tipc_msg *hdr = &sender->phdr; - u32 dsz = total_len; + u32 dsz = len; u32 max_pkt, fragm_sz, rest; struct tipc_msg fragm_hdr; struct sk_buff *buf, *buf_chain, *prev; u32 fragm_crs, fragm_rest, hsz, sect_rest; - const unchar *sect_crs; + const unchar __user *sect_crs; int curr_sect; u32 fragm_no; int res = 0; @@ -1207,7 +1122,7 @@ again: if (!sect_rest) { sect_rest = msg_sect[++curr_sect].iov_len; - sect_crs = (const unchar *)msg_sect[curr_sect].iov_base; + sect_crs = msg_sect[curr_sect].iov_base; } if (sect_rest < fragm_rest) @@ -1283,8 +1198,8 @@ reject: buf = buf_chain->next; kfree_skb(buf_chain); } - return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect, - total_len, TIPC_ERR_NO_NODE); + return tipc_port_reject_sections(sender, hdr, msg_sect, + len, TIPC_ERR_NO_NODE); } /* Append chain of fragments to send queue & send them */ @@ -1592,15 +1507,15 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) /* Ensure bearer is still enabled */ if (unlikely(!b_ptr->active)) - goto cont; + goto discard; /* Ensure message is well-formed */ if (unlikely(!link_recv_buf_validate(buf))) - goto cont; + goto discard; /* Ensure message data is a single contiguous unit */ if (unlikely(skb_linearize(buf))) - goto cont; + goto discard; /* Handle arrival of a non-unicast link message */ msg = buf_msg(buf); @@ -1616,20 +1531,18 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) /* Discard unicast link messages destined for another node */ if (unlikely(!msg_short(msg) && (msg_destnode(msg) != tipc_own_addr))) - goto cont; + goto discard; /* Locate neighboring node that sent message */ n_ptr = tipc_node_find(msg_prevnode(msg)); if (unlikely(!n_ptr)) - goto cont; + goto discard; tipc_node_lock(n_ptr); /* Locate unicast link endpoint that should handle message */ l_ptr = n_ptr->links[b_ptr->identity]; - if (unlikely(!l_ptr)) { - tipc_node_unlock(n_ptr); - goto cont; - } + if (unlikely(!l_ptr)) + goto unlock_discard; /* Verify that communication with node is currently allowed */ if ((n_ptr->block_setup & WAIT_PEER_DOWN) && @@ -1639,10 +1552,8 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) !msg_redundant_link(msg)) n_ptr->block_setup &= ~WAIT_PEER_DOWN; - if (n_ptr->block_setup) { - tipc_node_unlock(n_ptr); - goto cont; - } + if (n_ptr->block_setup) + goto unlock_discard; /* Validate message sequence number info */ seq_no = msg_seqno(msg); @@ -1678,98 +1589,97 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) /* Now (finally!) process the incoming message */ protocol_check: - if (likely(link_working_working(l_ptr))) { - if (likely(seq_no == mod(l_ptr->next_in_no))) { - l_ptr->next_in_no++; - if (unlikely(l_ptr->oldest_deferred_in)) - head = link_insert_deferred_queue(l_ptr, - head); -deliver: - if (likely(msg_isdata(msg))) { - tipc_node_unlock(n_ptr); - tipc_port_recv_msg(buf); - continue; - } - switch (msg_user(msg)) { - int ret; - case MSG_BUNDLER: - l_ptr->stats.recv_bundles++; - l_ptr->stats.recv_bundled += - msg_msgcnt(msg); - tipc_node_unlock(n_ptr); - tipc_link_recv_bundle(buf); - continue; - case NAME_DISTRIBUTOR: - n_ptr->bclink.recv_permitted = true; - tipc_node_unlock(n_ptr); - tipc_named_recv(buf); - continue; - case BCAST_PROTOCOL: - tipc_link_recv_sync(n_ptr, buf); - tipc_node_unlock(n_ptr); - continue; - case CONN_MANAGER: - tipc_node_unlock(n_ptr); - tipc_port_recv_proto_msg(buf); - continue; - case MSG_FRAGMENTER: - l_ptr->stats.recv_fragments++; - ret = tipc_link_recv_fragment( - &l_ptr->defragm_buf, - &buf, &msg); - if (ret == 1) { - l_ptr->stats.recv_fragmented++; - goto deliver; - } - if (ret == -1) - l_ptr->next_in_no--; - break; - case CHANGEOVER_PROTOCOL: - type = msg_type(msg); - if (link_recv_changeover_msg(&l_ptr, - &buf)) { - msg = buf_msg(buf); - seq_no = msg_seqno(msg); - if (type == ORIGINAL_MSG) - goto deliver; - goto protocol_check; - } - break; - default: - kfree_skb(buf); - buf = NULL; - break; - } + if (unlikely(!link_working_working(l_ptr))) { + if (msg_user(msg) == LINK_PROTOCOL) { + link_recv_proto_msg(l_ptr, buf); + head = link_insert_deferred_queue(l_ptr, head); tipc_node_unlock(n_ptr); - tipc_net_route_msg(buf); continue; } + + /* Traffic message. Conditionally activate link */ + link_state_event(l_ptr, TRAFFIC_MSG_EVT); + + if (link_working_working(l_ptr)) { + /* Re-insert buffer in front of queue */ + buf->next = head; + head = buf; + tipc_node_unlock(n_ptr); + continue; + } + goto unlock_discard; + } + + /* Link is now in state WORKING_WORKING */ + if (unlikely(seq_no != mod(l_ptr->next_in_no))) { link_handle_out_of_seq_msg(l_ptr, buf); head = link_insert_deferred_queue(l_ptr, head); tipc_node_unlock(n_ptr); continue; } - - /* Link is not in state WORKING_WORKING */ - if (msg_user(msg) == LINK_PROTOCOL) { - link_recv_proto_msg(l_ptr, buf); + l_ptr->next_in_no++; + if (unlikely(l_ptr->oldest_deferred_in)) head = link_insert_deferred_queue(l_ptr, head); +deliver: + if (likely(msg_isdata(msg))) { tipc_node_unlock(n_ptr); + tipc_port_recv_msg(buf); continue; } - - /* Traffic message. Conditionally activate link */ - link_state_event(l_ptr, TRAFFIC_MSG_EVT); - - if (link_working_working(l_ptr)) { - /* Re-insert buffer in front of queue */ - buf->next = head; - head = buf; + switch (msg_user(msg)) { + int ret; + case MSG_BUNDLER: + l_ptr->stats.recv_bundles++; + l_ptr->stats.recv_bundled += msg_msgcnt(msg); + tipc_node_unlock(n_ptr); + tipc_link_recv_bundle(buf); + continue; + case NAME_DISTRIBUTOR: + n_ptr->bclink.recv_permitted = true; tipc_node_unlock(n_ptr); + tipc_named_recv(buf); continue; + case BCAST_PROTOCOL: + tipc_link_recv_sync(n_ptr, buf); + tipc_node_unlock(n_ptr); + continue; + case CONN_MANAGER: + tipc_node_unlock(n_ptr); + tipc_port_recv_proto_msg(buf); + continue; + case MSG_FRAGMENTER: + l_ptr->stats.recv_fragments++; + ret = tipc_link_recv_fragment(&l_ptr->defragm_buf, + &buf, &msg); + if (ret == 1) { + l_ptr->stats.recv_fragmented++; + goto deliver; + } + if (ret == -1) + l_ptr->next_in_no--; + break; + case CHANGEOVER_PROTOCOL: + type = msg_type(msg); + if (link_recv_changeover_msg(&l_ptr, &buf)) { + msg = buf_msg(buf); + seq_no = msg_seqno(msg); + if (type == ORIGINAL_MSG) + goto deliver; + goto protocol_check; + } + break; + default: + kfree_skb(buf); + buf = NULL; + break; } tipc_node_unlock(n_ptr); -cont: + tipc_net_route_msg(buf); + continue; +unlock_discard: + + tipc_node_unlock(n_ptr); +discard: kfree_skb(buf); } read_unlock_bh(&tipc_net_lock); @@ -2585,25 +2495,21 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) static struct tipc_link *link_find_link(const char *name, struct tipc_node **node) { - struct tipc_link_name link_name_parts; - struct tipc_bearer *b_ptr; struct tipc_link *l_ptr; + struct tipc_node *n_ptr; + int i; - if (!link_name_validate(name, &link_name_parts)) - return NULL; - - b_ptr = tipc_bearer_find_interface(link_name_parts.if_local); - if (!b_ptr) - return NULL; - - *node = tipc_node_find(link_name_parts.addr_peer); - if (!*node) - return NULL; - - l_ptr = (*node)->links[b_ptr->identity]; - if (!l_ptr || strcmp(l_ptr->name, name)) - return NULL; - + list_for_each_entry(n_ptr, &tipc_node_list, list) { + for (i = 0; i < MAX_BEARERS; i++) { + l_ptr = n_ptr->links[i]; + if (l_ptr && !strcmp(l_ptr->name, name)) + goto found; + } + } + l_ptr = NULL; + n_ptr = NULL; +found: + *node = n_ptr; return l_ptr; } @@ -2646,6 +2552,7 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) struct tipc_link *l_ptr; struct tipc_bearer *b_ptr; struct tipc_media *m_ptr; + int res = 0; l_ptr = link_find_link(name, &node); if (l_ptr) { @@ -2668,9 +2575,12 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) case TIPC_CMD_SET_LINK_WINDOW: tipc_link_set_queue_limits(l_ptr, new_value); break; + default: + res = -EINVAL; + break; } tipc_node_unlock(node); - return 0; + return res; } b_ptr = tipc_bearer_find(name); @@ -2678,15 +2588,18 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) switch (cmd) { case TIPC_CMD_SET_LINK_TOL: b_ptr->tolerance = new_value; - return 0; + break; case TIPC_CMD_SET_LINK_PRI: b_ptr->priority = new_value; - return 0; + break; case TIPC_CMD_SET_LINK_WINDOW: b_ptr->window = new_value; - return 0; + break; + default: + res = -EINVAL; + break; } - return -EINVAL; + return res; } m_ptr = tipc_media_find(name); @@ -2695,15 +2608,18 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) switch (cmd) { case TIPC_CMD_SET_LINK_TOL: m_ptr->tolerance = new_value; - return 0; + break; case TIPC_CMD_SET_LINK_PRI: m_ptr->priority = new_value; - return 0; + break; case TIPC_CMD_SET_LINK_WINDOW: m_ptr->window = new_value; - return 0; + break; + default: + res = -EINVAL; + break; } - return -EINVAL; + return res; } struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, diff --git a/net/tipc/link.h b/net/tipc/link.h index c048ed1cbd76..55cf8554a08b 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -227,9 +227,7 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, - const u32 num_sect, - unsigned int total_len, - u32 destnode); + unsigned int len, u32 destnode); void tipc_link_recv_bundle(struct sk_buff *buf); int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ced60e2fc4f7..e525f8ce1dee 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -73,13 +73,13 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, * Returns message data size or errno */ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, int max_size, - struct sk_buff **buf) + unsigned int len, int max_size, struct sk_buff **buf) { - int dsz, sz, hsz, pos, res, cnt; + int dsz, sz, hsz; + unsigned char *to; - dsz = total_len; - pos = hsz = msg_hdr_sz(hdr); + dsz = len; + hsz = msg_hdr_sz(hdr); sz = hsz + dsz; msg_set_size(hdr, sz); if (unlikely(sz > max_size)) { @@ -91,16 +91,11 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, if (!(*buf)) return -ENOMEM; skb_copy_to_linear_data(*buf, hdr, hsz); - for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { - skb_copy_to_linear_data_offset(*buf, pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); - pos += msg_sect[cnt].iov_len; + to = (*buf)->data + hsz; + if (len && memcpy_fromiovecend(to, msg_sect, 0, dsz)) { + kfree_skb(*buf); + *buf = NULL; + return -EFAULT; } - if (likely(res)) - return dsz; - - kfree_skb(*buf); - *buf = NULL; - return -EFAULT; + return dsz; } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 5e4ccf5c27df..559b73a9bf35 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -722,6 +722,5 @@ u32 tipc_msg_tot_importance(struct tipc_msg *m); void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, int max_size, - struct sk_buff **buf); + unsigned int len, int max_size, struct sk_buff **buf); #endif diff --git a/net/tipc/port.c b/net/tipc/port.c index b3ed2fcab4fb..c081a7632302 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -90,8 +90,7 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg) * tipc_multicast - send a multicast message to local and remote destinations */ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, - u32 num_sect, struct iovec const *msg_sect, - unsigned int total_len) + struct iovec const *msg_sect, unsigned int len) { struct tipc_msg *hdr; struct sk_buff *buf; @@ -114,8 +113,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, msg_set_namelower(hdr, seq->lower); msg_set_nameupper(hdr, seq->upper); msg_set_hdr_sz(hdr, MCAST_H_SIZE); - res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - &buf); + res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf); if (unlikely(!buf)) return res; @@ -436,14 +434,13 @@ exit: } int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, - struct iovec const *msg_sect, u32 num_sect, - unsigned int total_len, int err) + struct iovec const *msg_sect, unsigned int len, + int err) { struct sk_buff *buf; int res; - res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - &buf); + res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf); if (!buf) return res; @@ -918,15 +915,14 @@ int tipc_port_recv_msg(struct sk_buff *buf) * tipc_port_recv_sections(): Concatenate and deliver sectioned * message for this node. */ -static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_sect, +static int tipc_port_recv_sections(struct tipc_port *sender, struct iovec const *msg_sect, - unsigned int total_len) + unsigned int len) { struct sk_buff *buf; int res; - res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len, - MAX_MSG_SIZE, &buf); + res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf); if (likely(buf)) tipc_port_recv_msg(buf); return res; @@ -935,8 +931,7 @@ static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_se /** * tipc_send - send message sections on connection */ -int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len) +int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len) { struct tipc_port *p_ptr; u32 destnode; @@ -950,11 +945,10 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect, if (!tipc_port_congested(p_ptr)) { destnode = port_peernode(p_ptr); if (likely(!in_own_node(destnode))) - res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, - total_len, destnode); + res = tipc_link_send_sections_fast(p_ptr, msg_sect, + len, destnode); else - res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect, - total_len); + res = tipc_port_recv_sections(p_ptr, msg_sect, len); if (likely(res != -ELINKCONG)) { p_ptr->congested = 0; @@ -965,7 +959,7 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect, } if (port_unreliable(p_ptr)) { p_ptr->congested = 0; - return total_len; + return len; } return -ELINKCONG; } @@ -974,8 +968,7 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect, * tipc_send2name - send message sections to port name */ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, - unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len) + struct iovec const *msg_sect, unsigned int len) { struct tipc_port *p_ptr; struct tipc_msg *msg; @@ -999,36 +992,32 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, if (likely(destport || destnode)) { if (likely(in_own_node(destnode))) - res = tipc_port_recv_sections(p_ptr, num_sect, - msg_sect, total_len); + res = tipc_port_recv_sections(p_ptr, msg_sect, len); else if (tipc_own_addr) res = tipc_link_send_sections_fast(p_ptr, msg_sect, - num_sect, total_len, - destnode); + len, destnode); else res = tipc_port_reject_sections(p_ptr, msg, msg_sect, - num_sect, total_len, - TIPC_ERR_NO_NODE); + len, TIPC_ERR_NO_NODE); if (likely(res != -ELINKCONG)) { if (res > 0) p_ptr->sent++; return res; } if (port_unreliable(p_ptr)) { - return total_len; + return len; } return -ELINKCONG; } - return tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect, - total_len, TIPC_ERR_NO_NAME); + return tipc_port_reject_sections(p_ptr, msg, msg_sect, len, + TIPC_ERR_NO_NAME); } /** * tipc_send2port - send message sections to port identity */ int tipc_send2port(u32 ref, struct tipc_portid const *dest, - unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len) + struct iovec const *msg_sect, unsigned int len) { struct tipc_port *p_ptr; struct tipc_msg *msg; @@ -1046,21 +1035,20 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest, msg_set_hdr_sz(msg, BASIC_H_SIZE); if (in_own_node(dest->node)) - res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect, - total_len); + res = tipc_port_recv_sections(p_ptr, msg_sect, len); else if (tipc_own_addr) - res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, - total_len, dest->node); + res = tipc_link_send_sections_fast(p_ptr, msg_sect, len, + dest->node); else - res = tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect, - total_len, TIPC_ERR_NO_NODE); + res = tipc_port_reject_sections(p_ptr, msg, msg_sect, len, + TIPC_ERR_NO_NODE); if (likely(res != -ELINKCONG)) { if (res > 0) p_ptr->sent++; return res; } if (port_unreliable(p_ptr)) { - return total_len; + return len; } return -ELINKCONG; } diff --git a/net/tipc/port.h b/net/tipc/port.h index 5a7026b9c345..912253597343 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -151,24 +151,20 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); * TIPC messaging routines */ int tipc_port_recv_msg(struct sk_buff *buf); -int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len); +int tipc_send(u32 portref, struct iovec const *msg_sect, unsigned int len); int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, - unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len); + struct iovec const *msg_sect, unsigned int len); int tipc_send2port(u32 portref, struct tipc_portid const *dest, - unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len); + struct iovec const *msg_sect, unsigned int len); int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, - unsigned int section_count, struct iovec const *msg, - unsigned int total_len); + struct iovec const *msg, unsigned int len); int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, - struct iovec const *msg_sect, u32 num_sect, - unsigned int total_len, int err); + struct iovec const *msg_sect, unsigned int len, + int err); struct sk_buff *tipc_port_get_ports(void); void tipc_port_recv_proto_msg(struct sk_buff *buf); void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6cc7ddd2fb7c..3906527259d1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -338,7 +338,7 @@ static int release(struct socket *sock) buf = __skb_dequeue(&sk->sk_receive_queue); if (buf == NULL) break; - if (TIPC_SKB_CB(buf)->handle != 0) + if (TIPC_SKB_CB(buf)->handle != NULL) kfree_skb(buf); else { if ((sock->state == SS_CONNECTING) || @@ -622,13 +622,11 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, res = tipc_send2name(tport->ref, &dest->addr.name.name, dest->addr.name.domain, - m->msg_iovlen, m->msg_iov, total_len); } else if (dest->addrtype == TIPC_ADDR_ID) { res = tipc_send2port(tport->ref, &dest->addr.id, - m->msg_iovlen, m->msg_iov, total_len); } else if (dest->addrtype == TIPC_ADDR_MCAST) { @@ -641,7 +639,6 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, break; res = tipc_multicast(tport->ref, &dest->addr.nameseq, - m->msg_iovlen, m->msg_iov, total_len); } @@ -707,8 +704,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, break; } - res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov, - total_len); + res = tipc_send(tport->ref, m->msg_iov, total_len); if (likely(res != -ELINKCONG)) break; if (timeout_val <= 0L) { @@ -1368,7 +1364,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) return TIPC_ERR_OVERLOAD; /* Enqueue message */ - TIPC_SKB_CB(buf)->handle = 0; + TIPC_SKB_CB(buf)->handle = NULL; __skb_queue_tail(&sk->sk_receive_queue, buf); skb_set_owner_r(buf, sk); @@ -1691,7 +1687,7 @@ restart: /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ buf = __skb_dequeue(&sk->sk_receive_queue); if (buf) { - if (TIPC_SKB_CB(buf)->handle != 0) { + if (TIPC_SKB_CB(buf)->handle != NULL) { kfree_skb(buf); goto restart; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 86de99ad2976..c1f403bed683 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1246,6 +1246,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) return 0; } +static void unix_sock_inherit_flags(const struct socket *old, + struct socket *new) +{ + if (test_bit(SOCK_PASSCRED, &old->flags)) + set_bit(SOCK_PASSCRED, &new->flags); + if (test_bit(SOCK_PASSSEC, &old->flags)) + set_bit(SOCK_PASSSEC, &new->flags); +} + static int unix_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk = sock->sk; @@ -1280,6 +1289,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) /* attach accepted sock to socket */ unix_state_lock(tsk); newsock->state = SS_CONNECTED; + unix_sock_inherit_flags(sock, newsock); sock_graft(tsk, newsock); unix_state_unlock(tsk); return 0; diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h index 1e743d214856..5dcd9c067bf0 100644 --- a/net/wimax/wimax-internal.h +++ b/net/wimax/wimax-internal.h @@ -63,11 +63,11 @@ void __wimax_state_set(struct wimax_dev *wimax_dev, enum wimax_st state) { wimax_dev->state = state; } -extern void __wimax_state_change(struct wimax_dev *, enum wimax_st); +void __wimax_state_change(struct wimax_dev *, enum wimax_st); #ifdef CONFIG_DEBUG_FS -extern int wimax_debugfs_add(struct wimax_dev *); -extern void wimax_debugfs_rm(struct wimax_dev *); +int wimax_debugfs_add(struct wimax_dev *); +void wimax_debugfs_rm(struct wimax_dev *); #else static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev) { @@ -76,13 +76,13 @@ static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev) static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {} #endif -extern void wimax_id_table_add(struct wimax_dev *); -extern struct wimax_dev *wimax_dev_get_by_genl_info(struct genl_info *, int); -extern void wimax_id_table_rm(struct wimax_dev *); -extern void wimax_id_table_release(void); +void wimax_id_table_add(struct wimax_dev *); +struct wimax_dev *wimax_dev_get_by_genl_info(struct genl_info *, int); +void wimax_id_table_rm(struct wimax_dev *); +void wimax_id_table_release(void); -extern int wimax_rfkill_add(struct wimax_dev *); -extern void wimax_rfkill_rm(struct wimax_dev *); +int wimax_rfkill_add(struct wimax_dev *); +void wimax_rfkill_rm(struct wimax_dev *); extern struct genl_family wimax_gnl_family; extern struct genl_multicast_group wimax_gnl_mcg; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 50f6195c8b70..16f3c3a7b2c1 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -328,6 +328,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, width); } +EXPORT_SYMBOL(cfg80211_chandef_dfs_required); static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, diff --git a/net/wireless/core.c b/net/wireless/core.c index fe8d4f2be49b..aff959e5a1b3 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -958,8 +958,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) return notifier_from_errno(-EOPNOTSUPP); - if (rfkill_blocked(rdev->rfkill)) - return notifier_from_errno(-ERFKILL); ret = cfg80211_can_add_interface(rdev, wdev->iftype); if (ret) return notifier_from_errno(ret); diff --git a/net/wireless/core.h b/net/wireless/core.h index 9ad43c619c54..af10e59af2d8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -234,10 +234,10 @@ struct cfg80211_beacon_registration { }; /* free object */ -extern void cfg80211_dev_free(struct cfg80211_registered_device *rdev); +void cfg80211_dev_free(struct cfg80211_registered_device *rdev); -extern int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, - char *newname); +int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, + char *newname); void ieee80211_set_bitrate_flags(struct wiphy *wiphy); @@ -382,15 +382,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode, u8 radar_detect); -/** - * cfg80211_chandef_dfs_required - checks if radar detection is required - * @wiphy: the wiphy to validate against - * @chandef: the channel definition to check - * Return: 1 if radar detection is required, 0 if it is not, < 0 on error - */ -int cfg80211_chandef_dfs_required(struct wiphy *wiphy, - const struct cfg80211_chan_def *c); - void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); @@ -411,6 +402,9 @@ static inline int cfg80211_can_add_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { + if (rfkill_blocked(rdev->rfkill)) + return -ERFKILL; + return cfg80211_can_change_interface(rdev, NULL, iftype); } diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 90d050036624..454157717efa 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -47,17 +47,19 @@ static int ht_print_chan(struct ieee80211_channel *chan, return 0; if (chan->flags & IEEE80211_CHAN_DISABLED) - return snprintf(buf + offset, - buf_size - offset, - "%d Disabled\n", - chan->center_freq); - - return snprintf(buf + offset, - buf_size - offset, - "%d HT40 %c%c\n", - chan->center_freq, - (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) ? ' ' : '-', - (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) ? ' ' : '+'); + return scnprintf(buf + offset, + buf_size - offset, + "%d Disabled\n", + chan->center_freq); + + return scnprintf(buf + offset, + buf_size - offset, + "%d HT40 %c%c\n", + chan->center_freq, + (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) ? + ' ' : '-', + (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) ? + ' ' : '+'); } static ssize_t ht40allow_map_read(struct file *file, diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk index 9392f8cbb901..42ed274e81f4 100644 --- a/net/wireless/genregdb.awk +++ b/net/wireless/genregdb.awk @@ -46,6 +46,12 @@ BEGIN { sub(/:/, "", country) printf "static const struct ieee80211_regdomain regdom_%s = {\n", country printf "\t.alpha2 = \"%s\",\n", country + if ($NF ~ /DFS-ETSI/) + printf "\t.dfs_region = NL80211_DFS_ETSI,\n" + else if ($NF ~ /DFS-FCC/) + printf "\t.dfs_region = NL80211_DFS_FCC,\n" + else if ($NF ~ /DFS-JP/) + printf "\t.dfs_region = NL80211_DFS_JP,\n" printf "\t.reg_rules = {\n" active = 1 regdb = regdb "\t®dom_" country ",\n" diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 626dc3b5fd8d..cbbef88a8ebd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5591,6 +5591,9 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (err) return err; + if (netif_carrier_ok(dev)) + return -EBUSY; + if (wdev->cac_started) return -EBUSY; @@ -5634,15 +5637,26 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) static struct nlattr *csa_attrs[NL80211_ATTR_MAX+1]; u8 radar_detect_width = 0; int err; + bool need_new_beacon = false; if (!rdev->ops->channel_switch || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) return -EOPNOTSUPP; - /* may add IBSS support later */ - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + need_new_beacon = true; + + /* useless if AP is not running */ + if (!wdev->beacon_interval) + return -EINVAL; + break; + case NL80211_IFTYPE_ADHOC: + break; + default: return -EOPNOTSUPP; + } memset(¶ms, 0, sizeof(params)); @@ -5651,15 +5665,16 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) return -EINVAL; /* only important for AP, IBSS and mesh create IEs internally */ - if (!info->attrs[NL80211_ATTR_CSA_IES]) - return -EINVAL; - - /* useless if AP is not running */ - if (!wdev->beacon_interval) + if (need_new_beacon && + (!info->attrs[NL80211_ATTR_CSA_IES] || + !info->attrs[NL80211_ATTR_CSA_C_OFF_BEACON])) return -EINVAL; params.count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]); + if (!need_new_beacon) + goto skip_beacons; + err = nl80211_parse_beacon(info->attrs, ¶ms.beacon_after); if (err) return err; @@ -5699,6 +5714,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } +skip_beacons: err = nl80211_parse_chandef(rdev, info, ¶ms.chandef); if (err) return err; @@ -5706,12 +5722,17 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) return -EINVAL; - err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); - if (err < 0) { - return err; - } else if (err) { - radar_detect_width = BIT(params.chandef.width); - params.radar_required = true; + /* DFS channels are only supported for AP/P2P GO ... for now. */ + if (dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP || + dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) { + err = cfg80211_chandef_dfs_required(wdev->wiphy, + ¶ms.chandef); + if (err < 0) { + return err; + } else if (err) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; + } } err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, @@ -10740,7 +10761,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev, wdev_lock(wdev); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_ADHOC)) goto out; wdev->channel = chandef->chan; diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 7d604c06c3dc..a271c27fac77 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -97,6 +97,10 @@ int ieee80211_radiotap_iterator_init( struct ieee80211_radiotap_header *radiotap_header, int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns) { + /* check the radiotap header can actually be present */ + if (max_length < sizeof(struct ieee80211_radiotap_header)) + return -EINVAL; + /* Linux only supports version 0 radiotap format */ if (radiotap_header->it_version) return -EINVAL; @@ -131,7 +135,8 @@ int ieee80211_radiotap_iterator_init( */ if ((unsigned long)iterator->_arg - - (unsigned long)iterator->_rtheader > + (unsigned long)iterator->_rtheader + + sizeof(uint32_t) > (unsigned long)iterator->_max_length) return -EINVAL; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index de06d5d1287f..d62cb1e91475 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -172,11 +172,21 @@ static const struct ieee80211_regdomain world_regdom = { NL80211_RRF_NO_IBSS | NL80211_RRF_NO_OFDM), /* IEEE 802.11a, channel 36..48 */ - REG_RULE(5180-10, 5240+10, 80, 6, 20, + REG_RULE(5180-10, 5240+10, 160, 6, 20, NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS), - /* NB: 5260 MHz - 5700 MHz requires DFS */ + /* IEEE 802.11a, channel 52..64 - DFS required */ + REG_RULE(5260-10, 5320+10, 160, 6, 20, + NL80211_RRF_PASSIVE_SCAN | + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + + /* IEEE 802.11a, channel 100..144 - DFS required */ + REG_RULE(5500-10, 5720+10, 160, 6, 20, + NL80211_RRF_PASSIVE_SCAN | + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), /* IEEE 802.11a, channel 149..165 */ REG_RULE(5745-10, 5825+10, 80, 6, 20, diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h index 65acbebd3711..b533ed71daff 100644 --- a/net/wireless/sysfs.h +++ b/net/wireless/sysfs.h @@ -1,8 +1,8 @@ #ifndef __WIRELESS_SYSFS_H #define __WIRELESS_SYSFS_H -extern int wiphy_sysfs_init(void); -extern void wiphy_sysfs_exit(void); +int wiphy_sysfs_init(void); +void wiphy_sysfs_exit(void); extern struct class ieee80211_class; diff --git a/net/wireless/util.c b/net/wireless/util.c index ce090c1c5e4f..3c8be6104ba4 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -10,6 +10,7 @@ #include <net/cfg80211.h> #include <net/ip.h> #include <net/dsfield.h> +#include <linux/if_vlan.h> #include "core.h" #include "rdev-ops.h" @@ -691,6 +692,7 @@ EXPORT_SYMBOL(ieee80211_amsdu_to_8023s); unsigned int cfg80211_classify8021d(struct sk_buff *skb) { unsigned int dscp; + unsigned char vlan_priority; /* skb->priority values from 256->263 are magic values to * directly indicate a specific 802.1d priority. This is used @@ -700,6 +702,13 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb) if (skb->priority >= 256 && skb->priority <= 263) return skb->priority - 256; + if (vlan_tx_tag_present(skb)) { + vlan_priority = (vlan_tx_tag_get(skb) & VLAN_PRIO_MASK) + >> VLAN_PRIO_SHIFT; + if (vlan_priority > 0) + return vlan_priority; + } + switch (skb->protocol) { case htons(ETH_P_IP): dscp = ipv4_get_dsfield(ip_hdr(skb)) & 0xfc; diff --git a/net/x25/Kconfig b/net/x25/Kconfig index c959312c45e3..e2fa133f9fba 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -16,8 +16,8 @@ config X25 if you want that) and the lower level data link layer protocol LAPB (say Y to "LAPB Data Link Driver" below if you want that). - You can read more about X.25 at <http://www.sangoma.com/x25.htm> and - <http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm>. + You can read more about X.25 at <http://www.sangoma.com/tutorials/x25/> and + <http://docwiki.cisco.com/wiki/X.25>. Information about X.25 for Linux is contained in the files <file:Documentation/networking/x25.txt> and <file:Documentation/networking/x25-iface.txt>. diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 716502ada53b..0622d319e1f2 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -130,7 +130,7 @@ static inline unsigned int __addr_hash(const xfrm_address_t *daddr, return h & hmask; } -extern struct hlist_head *xfrm_hash_alloc(unsigned int sz); -extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz); +struct hlist_head *xfrm_hash_alloc(unsigned int sz); +void xfrm_hash_free(struct hlist_head *n, unsigned int sz); #endif /* _XFRM_HASH_H */ diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 2906d520eea7..ccfdc7115a83 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -141,14 +141,14 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) const int plen = skb->len; int dlen = IPCOMP_SCRATCH_SIZE; u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + struct crypto_comp *tfm; + u8 *scratch; int err; local_bh_disable(); + scratch = *this_cpu_ptr(ipcomp_scratches); + tfm = *this_cpu_ptr(ipcd->tfms); err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); if (err) goto out; @@ -158,13 +158,13 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) } memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); + local_bh_enable(); pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); return 0; out: - put_cpu(); + local_bh_enable(); return err; } @@ -220,8 +220,8 @@ static void ipcomp_free_scratches(void) static void * __percpu *ipcomp_alloc_scratches(void) { - int i; void * __percpu *scratches; + int i; if (ipcomp_scratch_users++) return ipcomp_scratches; @@ -233,7 +233,9 @@ static void * __percpu *ipcomp_alloc_scratches(void) ipcomp_scratches = scratches; for_each_possible_cpu(i) { - void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); + void *scratch; + + scratch = vmalloc_node(IPCOMP_SCRATCH_SIZE, cpu_to_node(i)); if (!scratch) return NULL; *per_cpu_ptr(scratches, i) = scratch; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ed38d5d81f9e..9a91f7431c41 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -334,7 +334,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) atomic_inc(&policy->genid); - del_timer(&policy->polq.hold_timer); + if (del_timer(&policy->polq.hold_timer)) + xfrm_pol_put(policy); xfrm_queue_purge(&policy->polq.hold_queue); if (del_timer(&policy->timer)) @@ -589,7 +590,8 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice_init(&pq->hold_queue, &list); - del_timer(&pq->hold_timer); + if (del_timer(&pq->hold_timer)) + xfrm_pol_put(old); spin_unlock_bh(&pq->hold_queue.lock); if (skb_queue_empty(&list)) @@ -600,7 +602,8 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice(&list, &pq->hold_queue); pq->timeout = XFRM_QUEUE_TMO_MIN; - mod_timer(&pq->hold_timer, jiffies); + if (!mod_timer(&pq->hold_timer, jiffies)) + xfrm_pol_hold(new); spin_unlock_bh(&pq->hold_queue.lock); } @@ -1769,6 +1772,10 @@ static void xfrm_policy_queue_process(unsigned long arg) spin_lock(&pq->hold_queue.lock); skb = skb_peek(&pq->hold_queue); + if (!skb) { + spin_unlock(&pq->hold_queue.lock); + goto out; + } dst = skb_dst(skb); sk = skb->sk; xfrm_decode_session(skb, &fl, dst->ops->family); @@ -1787,8 +1794,9 @@ static void xfrm_policy_queue_process(unsigned long arg) goto purge_queue; pq->timeout = pq->timeout << 1; - mod_timer(&pq->hold_timer, jiffies + pq->timeout); - return; + if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout)) + xfrm_pol_hold(pol); + goto out; } dst_release(dst); @@ -1819,11 +1827,14 @@ static void xfrm_policy_queue_process(unsigned long arg) err = dst_output(skb); } +out: + xfrm_pol_put(pol); return; purge_queue: pq->timeout = 0; xfrm_queue_purge(&pq->hold_queue); + xfrm_pol_put(pol); } static int xdst_queue_output(struct sk_buff *skb) @@ -1831,7 +1842,15 @@ static int xdst_queue_output(struct sk_buff *skb) unsigned long sched_next; struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *) dst; - struct xfrm_policy_queue *pq = &xdst->pols[0]->polq; + struct xfrm_policy *pol = xdst->pols[0]; + struct xfrm_policy_queue *pq = &pol->polq; + const struct sk_buff *fclone = skb + 1; + + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && + fclone->fclone == SKB_FCLONE_CLONE)) { + kfree_skb(skb); + return 0; + } if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { kfree_skb(skb); @@ -1850,10 +1869,12 @@ static int xdst_queue_output(struct sk_buff *skb) if (del_timer(&pq->hold_timer)) { if (time_before(pq->hold_timer.expires, sched_next)) sched_next = pq->hold_timer.expires; + xfrm_pol_put(pol); } __skb_queue_tail(&pq->hold_queue, skb); - mod_timer(&pq->hold_timer, sched_next); + if (!mod_timer(&pq->hold_timer, sched_next)) + xfrm_pol_hold(pol); spin_unlock_bh(&pq->hold_queue.lock); diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 8dafe6d3c6e4..dab57daae408 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -61,9 +61,9 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event) switch (event) { case XFRM_REPLAY_UPDATE: - if (x->replay_maxdiff && - (x->replay.seq - x->preplay.seq < x->replay_maxdiff) && - (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) { + if (!x->replay_maxdiff || + ((x->replay.seq - x->preplay.seq < x->replay_maxdiff) && + (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))) { if (x->xflags & XFRM_TIME_DEFER) event = XFRM_REPLAY_TIMEOUT; else @@ -129,8 +129,7 @@ static int xfrm_replay_check(struct xfrm_state *x, return 0; diff = x->replay.seq - seq; - if (diff >= min_t(unsigned int, x->props.replay_window, - sizeof(x->replay.bitmap) * 8)) { + if (diff >= x->props.replay_window) { x->stats.replay_window++; goto err; } @@ -302,9 +301,10 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) switch (event) { case XFRM_REPLAY_UPDATE: - if (x->replay_maxdiff && - (replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && - (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff)) { + if (!x->replay_maxdiff || + ((replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && + (replay_esn->oseq - preplay_esn->oseq + < x->replay_maxdiff))) { if (x->xflags & XFRM_TIME_DEFER) event = XFRM_REPLAY_TIMEOUT; else @@ -353,28 +353,30 @@ static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) switch (event) { case XFRM_REPLAY_UPDATE: - if (!x->replay_maxdiff) - break; - - if (replay_esn->seq_hi == preplay_esn->seq_hi) - seq_diff = replay_esn->seq - preplay_esn->seq; - else - seq_diff = ~preplay_esn->seq + replay_esn->seq + 1; - - if (replay_esn->oseq_hi == preplay_esn->oseq_hi) - oseq_diff = replay_esn->oseq - preplay_esn->oseq; - else - oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1; - - if (seq_diff < x->replay_maxdiff && - oseq_diff < x->replay_maxdiff) { + if (x->replay_maxdiff) { + if (replay_esn->seq_hi == preplay_esn->seq_hi) + seq_diff = replay_esn->seq - preplay_esn->seq; + else + seq_diff = ~preplay_esn->seq + replay_esn->seq + + 1; - if (x->xflags & XFRM_TIME_DEFER) - event = XFRM_REPLAY_TIMEOUT; + if (replay_esn->oseq_hi == preplay_esn->oseq_hi) + oseq_diff = replay_esn->oseq + - preplay_esn->oseq; else - return; + oseq_diff = ~preplay_esn->oseq + + replay_esn->oseq + 1; + + if (seq_diff >= x->replay_maxdiff || + oseq_diff >= x->replay_maxdiff) + break; } + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + break; case XFRM_REPLAY_TIMEOUT: diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d6e7f98fbfbf..68c2f357a183 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -815,7 +815,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, xfrm_state_look_at(pol, x, fl, encap_family, &best, &acquire_in_progress, &error); } - if (best) + if (best || acquire_in_progress) goto found; h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); @@ -824,7 +824,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, encap_family) && + xfrm_addr_equal(&x->id.daddr, daddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3f565e495ac6..f964d4c00ffb 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -446,7 +446,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->sel, &p->sel, sizeof(x->sel)); memcpy(&x->lft, &p->lft, sizeof(x->lft)); x->props.mode = p->mode; - x->props.replay_window = p->replay_window; + x->props.replay_window = min_t(unsigned int, p->replay_window, + sizeof(x->replay.bitmap) * 8); x->props.reqid = p->reqid; x->props.family = p->family; memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); @@ -1856,7 +1857,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (x->km.state != XFRM_STATE_VALID) goto out; - err = xfrm_replay_verify_len(x->replay_esn, rp); + err = xfrm_replay_verify_len(x->replay_esn, re); if (err) goto out; |