diff options
Diffstat (limited to 'drivers/net/bonding/bond_alb.c')
| -rw-r--r-- | drivers/net/bonding/bond_alb.c | 1115 |
1 file changed, 585 insertions, 530 deletions
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 4ea8ed150d46..2d37b07c8215 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1,27 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called LICENSE. 
- * */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> @@ -36,22 +17,14 @@ #include <linux/if_bonding.h> #include <linux/if_vlan.h> #include <linux/in.h> -#include <net/ipx.h> #include <net/arp.h> #include <net/ipv6.h> +#include <net/ndisc.h> #include <asm/byteorder.h> -#include "bonding.h" -#include "bond_alb.h" - +#include <net/bonding.h> +#include <net/bond_alb.h> - -#ifndef __long_aligned -#define __long_aligned __attribute__((aligned((sizeof(long))))) -#endif -static const u8 mac_bcast[ETH_ALEN] __long_aligned = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -}; -static const u8 mac_v6_allmcast[ETH_ALEN] __long_aligned = { +static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x01 }; static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; @@ -77,13 +50,9 @@ struct arp_pkt { }; #pragma pack() -static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) -{ - return (struct arp_pkt *)skb_network_header(skb); -} - /* Forward declaration */ -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); +static void alb_send_learning_packets(struct slave *slave, const u8 mac_addr[], + bool strict_match); static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp); static void rlb_src_unlink(struct bonding *bond, u32 index); static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, @@ -94,36 +63,14 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size) int i; u8 hash = 0; - for (i = 0; i < hash_size; i++) { + for (i = 0; i < hash_size; i++) hash ^= hash_start[i]; - } return hash; } /*********************** tlb specific functions ***************************/ -static inline void _lock_tx_hashtbl_bh(struct bonding *bond) -{ - spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); -} - -static inline void _unlock_tx_hashtbl_bh(struct bonding *bond) -{ - 
spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); -} - -static inline void _lock_tx_hashtbl(struct bonding *bond) -{ - spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); -} - -static inline void _unlock_tx_hashtbl(struct bonding *bond) -{ - spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); -} - -/* Caller must hold tx_hashtbl lock */ static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) { if (save_load) { @@ -143,7 +90,6 @@ static inline void tlb_init_slave(struct slave *slave) SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; } -/* Caller must hold bond lock for read, BH disabled */ static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) { @@ -158,6 +104,7 @@ static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, index = SLAVE_TLB_INFO(slave).head; while (index != TLB_NULL_INDEX) { u32 next_index = tx_hash_table[index].next; + tlb_init_table_entry(&tx_hash_table[index], save_load); index = next_index; } @@ -166,13 +113,12 @@ static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, tlb_init_slave(slave); } -/* Caller must hold bond lock for read */ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) { - _lock_tx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); __tlb_clear_slave(bond, slave, save_load); - _unlock_tx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } /* Must be called before starting the monitor timer */ @@ -185,17 +131,16 @@ static int tlb_initialize(struct bonding *bond) new_hashtbl = kzalloc(size, GFP_KERNEL); if (!new_hashtbl) - return -1; + return -ENOMEM; - _lock_tx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); bond_info->tx_hashtbl = new_hashtbl; - for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) { + for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); - } - _unlock_tx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); return 0; } @@ -205,12 +150,12 @@ static 
void tlb_deinitialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - _lock_tx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); kfree(bond_info->tx_hashtbl); bond_info->tx_hashtbl = NULL; - _unlock_tx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } static long long compute_gap(struct slave *slave) @@ -219,19 +164,18 @@ static long long compute_gap(struct slave *slave) (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ } -/* Caller must hold bond lock for read */ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) { struct slave *slave, *least_loaded; + struct list_head *iter; long long max_gap; - int i; least_loaded = NULL; max_gap = LLONG_MIN; /* Find the slave with the largest gap */ - bond_for_each_slave(bond, slave, i) { - if (SLAVE_IS_OK(slave)) { + bond_for_each_slave_rcu(bond, slave, iter) { + if (bond_slave_can_tx(slave)) { long long gap = compute_gap(slave); if (max_gap < gap) { @@ -265,9 +209,8 @@ static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index, hash_table[hash_index].next = next_index; hash_table[hash_index].prev = TLB_NULL_INDEX; - if (next_index != TLB_NULL_INDEX) { + if (next_index != TLB_NULL_INDEX) hash_table[next_index].prev = hash_index; - } slave_info->head = hash_index; slave_info->load += @@ -275,49 +218,29 @@ static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index, } } - if (assigned_slave) { + if (assigned_slave) hash_table[hash_index].tx_bytes += skb_len; - } return assigned_slave; } -/* Caller must hold bond lock for read */ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) { struct slave *tx_slave; - /* - * We don't need to disable softirq here, becase + + /* We don't need to disable softirq here, because * tlb_choose_channel() is only called by bond_alb_xmit() * which already has softirq disabled. 
*/ - _lock_tx_hashtbl(bond); + spin_lock(&bond->mode_lock); tx_slave = __tlb_choose_channel(bond, hash_index, skb_len); - _unlock_tx_hashtbl(bond); + spin_unlock(&bond->mode_lock); + return tx_slave; } /*********************** rlb specific functions ***************************/ -static inline void _lock_rx_hashtbl_bh(struct bonding *bond) -{ - spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); -} - -static inline void _unlock_rx_hashtbl_bh(struct bonding *bond) -{ - spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); -} - -static inline void _lock_rx_hashtbl(struct bonding *bond) -{ - spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); -} - -static inline void _unlock_rx_hashtbl(struct bonding *bond) -{ - spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); -} /* when an ARP REPLY is received from a client update its info * in the rx_hashtbl @@ -328,9 +251,9 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) struct rlb_client_info *client_info; u32 hash_index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); - hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); + hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); client_info = &(bond_info->rx_hashtbl[hash_index]); if ((client_info->assigned) && @@ -338,12 +261,12 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) (client_info->ip_dst == arp->ip_src) && (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) { /* update the clients MAC address */ - memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN); + ether_addr_copy(client_info->mac_dst, arp->mac_src); client_info->ntt = 1; bond_info->rx_ntt = 1; } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, @@ -373,40 +296,55 @@ static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, if (arp->op_code == htons(ARPOP_REPLY)) { /* update rx 
hash table for this ARP */ rlb_update_entry_from_arp(bond, arp); - pr_debug("Server received an ARP Reply from client\n"); + slave_dbg(bond->dev, slave->dev, "Server received an ARP Reply from client\n"); } out: return RX_HANDLER_ANOTHER; } -/* Caller must hold bond lock for read */ -static struct slave *rlb_next_rx_slave(struct bonding *bond) +/* Caller must hold rcu_read_lock() */ +static struct slave *__rlb_next_rx_slave(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct slave *rx_slave, *slave, *start_at; - int i = 0; - - if (bond_info->next_rx_slave) { - start_at = bond_info->next_rx_slave; - } else { - start_at = bond->first_slave; - } + struct slave *before = NULL, *rx_slave = NULL, *slave; + struct list_head *iter; + bool found = false; - rx_slave = NULL; - - bond_for_each_slave_from(bond, slave, i, start_at) { - if (SLAVE_IS_OK(slave)) { - if (!rx_slave) { - rx_slave = slave; - } else if (slave->speed > rx_slave->speed) { + bond_for_each_slave_rcu(bond, slave, iter) { + if (!bond_slave_can_tx(slave)) + continue; + if (!found) { + if (!before || before->speed < slave->speed) + before = slave; + } else { + if (!rx_slave || rx_slave->speed < slave->speed) rx_slave = slave; - } } + if (slave == bond_info->rx_slave) + found = true; } + /* we didn't find anything after the current or we have something + * better before and up to the current slave + */ + if (!rx_slave || (before && rx_slave->speed < before->speed)) + rx_slave = before; - if (rx_slave) { - bond_info->next_rx_slave = rx_slave->next; - } + if (rx_slave) + bond_info->rx_slave = rx_slave; + + return rx_slave; +} + +/* Caller must hold RTNL, rcu_read_lock is obtained only to silence checkers */ +static struct slave *rlb_next_rx_slave(struct bonding *bond) +{ + struct slave *rx_slave; + + ASSERT_RTNL(); + + rcu_read_lock(); + rx_slave = __rlb_next_rx_slave(bond); + rcu_read_unlock(); return rx_slave; } @@ -414,16 +352,18 @@ static struct slave 
*rlb_next_rx_slave(struct bonding *bond) /* teach the switch the mac of a disabled slave * on the primary for fault tolerance * - * Caller must hold bond->curr_slave_lock for write or bond lock for write + * Caller must hold RTNL */ -static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) +static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, + const u8 addr[]) { - if (!bond->curr_active_slave) { + struct slave *curr_active = rtnl_dereference(bond->curr_active_slave); + + if (!curr_active) return; - } if (!bond->alb_info.primary_is_promisc) { - if (!dev_set_promiscuity(bond->curr_active_slave->dev, 1)) + if (!dev_set_promiscuity(curr_active->dev, 1)) bond->alb_info.primary_is_promisc = 1; else bond->alb_info.primary_is_promisc = 0; @@ -431,12 +371,12 @@ static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) bond->alb_info.rlb_promisc_timeout_counter = 0; - alb_send_learning_packets(bond->curr_active_slave, addr); + alb_send_learning_packets(curr_active, addr, true); } /* slave being removed should not be active at this point * - * Caller must hold bond lock for read + * Caller must hold rtnl. 
*/ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) { @@ -445,7 +385,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) u32 index, next_index; /* clear slave from rx_hashtbl */ - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); rx_hash_table = bond_info->rx_hashtbl; index = bond_info->rx_hashtbl_used_head; @@ -456,8 +396,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) if (assigned_slave) { rx_hash_table[index].slave = assigned_slave; - if (!ether_addr_equal_64bits(rx_hash_table[index].mac_dst, - mac_bcast)) { + if (is_valid_ether_addr(rx_hash_table[index].mac_dst)) { bond_info->rx_hashtbl[index].ntt = 1; bond_info->rx_ntt = 1; /* A slave has been removed from the @@ -476,24 +415,18 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) } } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); - write_lock_bh(&bond->curr_slave_lock); - - if (slave != bond->curr_active_slave) { + if (slave != rtnl_dereference(bond->curr_active_slave)) rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); - } - - write_unlock_bh(&bond->curr_slave_lock); } static void rlb_update_client(struct rlb_client_info *client_info) { int i; - if (!client_info->slave) { + if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst)) return; - } for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { struct sk_buff *skb; @@ -506,20 +439,17 @@ static void rlb_update_client(struct rlb_client_info *client_info) client_info->slave->dev->dev_addr, client_info->mac_dst); if (!skb) { - pr_err("%s: Error: failed to create an ARP packet\n", - client_info->slave->bond->dev->name); + slave_err(client_info->slave->bond->dev, + client_info->slave->dev, + "failed to create an ARP packet\n"); continue; } skb->dev = client_info->slave->dev; - if (client_info->tag) { - skb = vlan_put_tag(skb, htons(ETH_P_8021Q), client_info->vlan_id); - if (!skb) { - pr_err("%s: Error: failed to insert VLAN 
tag\n", - client_info->slave->bond->dev->name); - continue; - } + if (client_info->vlan_id) { + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + client_info->vlan_id); } arp_xmit(skb); @@ -533,7 +463,7 @@ static void rlb_update_rx_clients(struct bonding *bond) struct rlb_client_info *client_info; u32 hash_index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; @@ -541,9 +471,8 @@ static void rlb_update_rx_clients(struct bonding *bond) client_info = &(bond_info->rx_hashtbl[hash_index]); if (client_info->ntt) { rlb_update_client(client_info); - if (bond_info->rlb_update_retry_counter == 0) { + if (bond_info->rlb_update_retry_counter == 0) client_info->ntt = 0; - } } } @@ -552,7 +481,7 @@ static void rlb_update_rx_clients(struct bonding *bond) */ bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } /* The slave was assigned a new mac address - update the clients */ @@ -563,7 +492,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla int ntt = 0; u32 hash_index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; @@ -571,20 +500,20 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla client_info = &(bond_info->rx_hashtbl[hash_index]); if ((client_info->slave == slave) && - !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { + is_valid_ether_addr(client_info->mac_dst)) { client_info->ntt = 1; ntt = 1; } } - // update the team's flag only after the whole iteration + /* update the team's flag only after the whole iteration */ if (ntt) { bond_info->rx_ntt = 1; - //fasten the change + /* fasten the change */ bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } /* mark all 
clients using src_ip to be updated */ @@ -594,7 +523,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) struct rlb_client_info *client_info; u32 hash_index; - _lock_rx_hashtbl(bond); + spin_lock(&bond->mode_lock); hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; @@ -602,36 +531,37 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) client_info = &(bond_info->rx_hashtbl[hash_index]); if (!client_info->slave) { - pr_err("%s: Error: found a client with no channel in the client's hash table\n", - bond->dev->name); + netdev_err(bond->dev, "found a client with no channel in the client's hash table\n"); continue; } - /*update all clients using this src_ip, that are not assigned + /* update all clients using this src_ip, that are not assigned * to the team's address (curr_active_slave) and have a known * unicast mac address. */ if ((client_info->ip_src == src_ip) && !ether_addr_equal_64bits(client_info->slave->dev->dev_addr, bond->dev->dev_addr) && - !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { + is_valid_ether_addr(client_info->mac_dst)) { client_info->ntt = 1; bond_info->rx_ntt = 1; } } - _unlock_rx_hashtbl(bond); + spin_unlock(&bond->mode_lock); } -/* Caller must hold both bond and ptr locks for read */ -static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) +static struct slave *rlb_choose_channel(struct sk_buff *skb, + struct bonding *bond, + const struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct arp_pkt *arp = arp_pkt(skb); - struct slave *assigned_slave; + struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; - _lock_rx_hashtbl(bond); + spin_lock(&bond->mode_lock); + + curr_active_slave = rcu_dereference(bond->curr_active_slave); hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst)); client_info = 
&(bond_info->rx_hashtbl[hash_index]); @@ -640,15 +570,15 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon if ((client_info->ip_src == arp->ip_src) && (client_info->ip_dst == arp->ip_dst)) { /* the entry is already assigned to this client */ - if (!ether_addr_equal_64bits(arp->mac_dst, mac_bcast)) { + if (!is_broadcast_ether_addr(arp->mac_dst)) { /* update mac address from arp */ - memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); + ether_addr_copy(client_info->mac_dst, arp->mac_dst); } - memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN); + ether_addr_copy(client_info->mac_src, arp->mac_src); assigned_slave = client_info->slave; if (assigned_slave) { - _unlock_rx_hashtbl(bond); + spin_unlock(&bond->mode_lock); return assigned_slave; } } else { @@ -656,15 +586,15 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon * move the old client to primary (curr_active_slave) so * that the new client can be assigned to this entry. */ - if (bond->curr_active_slave && - client_info->slave != bond->curr_active_slave) { - client_info->slave = bond->curr_active_slave; + if (curr_active_slave && + client_info->slave != curr_active_slave) { + client_info->slave = curr_active_slave; rlb_update_client(client_info); } } } /* assign a new slave */ - assigned_slave = rlb_next_rx_slave(bond); + assigned_slave = __rlb_next_rx_slave(bond); if (assigned_slave) { if (!(client_info->assigned && @@ -680,28 +610,27 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon client_info->ip_src = arp->ip_src; client_info->ip_dst = arp->ip_dst; - /* arp->mac_dst is broadcast for arp reqeusts. + /* arp->mac_dst is broadcast for arp requests. * will be updated with clients actual unicast mac address * upon receiving an arp reply. 
*/ - memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); - memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN); + ether_addr_copy(client_info->mac_dst, arp->mac_dst); + ether_addr_copy(client_info->mac_src, arp->mac_src); client_info->slave = assigned_slave; - if (!ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { + if (is_valid_ether_addr(client_info->mac_dst)) { client_info->ntt = 1; bond->alb_info.rx_ntt = 1; } else { client_info->ntt = 0; } - if (bond_vlan_used(bond)) { - if (!vlan_get_tag(skb, &client_info->vlan_id)) - client_info->tag = 1; - } + if (vlan_get_tag(skb, &client_info->vlan_id)) + client_info->vlan_id = 0; if (!client_info->assigned) { u32 prev_tbl_head = bond_info->rx_hashtbl_used_head; + bond_info->rx_hashtbl_used_head = hash_index; client_info->used_next = prev_tbl_head; if (prev_tbl_head != RLB_NULL_INDEX) { @@ -712,7 +641,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon } } - _unlock_rx_hashtbl(bond); + spin_unlock(&bond->mode_lock); return assigned_slave; } @@ -723,31 +652,44 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon */ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) { - struct arp_pkt *arp = arp_pkt(skb); struct slave *tx_slave = NULL; + struct net_device *dev; + struct arp_pkt *arp; + + if (!pskb_network_may_pull(skb, sizeof(*arp))) + return NULL; + arp = (struct arp_pkt *)skb_network_header(skb); - /* Don't modify or load balance ARPs that do not originate locally - * (e.g.,arrive via a bridge). + /* Don't modify or load balance ARPs that do not originate + * from the bond itself or a VLAN directly above the bond. 
*/ - if (!bond_slave_has_mac(bond, arp->mac_src)) + if (!bond_slave_has_mac_rcu(bond, arp->mac_src)) return NULL; - if (arp->op_code == htons(ARPOP_REPLY)) { - /* the arp must be sent on the selected - * rx channel - */ - tx_slave = rlb_choose_channel(skb, bond); - if (tx_slave) { - memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN); + dev = ip_dev_find(dev_net(bond->dev), arp->ip_src); + if (dev) { + if (netif_is_any_bridge_master(dev)) { + dev_put(dev); + return NULL; } - pr_debug("Server sent ARP Reply packet\n"); + dev_put(dev); + } + + if (arp->op_code == htons(ARPOP_REPLY)) { + /* the arp must be sent on the selected rx channel */ + tx_slave = rlb_choose_channel(skb, bond, arp); + if (tx_slave) + bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, + tx_slave->dev->addr_len); + netdev_dbg(bond->dev, "(slave %s): Server sent ARP Reply packet\n", + tx_slave ? tx_slave->dev->name : "NULL"); } else if (arp->op_code == htons(ARPOP_REQUEST)) { /* Create an entry in the rx_hashtbl for this client as a * place holder. * When the arp reply is received the entry will be updated * with the correct unicast address of the client. */ - rlb_choose_channel(skb, bond); + tx_slave = rlb_choose_channel(skb, bond, arp); /* The ARP reply packets must be delayed so that * they can cancel out the influence of the ARP request. @@ -760,13 +702,13 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) * updated with their assigned mac. */ rlb_req_update_subnet_clients(bond, arp->ip_src); - pr_debug("Server sent ARP Request packet\n"); + netdev_dbg(bond->dev, "(slave %s): Server sent ARP Request packet\n", + tx_slave ? 
tx_slave->dev->name : "NULL"); } return tx_slave; } -/* Caller must hold bond lock for read */ static void rlb_rebalance(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); @@ -775,36 +717,37 @@ static void rlb_rebalance(struct bonding *bond) int ntt; u32 hash_index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); ntt = 0; hash_index = bond_info->rx_hashtbl_used_head; for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); - assigned_slave = rlb_next_rx_slave(bond); + assigned_slave = __rlb_next_rx_slave(bond); if (assigned_slave && (client_info->slave != assigned_slave)) { client_info->slave = assigned_slave; - client_info->ntt = 1; - ntt = 1; + if (!is_zero_ether_addr(client_info->mac_dst)) { + client_info->ntt = 1; + ntt = 1; + } } } /* update the team's flag only after the whole iteration */ - if (ntt) { + if (ntt) bond_info->rx_ntt = 1; - } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } -/* Caller must hold rx_hashtbl lock */ +/* Caller must hold mode_lock */ static void rlb_init_table_entry_dst(struct rlb_client_info *entry) { entry->used_next = RLB_NULL_INDEX; entry->used_prev = RLB_NULL_INDEX; entry->assigned = 0; entry->slave = NULL; - entry->tag = 0; + entry->vlan_id = 0; } static void rlb_init_table_entry_src(struct rlb_client_info *entry) { @@ -885,26 +828,28 @@ static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash) bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash; } -/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does - * not match arp->mac_src */ +/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does + * not match arp->mac_src + */ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - u32 ip_src_hash = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); + 
u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); u32 index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); index = bond_info->rx_hashtbl[ip_src_hash].src_first; while (index != RLB_NULL_INDEX) { struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); u32 next_index = entry->src_next; + if (entry->ip_src == arp->ip_src && !ether_addr_equal_64bits(arp->mac_src, entry->mac_src)) - rlb_delete_table_entry(bond, index); + rlb_delete_table_entry(bond, index); index = next_index; } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } static int rlb_initialize(struct bonding *bond) @@ -918,17 +863,16 @@ static int rlb_initialize(struct bonding *bond) if (!new_hashtbl) return -1; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); bond_info->rx_hashtbl = new_hashtbl; bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; - for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) { + for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) rlb_init_table_entry(bond_info->rx_hashtbl + i); - } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); /* register to receive ARPs */ bond->recv_probe = rlb_arp_recv; @@ -940,13 +884,13 @@ static void rlb_deinitialize(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); kfree(bond_info->rx_hashtbl); bond_info->rx_hashtbl = NULL; bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) @@ -954,134 +898,182 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); u32 curr_index; - _lock_rx_hashtbl_bh(bond); + spin_lock_bh(&bond->mode_lock); curr_index = bond_info->rx_hashtbl_used_head; while (curr_index != RLB_NULL_INDEX) { struct rlb_client_info *curr = 
&(bond_info->rx_hashtbl[curr_index]); u32 next_index = bond_info->rx_hashtbl[curr_index].used_next; - if (curr->tag && (curr->vlan_id == vlan_id)) + if (curr->vlan_id == vlan_id) rlb_delete_table_entry(bond, curr_index); curr_index = next_index; } - _unlock_rx_hashtbl_bh(bond); + spin_unlock_bh(&bond->mode_lock); } /*********************** tlb/rlb shared functions *********************/ -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) +static void alb_send_lp_vid(struct slave *slave, const u8 mac_addr[], + __be16 vlan_proto, u16 vid) { - struct bonding *bond = bond_get_bond_by_slave(slave); struct learning_pkt pkt; + struct sk_buff *skb; int size = sizeof(struct learning_pkt); - int i; memset(&pkt, 0, size); - memcpy(pkt.mac_dst, mac_addr, ETH_ALEN); - memcpy(pkt.mac_src, mac_addr, ETH_ALEN); - pkt.type = cpu_to_be16(ETH_P_LOOP); + ether_addr_copy(pkt.mac_dst, mac_addr); + ether_addr_copy(pkt.mac_src, mac_addr); + pkt.type = cpu_to_be16(ETH_P_LOOPBACK); - for (i = 0; i < MAX_LP_BURST; i++) { - struct sk_buff *skb; - char *data; + skb = dev_alloc_skb(size); + if (!skb) + return; - skb = dev_alloc_skb(size); - if (!skb) { - return; - } + skb_put_data(skb, &pkt, size); - data = skb_put(skb, size); - memcpy(data, &pkt, size); + skb_reset_mac_header(skb); + skb->network_header = skb->mac_header + ETH_HLEN; + skb->protocol = pkt.type; + skb->priority = TC_PRIO_CONTROL; + skb->dev = slave->dev; - skb_reset_mac_header(skb); - skb->network_header = skb->mac_header + ETH_HLEN; - skb->protocol = pkt.type; - skb->priority = TC_PRIO_CONTROL; - skb->dev = slave->dev; + slave_dbg(slave->bond->dev, slave->dev, + "Send learning packet: mac %pM vlan %d\n", mac_addr, vid); - if (bond_vlan_used(bond)) { - struct vlan_entry *vlan; + if (vid) + __vlan_hwaccel_put_tag(skb, vlan_proto, vid); - vlan = bond_next_vlan(bond, - bond->alb_info.current_alb_vlan); + dev_queue_xmit(skb); +} - bond->alb_info.current_alb_vlan = vlan; - if (!vlan) { - kfree_skb(skb); - 
continue; - } +struct alb_walk_data { + struct bonding *bond; + struct slave *slave; + const u8 *mac_addr; + bool strict_match; +}; - skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan->vlan_id); - if (!skb) { - pr_err("%s: Error: failed to insert VLAN tag\n", - bond->dev->name); - continue; - } +static int alb_upper_dev_walk(struct net_device *upper, + struct netdev_nested_priv *priv) +{ + struct alb_walk_data *data = (struct alb_walk_data *)priv->data; + bool strict_match = data->strict_match; + const u8 *mac_addr = data->mac_addr; + struct bonding *bond = data->bond; + struct slave *slave = data->slave; + struct bond_vlan_tag *tags; + + if (is_vlan_dev(upper) && + bond->dev->lower_level == upper->lower_level - 1) { + if (upper->addr_assign_type == NET_ADDR_STOLEN) { + alb_send_lp_vid(slave, mac_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } else { + alb_send_lp_vid(slave, upper->dev_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); } + } + + /* If this is a macvlan device, then only send updates + * when strict_match is turned off. + */ + if (netif_is_macvlan(upper) && !strict_match) { + tags = bond_verify_device_path(bond->dev, upper, 0); + if (IS_ERR_OR_NULL(tags)) + return -ENOMEM; - dev_queue_xmit(skb); + alb_send_lp_vid(slave, upper->dev_addr, + tags[0].vlan_proto, tags[0].vlan_id); + kfree(tags); } + + return 0; +} + +static void alb_send_learning_packets(struct slave *slave, const u8 mac_addr[], + bool strict_match) +{ + struct bonding *bond = bond_get_bond_by_slave(slave); + struct netdev_nested_priv priv; + struct alb_walk_data data = { + .strict_match = strict_match, + .mac_addr = mac_addr, + .slave = slave, + .bond = bond, + }; + + priv.data = (void *)&data; + /* send untagged */ + alb_send_lp_vid(slave, mac_addr, 0, 0); + + /* loop through all devices and see if we need to send a packet + * for that device. 
+ */ + rcu_read_lock(); + netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &priv); + rcu_read_unlock(); } -static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[]) +static int alb_set_slave_mac_addr(struct slave *slave, const u8 addr[], + unsigned int len) { struct net_device *dev = slave->dev; - struct sockaddr s_addr; + struct sockaddr_storage ss; - if (slave->bond->params.mode == BOND_MODE_TLB) { - memcpy(dev->dev_addr, addr, dev->addr_len); + if (BOND_MODE(slave->bond) == BOND_MODE_TLB) { + __dev_addr_set(dev, addr, len); return 0; } - /* for rlb each slave must have a unique hw mac addresses so that */ - /* each slave will receive packets destined to a different mac */ - memcpy(s_addr.sa_data, addr, dev->addr_len); - s_addr.sa_family = dev->type; - if (dev_set_mac_address(dev, &s_addr)) { - pr_err("%s: Error: dev_set_mac_address of dev %s failed!\n" - "ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n", - slave->bond->dev->name, dev->name); + /* for rlb each slave must have a unique hw mac addresses so that + * each slave will receive packets destined to a different mac + */ + memcpy(ss.__data, addr, len); + ss.ss_family = dev->type; + if (dev_set_mac_address(dev, &ss, NULL)) { + slave_err(slave->bond->dev, dev, "dev_set_mac_address on slave failed! ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n"); return -EOPNOTSUPP; } return 0; } -/* - * Swap MAC addresses between two slaves. +/* Swap MAC addresses between two slaves. * * Called with RTNL held, and no other locks. 
- * */ - static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2) { - u8 tmp_mac_addr[ETH_ALEN]; + u8 tmp_mac_addr[MAX_ADDR_LEN]; - memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN); - alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr); - alb_set_slave_mac_addr(slave2, tmp_mac_addr); + bond_hw_addr_copy(tmp_mac_addr, slave1->dev->dev_addr, + slave1->dev->addr_len); + alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, + slave2->dev->addr_len); + alb_set_slave_mac_addr(slave2, tmp_mac_addr, + slave1->dev->addr_len); } -/* - * Send learning packets after MAC address swap. +/* Send learning packets after MAC address swap. * * Called with RTNL and no other locks */ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, struct slave *slave2) { - int slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2)); + int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2)); struct slave *disabled_slave = NULL; ASSERT_RTNL(); /* fasten the change in the switch */ - if (SLAVE_IS_OK(slave1)) { - alb_send_learning_packets(slave1, slave1->dev->dev_addr); + if (bond_slave_can_tx(slave1)) { + alb_send_learning_packets(slave1, slave1->dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform the clients that the mac address * has changed @@ -1092,8 +1084,8 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, disabled_slave = slave1; } - if (SLAVE_IS_OK(slave2)) { - alb_send_learning_packets(slave2, slave2->dev->dev_addr); + if (bond_slave_can_tx(slave2)) { + alb_send_learning_packets(slave2, slave2->dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform the clients that the mac address * has changed @@ -1121,7 +1113,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, * If @slave's permanent hw address is different both from its current * address and from @bond's address, then somewhere in the bond there's * a slave that has 
@slave's permanet address as its current address. - * We'll make sure that that slave no longer uses @slave's permanent address. + * We'll make sure that slave no longer uses @slave's permanent address. * * Caller must hold RTNL and no other locks */ @@ -1140,7 +1132,6 @@ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *sla found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr); if (found_slave) { - /* locking: needs RTNL and nothing else */ alb_swap_mac_addr(slave, found_slave); alb_fasten_mac_swap(bond, slave, found_slave); } @@ -1171,11 +1162,11 @@ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *sla */ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) { + struct slave *has_bond_addr = rcu_access_pointer(bond->curr_active_slave); struct slave *tmp_slave1, *free_mac_slave = NULL; - struct slave *has_bond_addr = bond->curr_active_slave; - int i; + struct list_head *iter; - if (bond->slave_cnt == 0) { + if (!bond_has_slaves(bond)) { /* this is the first slave */ return 0; } @@ -1189,14 +1180,16 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav return 0; /* Try setting slave mac to bond address and fall-through - to code handling that situation below... */ - alb_set_slave_mac_addr(slave, bond->dev->dev_addr); + * to code handling that situation below... + */ + alb_set_slave_mac_addr(slave, bond->dev->dev_addr, + bond->dev->addr_len); } /* The slave's address is equal to the address of the bond. * Search for a spare address in the bond for this slave. 
*/ - bond_for_each_slave(bond, tmp_slave1, i) { + bond_for_each_slave(bond, tmp_slave1, iter) { if (!bond_slave_has_mac(bond, tmp_slave1->perm_hwaddr)) { /* no slave has tmp_slave1's perm addr * as its curr addr @@ -1215,15 +1208,14 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav } if (free_mac_slave) { - alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr); + alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr, + free_mac_slave->dev->addr_len); - pr_warning("%s: Warning: the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", - bond->dev->name, slave->dev->name, + slave_warn(bond->dev, slave->dev, "the slave hw address is in use by the bond; giving it the hw address of %s\n", free_mac_slave->dev->name); } else if (has_bond_addr) { - pr_err("%s: Error: the hw address of slave %s is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n", - bond->dev->name, slave->dev->name); + slave_err(bond->dev, slave->dev, "the slave hw address is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n"); return -EFAULT; } @@ -1232,8 +1224,8 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav /** * alb_set_mac_address - * @bond: - * @addr: + * @bond: bonding we're working on + * @addr: MAC address to set * * In TLB mode all slaves are configured to the bond's hw address, but set * their dev_addr field to different addresses (based on their permanent hw @@ -1246,24 +1238,24 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav */ static int alb_set_mac_address(struct bonding *bond, void *addr) { - struct sockaddr sa; - struct slave *slave, *stop_at; - char tmp_addr[ETH_ALEN]; + struct slave *slave, *rollback_slave; + struct list_head *iter; + struct sockaddr_storage ss; + char tmp_addr[MAX_ADDR_LEN]; int res; - int i; - if 
(bond->alb_info.rlb_enabled) { + if (bond->alb_info.rlb_enabled) return 0; - } - bond_for_each_slave(bond, slave, i) { + bond_for_each_slave(bond, slave, iter) { /* save net_device's current hw address */ - memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); + bond_hw_addr_copy(tmp_addr, slave->dev->dev_addr, + slave->dev->addr_len); - res = dev_set_mac_address(slave->dev, addr); + res = dev_set_mac_address(slave->dev, addr, NULL); /* restore net_device's hw address */ - memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); + dev_addr_set(slave->dev, tmp_addr); if (res) goto unwind; @@ -1272,39 +1264,60 @@ static int alb_set_mac_address(struct bonding *bond, void *addr) return 0; unwind: - memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); - sa.sa_family = bond->dev->type; + memcpy(ss.__data, bond->dev->dev_addr, bond->dev->addr_len); + ss.ss_family = bond->dev->type; /* unwind from head to the slave that failed */ - stop_at = slave; - bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { - memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); - dev_set_mac_address(slave->dev, &sa); - memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); + bond_for_each_slave(bond, rollback_slave, iter) { + if (rollback_slave == slave) + break; + bond_hw_addr_copy(tmp_addr, rollback_slave->dev->dev_addr, + rollback_slave->dev->addr_len); + dev_set_mac_address(rollback_slave->dev, &ss, NULL); + dev_addr_set(rollback_slave->dev, tmp_addr); } return res; } -/************************ exported alb funcions ************************/ +/* determine if the packet is NA or NS */ +static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond) +{ + struct ipv6hdr *ip6hdr; + struct icmp6hdr *hdr; + + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) + return true; + + ip6hdr = ipv6_hdr(skb); + if (ip6hdr->nexthdr != IPPROTO_ICMPV6) + return false; + + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr) + sizeof(*hdr))) + return true; + + hdr = icmp6_hdr(skb); + return 
hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT || + hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION; +} + +/************************ exported alb functions ************************/ int bond_alb_initialize(struct bonding *bond, int rlb_enabled) { int res; res = tlb_initialize(bond); - if (res) { + if (res) return res; - } if (rlb_enabled) { - bond->alb_info.rlb_enabled = 1; - /* initialize rlb */ res = rlb_initialize(bond); if (res) { tlb_deinitialize(bond); return res; } + bond->alb_info.rlb_enabled = 1; } else { bond->alb_info.rlb_enabled = 0; } @@ -1318,52 +1331,131 @@ void bond_alb_deinitialize(struct bonding *bond) tlb_deinitialize(bond); - if (bond_info->rlb_enabled) { + if (bond_info->rlb_enabled) rlb_deinitialize(bond); +} + +static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, + struct slave *tx_slave) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct ethhdr *eth_data = eth_hdr(skb); + + if (!tx_slave) { + /* unbalanced or unassigned, send through primary */ + tx_slave = rcu_dereference(bond->curr_active_slave); + if (bond->params.tlb_dynamic_lb) + bond_info->unbalanced_load += skb->len; + } + + if (tx_slave && bond_slave_can_tx(tx_slave)) { + if (tx_slave != rcu_access_pointer(bond->curr_active_slave)) { + ether_addr_copy(eth_data->h_source, + tx_slave->dev->dev_addr); + } + + return bond_dev_queue_xmit(bond, skb, tx_slave->dev); } + + if (tx_slave && bond->params.tlb_dynamic_lb) { + spin_lock(&bond->mode_lock); + __tlb_clear_slave(bond, tx_slave, 0); + spin_unlock(&bond->mode_lock); + } + + /* no suitable interface, frame not sent */ + return bond_tx_drop(bond->dev, skb); } -int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, + struct sk_buff *skb) { - struct bonding *bond = netdev_priv(bond_dev); + struct slave *tx_slave = NULL; struct ethhdr *eth_data; + u32 hash_index; + + skb_reset_mac_header(skb); + eth_data = eth_hdr(skb); 
+ + /* Do not TX balance any multicast or broadcast */ + if (!is_multicast_ether_addr(eth_data->h_dest)) { + switch (skb->protocol) { + case htons(ETH_P_IPV6): + if (alb_determine_nd(skb, bond)) + break; + fallthrough; + case htons(ETH_P_IP): + hash_index = bond_xmit_hash(bond, skb); + if (bond->params.tlb_dynamic_lb) { + tx_slave = tlb_choose_channel(bond, + hash_index & 0xFF, + skb->len); + } else { + struct bond_up_slave *slaves; + unsigned int count; + + slaves = rcu_dereference(bond->usable_slaves); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (likely(count)) + tx_slave = slaves->arr[hash_index % + count]; + } + break; + } + } + return tx_slave; +} + +netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *tx_slave; + + tx_slave = bond_xmit_tlb_slave_get(bond, skb); + return bond_do_alb_xmit(skb, bond, tx_slave); +} + +struct slave *bond_xmit_alb_slave_get(struct bonding *bond, + struct sk_buff *skb) +{ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct slave *tx_slave = NULL; static const __be32 ip_bcast = htonl(0xffffffff); - int hash_size = 0; - int do_tx_balance = 1; - u32 hash_index = 0; + struct slave *tx_slave = NULL; const u8 *hash_start = NULL; - int res = 1; - struct ipv6hdr *ip6hdr; + bool do_tx_balance = true; + struct ethhdr *eth_data; + u32 hash_index = 0; + int hash_size = 0; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); - /* make sure that the curr_active_slave do not change during tx - */ - read_lock(&bond->curr_slave_lock); - switch (ntohs(skb->protocol)) { case ETH_P_IP: { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; - if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) || - (iph->daddr == ip_bcast) || - (iph->protocol == IPPROTO_IGMP)) { - do_tx_balance = 0; + if (is_broadcast_ether_addr(eth_data->h_dest) || + !pskb_network_may_pull(skb, sizeof(*iph))) { + do_tx_balance = false; + break; + } + iph 
= ip_hdr(skb); + if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) { + do_tx_balance = false; break; } hash_start = (char *)&(iph->daddr); hash_size = sizeof(iph->daddr); - } break; - case ETH_P_IPV6: + } + case ETH_P_IPV6: { + const struct ipv6hdr *ip6hdr; + /* IPv6 doesn't really use broadcast mac address, but leave * that here just in case. */ - if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast)) { - do_tx_balance = 0; + if (is_broadcast_ether_addr(eth_data->h_dest)) { + do_tx_balance = false; break; } @@ -1371,87 +1463,70 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) * broadcasts in IPv4. */ if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) { - do_tx_balance = 0; + do_tx_balance = false; + break; + } + + if (alb_determine_nd(skb, bond)) { + do_tx_balance = false; break; } - /* Additianally, DAD probes should not be tx-balanced as that + /* The IPv6 header is pulled by alb_determine_nd */ + /* Additionally, DAD probes should not be tx-balanced as that * will lead to false positives for duplicate addresses and * prevent address configuration from working. 
*/ ip6hdr = ipv6_hdr(skb); if (ipv6_addr_any(&ip6hdr->saddr)) { - do_tx_balance = 0; - break; - } - - hash_start = (char *)&(ipv6_hdr(skb)->daddr); - hash_size = sizeof(ipv6_hdr(skb)->daddr); - break; - case ETH_P_IPX: - if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { - /* something is wrong with this packet */ - do_tx_balance = 0; - break; - } - - if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { - /* The only protocol worth balancing in - * this family since it has an "ARP" like - * mechanism - */ - do_tx_balance = 0; + do_tx_balance = false; break; } - hash_start = (char*)eth_data->h_dest; - hash_size = ETH_ALEN; + hash_start = (char *)&ip6hdr->daddr; + hash_size = sizeof(ip6hdr->daddr); break; + } case ETH_P_ARP: - do_tx_balance = 0; - if (bond_info->rlb_enabled) { + do_tx_balance = false; + if (bond_info->rlb_enabled) tx_slave = rlb_arp_xmit(skb, bond); - } break; default: - do_tx_balance = 0; + do_tx_balance = false; break; } if (do_tx_balance) { - hash_index = _simple_hash(hash_start, hash_size); - tx_slave = tlb_choose_channel(bond, hash_index, skb->len); - } - - if (!tx_slave) { - /* unbalanced or unassigned, send through primary */ - tx_slave = bond->curr_active_slave; - bond_info->unbalanced_load += skb->len; - } + if (bond->params.tlb_dynamic_lb) { + hash_index = _simple_hash(hash_start, hash_size); + tx_slave = tlb_choose_channel(bond, hash_index, skb->len); + } else { + /* + * do_tx_balance means we are free to select the tx_slave + * So we do exactly what tlb would do for hash selection + */ - if (tx_slave && SLAVE_IS_OK(tx_slave)) { - if (tx_slave != bond->curr_active_slave) { - memcpy(eth_data->h_source, - tx_slave->dev->dev_addr, - ETH_ALEN); - } + struct bond_up_slave *slaves; + unsigned int count; - res = bond_dev_queue_xmit(bond, skb, tx_slave->dev); - } else { - if (tx_slave) { - _lock_tx_hashtbl(bond); - __tlb_clear_slave(bond, tx_slave, 0); - _unlock_tx_hashtbl(bond); + slaves = rcu_dereference(bond->usable_slaves); + count = slaves ? 
READ_ONCE(slaves->count) : 0; + if (likely(count)) + tx_slave = slaves->arr[bond_xmit_hash(bond, skb) % + count]; } } + return tx_slave; +} - read_unlock(&bond->curr_slave_lock); +netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *tx_slave = NULL; - if (res) { - /* no suitable interface, frame not sent */ - kfree_skb(skb); - } - return NETDEV_TX_OK; + tx_slave = bond_xmit_alb_slave_get(bond, skb); + return bond_do_alb_xmit(skb, bond, tx_slave); } void bond_alb_monitor(struct work_struct *work) @@ -1459,72 +1534,62 @@ void bond_alb_monitor(struct work_struct *work) struct bonding *bond = container_of(work, struct bonding, alb_work.work); struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct list_head *iter; struct slave *slave; - int i; - read_lock(&bond->lock); - - if (bond->slave_cnt == 0) { - bond_info->tx_rebalance_counter = 0; + if (!bond_has_slaves(bond)) { + atomic_set(&bond_info->tx_rebalance_counter, 0); bond_info->lp_counter = 0; goto re_arm; } - bond_info->tx_rebalance_counter++; + rcu_read_lock(); + + atomic_inc(&bond_info->tx_rebalance_counter); bond_info->lp_counter++; /* send learning packets */ - if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) { - /* change of curr_active_slave involves swapping of mac addresses. - * in order to avoid this swapping from happening while - * sending the learning packets, the curr_slave_lock must be held for - * read. - */ - read_lock(&bond->curr_slave_lock); - - bond_for_each_slave(bond, slave, i) { - alb_send_learning_packets(slave, slave->dev->dev_addr); + if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { + bool strict_match; + + bond_for_each_slave_rcu(bond, slave, iter) { + /* If updating current_active, use all currently + * user mac addresses (!strict_match). Otherwise, only + * use mac of the slave device. + * In RLB mode, we always use strict matches. 
+ */ + strict_match = (slave != rcu_access_pointer(bond->curr_active_slave) || + bond_info->rlb_enabled); + alb_send_learning_packets(slave, slave->dev->dev_addr, + strict_match); } - - read_unlock(&bond->curr_slave_lock); - bond_info->lp_counter = 0; } /* rebalance tx traffic */ - if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { - - read_lock(&bond->curr_slave_lock); - - bond_for_each_slave(bond, slave, i) { + if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) { + bond_for_each_slave_rcu(bond, slave, iter) { tlb_clear_slave(bond, slave, 1); - if (slave == bond->curr_active_slave) { + if (slave == rcu_access_pointer(bond->curr_active_slave)) { SLAVE_TLB_INFO(slave).load = bond_info->unbalanced_load / BOND_TLB_REBALANCE_INTERVAL; bond_info->unbalanced_load = 0; } } - - read_unlock(&bond->curr_slave_lock); - - bond_info->tx_rebalance_counter = 0; + atomic_set(&bond_info->tx_rebalance_counter, 0); } - /* handle rlb stuff */ if (bond_info->rlb_enabled) { if (bond_info->primary_is_promisc && (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { - /* - * dev_set_promiscuity requires rtnl and + /* dev_set_promiscuity requires rtnl and * nothing else. Avoid race with bond_close. */ - read_unlock(&bond->lock); - if (!rtnl_trylock()) { - read_lock(&bond->lock); + rcu_read_unlock(); + if (!rtnl_trylock()) goto re_arm; - } bond_info->rlb_promisc_timeout_counter = 0; @@ -1532,11 +1597,12 @@ void bond_alb_monitor(struct work_struct *work) * because a slave was disabled then * it can now leave promiscuous mode. 
*/ - dev_set_promiscuity(bond->curr_active_slave->dev, -1); + dev_set_promiscuity(rtnl_dereference(bond->curr_active_slave)->dev, + -1); bond_info->primary_is_promisc = 0; rtnl_unlock(); - read_lock(&bond->lock); + rcu_read_lock(); } if (bond_info->rlb_rebalance) { @@ -1550,19 +1616,16 @@ void bond_alb_monitor(struct work_struct *work) --bond_info->rlb_update_delay_counter; } else { rlb_update_rx_clients(bond); - if (bond_info->rlb_update_retry_counter) { + if (bond_info->rlb_update_retry_counter) --bond_info->rlb_update_retry_counter; - } else { + else bond_info->rx_ntt = 0; - } } } } - + rcu_read_unlock(); re_arm: queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); - - read_unlock(&bond->lock); } /* assumption: called before the slave is attached to the bond @@ -1572,61 +1635,58 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave) { int res; - res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr); - if (res) { + res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr, + slave->dev->addr_len); + if (res) return res; - } res = alb_handle_addr_collision_on_attach(bond, slave); - if (res) { + if (res) return res; - } tlb_init_slave(slave); /* order a rebalance ASAP */ - bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + atomic_set(&bond->alb_info.tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); - if (bond->alb_info.rlb_enabled) { + if (bond->alb_info.rlb_enabled) bond->alb_info.rlb_rebalance = 1; - } return 0; } -/* - * Remove slave from tlb and rlb hash tables, and fix up MAC addresses +/* Remove slave from tlb and rlb hash tables, and fix up MAC addresses * if necessary. 
* * Caller must hold RTNL and no other locks */ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) { - if (bond->slave_cnt > 1) { + if (bond_has_slaves(bond)) alb_change_hw_addr_on_detach(bond, slave); - } tlb_clear_slave(bond, slave, 0); if (bond->alb_info.rlb_enabled) { - bond->alb_info.next_rx_slave = NULL; + bond->alb_info.rx_slave = NULL; rlb_clear_slave(bond, slave); } + } -/* Caller must hold bond lock for read */ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); if (link == BOND_LINK_DOWN) { tlb_clear_slave(bond, slave, 0); - if (bond->alb_info.rlb_enabled) { + if (bond->alb_info.rlb_enabled) rlb_clear_slave(bond, slave); - } } else if (link == BOND_LINK_UP) { /* order a rebalance ASAP */ - bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + atomic_set(&bond_info->tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) { bond->alb_info.rlb_rebalance = 1; /* If the updelay module parameter is smaller than the @@ -1636,6 +1696,11 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char */ } } + + if (bond_is_nondyn_tlb(bond)) { + if (bond_update_slave_arr(bond, NULL)) + pr_err("Failed to build slave-array for TLB mode.\n"); + } } /** @@ -1646,37 +1711,28 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char * Set the bond->curr_active_slave to @new_slave and handle * mac address swapping and promiscuity changes as needed. * - * If new_slave is NULL, caller must hold curr_slave_lock or - * bond->lock for write. - * - * If new_slave is not NULL, caller must hold RTNL, bond->lock for - * read and curr_slave_lock for write. Processing here may sleep, so - * no other locks may be held. 
+ * Caller must hold RTNL */ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) - __releases(&bond->curr_slave_lock) - __releases(&bond->lock) - __acquires(&bond->lock) - __acquires(&bond->curr_slave_lock) { struct slave *swap_slave; + struct slave *curr_active; - if (bond->curr_active_slave == new_slave) { + curr_active = rtnl_dereference(bond->curr_active_slave); + if (curr_active == new_slave) return; - } - if (bond->curr_active_slave && bond->alb_info.primary_is_promisc) { - dev_set_promiscuity(bond->curr_active_slave->dev, -1); + if (curr_active && bond->alb_info.primary_is_promisc) { + dev_set_promiscuity(curr_active->dev, -1); bond->alb_info.primary_is_promisc = 0; bond->alb_info.rlb_promisc_timeout_counter = 0; } - swap_slave = bond->curr_active_slave; - bond->curr_active_slave = new_slave; + swap_slave = curr_active; + rcu_assign_pointer(bond->curr_active_slave, new_slave); - if (!new_slave || (bond->slave_cnt == 0)) { + if (!new_slave || !bond_has_slaves(bond)) return; - } /* set the new curr_active_slave to the bonds mac address * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave @@ -1684,83 +1740,88 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave if (!swap_slave) swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr); - /* - * Arrange for swap_slave and new_slave to temporarily be + /* Arrange for swap_slave and new_slave to temporarily be * ignored so we can mess with their MAC addresses without * fear of interference from transmit activity. 
*/ - if (swap_slave) { + if (swap_slave) tlb_clear_slave(bond, swap_slave, 1); - } tlb_clear_slave(bond, new_slave, 1); - write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); + /* in TLB mode, the slave might flip down/up with the old dev_addr, + * and thus filter bond->dev_addr's packets, so force bond's mac + */ + if (BOND_MODE(bond) == BOND_MODE_TLB) { + struct sockaddr_storage ss; + u8 tmp_addr[MAX_ADDR_LEN]; + + bond_hw_addr_copy(tmp_addr, new_slave->dev->dev_addr, + new_slave->dev->addr_len); - ASSERT_RTNL(); + bond_hw_addr_copy(ss.__data, bond->dev->dev_addr, + bond->dev->addr_len); + ss.ss_family = bond->dev->type; + /* we don't care if it can't change its mac, best effort */ + dev_set_mac_address(new_slave->dev, &ss, NULL); + + dev_addr_set(new_slave->dev, tmp_addr); + } /* curr_active_slave must be set before calling alb_swap_mac_addr */ if (swap_slave) { /* swap mac address */ alb_swap_mac_addr(swap_slave, new_slave); alb_fasten_mac_swap(bond, swap_slave, new_slave); - read_lock(&bond->lock); } else { /* set the new_slave to the bond mac address */ - alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr); - read_lock(&bond->lock); - alb_send_learning_packets(new_slave, bond->dev->dev_addr); + alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr, + bond->dev->addr_len); + alb_send_learning_packets(new_slave, bond->dev->dev_addr, + false); } - - write_lock_bh(&bond->curr_slave_lock); } -/* - * Called with RTNL - */ +/* Called with RTNL */ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) - __acquires(&bond->lock) - __releases(&bond->lock) { struct bonding *bond = netdev_priv(bond_dev); - struct sockaddr *sa = addr; + struct sockaddr_storage *ss = addr; + struct slave *curr_active; struct slave *swap_slave; int res; - if (!is_valid_ether_addr(sa->sa_data)) { + if (!is_valid_ether_addr(ss->__data)) return -EADDRNOTAVAIL; - } res = alb_set_mac_address(bond, addr); - if (res) { + if (res) return res; - } - 
memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); + dev_addr_set(bond_dev, ss->__data); /* If there is no curr_active_slave there is nothing else to do. * Otherwise we'll need to pass the new address to it and handle * duplications. */ - if (!bond->curr_active_slave) { + curr_active = rtnl_dereference(bond->curr_active_slave); + if (!curr_active) return 0; - } swap_slave = bond_slave_has_mac(bond, bond_dev->dev_addr); if (swap_slave) { - alb_swap_mac_addr(swap_slave, bond->curr_active_slave); - alb_fasten_mac_swap(bond, swap_slave, bond->curr_active_slave); + alb_swap_mac_addr(swap_slave, curr_active); + alb_fasten_mac_swap(bond, swap_slave, curr_active); } else { - alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr); + alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr, + bond_dev->addr_len); - read_lock(&bond->lock); - alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr); + alb_send_learning_packets(curr_active, + bond_dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform clients mac address has changed */ - rlb_req_update_slave_clients(bond, bond->curr_active_slave); + rlb_req_update_slave_clients(bond, curr_active); } - read_unlock(&bond->lock); } return 0; @@ -1768,13 +1829,7 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id) { - if (bond->alb_info.current_alb_vlan && - (bond->alb_info.current_alb_vlan->vlan_id == vlan_id)) { - bond->alb_info.current_alb_vlan = NULL; - } - - if (bond->alb_info.rlb_enabled) { + if (bond->alb_info.rlb_enabled) rlb_clear_vlan(bond, vlan_id); - } } |
