summaryrefslogtreecommitdiff
path: root/net/ipv4/sysctl_net_ipv4.c
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-06-12 16:42:18 -0700
committerJakub Kicinski <kuba@kernel.org>2024-06-12 16:42:18 -0700
commit05f43db7f011d0ebf95bd28562d7f23e30d7e107 (patch)
tree4ca9e9ef05433d9621909f12bd8d93a05333cc34 /net/ipv4/sysctl_net_ipv4.c
parent32b06603f8790b34835af43d259152eb57827413 (diff)
parent5f90d93b6108098f389f992e267588924e446049 (diff)
Merge branch 'allow-configuration-of-multipath-hash-seed'
Petr Machata says: ==================== Allow configuration of multipath hash seed Let me just quote the commit message of patch #2 here to inform the motivation and some of the implementation: When calculating hashes for the purpose of multipath forwarding, both IPv4 and IPv6 code currently fall back on flow_hash_from_keys(). That uses a randomly-generated seed. That's a fine choice by default, but unfortunately some deployments may need a tighter control over the seed used. In this patchset, make the seed configurable by adding a new sysctl key, net.ipv4.fib_multipath_hash_seed to control the seed. This seed is used specifically for multipath forwarding and not for the other concerns that flow_hash_from_keys() is used for, such as queue selection. Expose the knob as sysctl because other such settings, such as headers to hash, are also handled that way. Despite being placed in the net.ipv4 namespace, the multipath seed sysctl is used for both IPv4 and IPv6, similarly to e.g. a number of TCP variables. Like those, the multipath hash seed is a per-netns variable. The seed used by flow_hash_from_keys() is a 128-bit quantity. However it seems that usually the seed is a much more modest value. 32 bits seem typical (Cisco, Cumulus), some systems go even lower. For that reason, and to decouple the user interface from implementation details, go with a 32-bit quantity, which is then quadruplicated to form the siphash key. One example of use of this interface is avoiding hash polarization, where two ECMP routers, one behind the other, happen to make consistent hashing decisions, and as a result, part of the ECMP space of the latter router is never used. Another is a load balancer where several machines forward traffic to one of a number of leaves, and the forwarding decisions need to be made consistently. (This is a case of a desired hash polarization, mentioned e.g. in chapter 6.3 of [0].) There has already been a proposal to include a hash seed control interface in the past[1]. - Patches #1-#2 contain the substance of the work - Patch #3 is an mlxsw offload - Patches #4 and #5 are a selftest [0] https://www.usenix.org/system/files/conference/nsdi18/nsdi18-araujo.pdf [1] https://lore.kernel.org/netdev/YIlVpYMCn%2F8WfE1P@rnd/ ==================== Link: https://lore.kernel.org/r/20240607151357.421181-1-petrm@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4/sysctl_net_ipv4.c')
-rw-r--r--net/ipv4/sysctl_net_ipv4.c66
1 files changed, 66 insertions, 0 deletions
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index bb64c0ef092d..9140d20eb2d4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -464,6 +464,61 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
return ret;
}
+
+static u32 proc_fib_multipath_hash_rand_seed __ro_after_init;
+
+static void proc_fib_multipath_hash_init_rand_seed(void)
+{
+ get_random_bytes(&proc_fib_multipath_hash_rand_seed,
+ sizeof(proc_fib_multipath_hash_rand_seed));
+}
+
+static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
+{
+ struct sysctl_fib_multipath_hash_seed new = {
+ .user_seed = user_seed,
+ .mp_seed = (user_seed ? user_seed :
+ proc_fib_multipath_hash_rand_seed),
+ };
+
+ WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new);
+}
+
+static int proc_fib_multipath_hash_seed(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct sysctl_fib_multipath_hash_seed *mphs;
+ struct net *net = table->data;
+ struct ctl_table tmp;
+ u32 user_seed;
+ int ret;
+
+ mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
+ user_seed = mphs->user_seed;
+
+ tmp = *table;
+ tmp.data = &user_seed;
+
+ ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+ if (write && ret == 0) {
+ proc_fib_multipath_hash_set_seed(net, user_seed);
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
+ }
+
+ return ret;
+}
+#else
+
+static void proc_fib_multipath_hash_init_rand_seed(void)
+{
+}
+
+static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
+{
+}
+
#endif
static struct ctl_table ipv4_table[] = {
@@ -1072,6 +1127,13 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &fib_multipath_hash_fields_all_mask,
},
+ {
+ .procname = "fib_multipath_hash_seed",
+ .data = &init_net,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_fib_multipath_hash_seed,
+ },
#endif
{
.procname = "ip_unprivileged_port_start",
@@ -1550,6 +1612,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!net->ipv4.sysctl_local_reserved_ports)
goto err_ports;
+ proc_fib_multipath_hash_set_seed(net, 0);
+
return 0;
err_ports:
@@ -1584,6 +1648,8 @@ static __init int sysctl_ipv4_init(void)
if (!hdr)
return -ENOMEM;
+ proc_fib_multipath_hash_init_rand_seed();
+
if (register_pernet_subsys(&ipv4_sysctl_ops)) {
unregister_net_sysctl_table(hdr);
return -ENOMEM;