Diffstat (limited to 'drivers/net/netdevsim')
-rw-r--r--  drivers/net/netdevsim/Makefile      |   18
-rw-r--r--  drivers/net/netdevsim/bpf.c         |  151
-rw-r--r--  drivers/net/netdevsim/bus.c         |  532
-rw-r--r--  drivers/net/netdevsim/dev.c         | 1885
-rw-r--r--  drivers/net/netdevsim/devlink.c     |  295
-rw-r--r--  drivers/net/netdevsim/ethtool.c     |  273
-rw-r--r--  drivers/net/netdevsim/fib.c         | 1580
-rw-r--r--  drivers/net/netdevsim/health.c      |  231
-rw-r--r--  drivers/net/netdevsim/hwstats.c     |  480
-rw-r--r--  drivers/net/netdevsim/ipsec.c       |   90
-rw-r--r--  drivers/net/netdevsim/macsec.c      |  351
-rw-r--r--  drivers/net/netdevsim/netdev.c      | 1332
-rw-r--r--  drivers/net/netdevsim/netdevsim.h   |  378
-rw-r--r--  drivers/net/netdevsim/psample.c     |  265
-rw-r--r--  drivers/net/netdevsim/psp.c         |  252
-rw-r--r--  drivers/net/netdevsim/udp_tunnels.c |  210
16 files changed, 7411 insertions, 912 deletions
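The bus.c addition in the diff below registers a virtual "netdevsim" bus whose devices and ports are created and destroyed through sysfs writes: new_device/del_device on the bus itself, and new_port/del_port on each device, with the accepted formats spelled out in the pr_err() strings of the store handlers. As a rough userspace sketch of how that interface is exercised (not part of the patch; the sysfs paths are assumed from the DRV_NAME/bus registration in the code below, and the id/port/MAC values are made up for illustration):

	/* Hypothetical userspace sketch: create a netdevsim device and port
	 * via the sysfs files added by bus.c, then tear them down.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static int sysfs_write(const char *path, const char *val)
	{
		int fd = open(path, O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -1;
		n = write(fd, val, strlen(val));
		close(fd);
		return n < 0 ? -1 : 0;
	}

	int main(void)
	{
		/* "id port_count num_queues", as parsed by new_device_store() */
		if (sysfs_write("/sys/bus/netdevsim/new_device", "10 2 4"))
			perror("new_device");
		/* "id [perm_addr]", as parsed by new_port_store() */
		if (sysfs_write("/sys/bus/netdevsim/devices/netdevsim10/new_port",
				"2 aa:bb:cc:dd:ee:01"))
			perror("new_port");
		/* tear down */
		sysfs_write("/sys/bus/netdevsim/devices/netdevsim10/del_port", "2");
		sysfs_write("/sys/bus/netdevsim/del_device", "10");
		return 0;
	}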
diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index 0fee1d06c084..14a553e000ec 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -3,17 +3,25 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o netdevsim-objs := \ - netdev.o \ + netdev.o dev.o ethtool.o fib.o bus.o health.o hwstats.o udp_tunnels.o ifeq ($(CONFIG_BPF_SYSCALL),y) netdevsim-objs += \ bpf.o endif -ifneq ($(CONFIG_NET_DEVLINK),) -netdevsim-objs += devlink.o fib.o -endif - ifneq ($(CONFIG_XFRM_OFFLOAD),) netdevsim-objs += ipsec.o endif + +ifneq ($(CONFIG_PSAMPLE),) +netdevsim-objs += psample.o +endif + +ifneq ($(CONFIG_INET_PSP),) +netdevsim-objs += psp.o +endif + +ifneq ($(CONFIG_MACSEC),) +netdevsim-objs += macsec.o +endif diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 172b271c8bd2..49537d3c4120 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -27,7 +27,7 @@ bpf_verifier_log_write(env, "[netdevsim] " fmt, ##__VA_ARGS__) struct nsim_bpf_bound_prog { - struct netdevsim *ns; + struct nsim_dev *nsim_dev; struct bpf_prog *prog; struct dentry *ddir; const char *state; @@ -63,15 +63,20 @@ static int nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn) { struct nsim_bpf_bound_prog *state; + int ret = 0; state = env->prog->aux->offload->dev_priv; - if (state->ns->bpf_bind_verifier_delay && !insn_idx) - msleep(state->ns->bpf_bind_verifier_delay); + if (state->nsim_dev->bpf_bind_verifier_delay && !insn_idx) + msleep(state->nsim_dev->bpf_bind_verifier_delay); - if (insn_idx == env->prog->len - 1) + if (insn_idx == env->prog->len - 1) { pr_vlog(env, "Hello from netdevsim!\n"); - return 0; + if (!state->nsim_dev->bpf_bind_verifier_accept) + ret = -EOPNOTSUPP; + } + + return ret; } static int nsim_bpf_finalize(struct bpf_verifier_env *env) @@ -88,7 +93,7 @@ static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded) { struct nsim_bpf_bound_prog *state; - if (!prog || !prog->aux->offload) + if (!prog || !bpf_prog_is_offloaded(prog->aux)) return; state = prog->aux->offload->dev_priv; @@ -190,9 +195,6 @@ nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf, { int err; - if (!xdp_attachment_flags_ok(xdp, bpf)) - return -EBUSY; - if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) { NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS"); return -EOPNOTSUPP; @@ -213,25 +215,28 @@ nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf, return 0; } -static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) +static int nsim_bpf_create_prog(struct nsim_dev *nsim_dev, + struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state; char name[16]; + int ret; state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return -ENOMEM; - state->ns = ns; + state->nsim_dev = nsim_dev; state->prog = prog; state->state = "verify"; /* Program id is not populated yet when we create the state. 
*/ - sprintf(name, "%u", ns->sdev->prog_id_gen++); - state->ddir = debugfs_create_dir(name, ns->sdev->ddir_bpf_bound_progs); - if (IS_ERR_OR_NULL(state->ddir)) { + sprintf(name, "%u", nsim_dev->prog_id_gen++); + state->ddir = debugfs_create_dir(name, nsim_dev->ddir_bpf_bound_progs); + if (IS_ERR(state->ddir)) { + ret = PTR_ERR(state->ddir); kfree(state); - return -ENOMEM; + return ret; } debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id); @@ -239,7 +244,7 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) &state->state, &nsim_bpf_string_fops); debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded); - list_add_tail(&state->l, &ns->sdev->bpf_bound_progs); + list_add_tail(&state->l, &nsim_dev->bpf_bound_progs); prog->aux->offload->dev_priv = state; @@ -248,12 +253,13 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) static int nsim_bpf_verifier_prep(struct bpf_prog *prog) { - struct netdevsim *ns = netdev_priv(prog->aux->offload->netdev); + struct nsim_dev *nsim_dev = + bpf_offload_dev_priv(prog->aux->offload->offdev); - if (!ns->bpf_bind_accept) + if (!nsim_dev->bpf_bind_accept) return -EOPNOTSUPP; - return nsim_bpf_create_prog(ns, prog); + return nsim_bpf_create_prog(nsim_dev, prog); } static int nsim_bpf_translate(struct bpf_prog *prog) @@ -290,7 +296,8 @@ static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) NSIM_EA(bpf->extack, "attempt to load offloaded prog to drv"); return -EINVAL; } - if (ns->netdev->mtu > NSIM_XDP_MAX_MTU) { + if (bpf->prog && !bpf->prog->aux->xdp_has_frags && + ns->netdev->mtu > NSIM_XDP_MAX_MTU) { NSIM_EA(bpf->extack, "MTU too large w/ XDP enabled"); return -EINVAL; } @@ -305,14 +312,10 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf) if (!bpf->prog) return 0; - if (!bpf->prog->aux->offload) { + if (!bpf_prog_is_offloaded(bpf->prog->aux)) { NSIM_EA(bpf->extack, "xdpoffload of non-bound program"); return -EINVAL; } - if (!bpf_offload_dev_match(bpf->prog, ns->netdev)) { - NSIM_EA(bpf->extack, "program bound to different dev"); - return -EINVAL; - } state = bpf->prog->aux->offload->dev_priv; if (WARN_ON(strcmp(state->state, "xlated"))) { @@ -345,10 +348,12 @@ nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; - nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_USER); + nmap->entry[idx].key = kmalloc(offmap->map.key_size, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].key) return -ENOMEM; - nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_USER); + nmap->entry[idx].value = kmalloc(offmap->map.value_size, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].value) { kfree(nmap->entry[idx].key); nmap->entry[idx].key = NULL; @@ -490,7 +495,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) if (offmap->map.map_flags) return -EINVAL; - nmap = kzalloc(sizeof(*nmap), GFP_USER); + nmap = kzalloc(sizeof(*nmap), GFP_KERNEL_ACCOUNT); if (!nmap) return -ENOMEM; @@ -508,11 +513,12 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) goto err_free; key = nmap->entry[i].key; *key = i; + memset(nmap->entry[i].value, 0, offmap->map.value_size); } } offmap->dev_ops = &nsim_bpf_map_ops; - list_add_tail(&nmap->l, &ns->sdev->bpf_bound_maps); + list_add_tail(&nmap->l, &ns->nsim_dev->bpf_bound_maps); return 0; @@ -547,10 +553,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) 
ASSERT_RTNL(); switch (bpf->command) { - case XDP_QUERY_PROG: - return xdp_attachment_query(&ns->xdp, bpf); - case XDP_QUERY_PROG_HW: - return xdp_attachment_query(&ns->xdp_hw, bpf); case XDP_SETUP_PROG: err = nsim_setup_prog_checks(ns, bpf); if (err) @@ -576,60 +578,71 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) } } -int nsim_bpf_init(struct netdevsim *ns) +int nsim_bpf_dev_init(struct nsim_dev *nsim_dev) { int err; - if (ns->sdev->refcnt == 1) { - INIT_LIST_HEAD(&ns->sdev->bpf_bound_progs); - INIT_LIST_HEAD(&ns->sdev->bpf_bound_maps); + INIT_LIST_HEAD(&nsim_dev->bpf_bound_progs); + INIT_LIST_HEAD(&nsim_dev->bpf_bound_maps); - ns->sdev->ddir_bpf_bound_progs = - debugfs_create_dir("bpf_bound_progs", ns->sdev->ddir); - if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs)) - return -ENOMEM; + nsim_dev->ddir_bpf_bound_progs = debugfs_create_dir("bpf_bound_progs", + nsim_dev->ddir); + if (IS_ERR(nsim_dev->ddir_bpf_bound_progs)) + return PTR_ERR(nsim_dev->ddir_bpf_bound_progs); - ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops); - err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev); - if (err) - return err; - } + nsim_dev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops, nsim_dev); + err = PTR_ERR_OR_ZERO(nsim_dev->bpf_dev); + if (err) + return err; + + nsim_dev->bpf_bind_accept = true; + debugfs_create_bool("bpf_bind_accept", 0600, nsim_dev->ddir, + &nsim_dev->bpf_bind_accept); + debugfs_create_u32("bpf_bind_verifier_delay", 0600, nsim_dev->ddir, + &nsim_dev->bpf_bind_verifier_delay); + nsim_dev->bpf_bind_verifier_accept = true; + debugfs_create_bool("bpf_bind_verifier_accept", 0600, nsim_dev->ddir, + &nsim_dev->bpf_bind_verifier_accept); + return 0; +} + +void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev) +{ + WARN_ON(!list_empty(&nsim_dev->bpf_bound_progs)); + WARN_ON(!list_empty(&nsim_dev->bpf_bound_maps)); + bpf_offload_dev_destroy(nsim_dev->bpf_dev); +} + +int nsim_bpf_init(struct netdevsim *ns) +{ + struct dentry *ddir = ns->nsim_dev_port->ddir; + int err; - err = bpf_offload_dev_netdev_register(ns->sdev->bpf_dev, ns->netdev); + err = bpf_offload_dev_netdev_register(ns->nsim_dev->bpf_dev, + ns->netdev); if (err) - goto err_destroy_bdev; + return err; - debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir, + debugfs_create_u32("bpf_offloaded_id", 0400, ddir, &ns->bpf_offloaded_id); - ns->bpf_bind_accept = true; - debugfs_create_bool("bpf_bind_accept", 0600, ns->ddir, - &ns->bpf_bind_accept); - debugfs_create_u32("bpf_bind_verifier_delay", 0600, ns->ddir, - &ns->bpf_bind_verifier_delay); - ns->bpf_tc_accept = true; - debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir, + debugfs_create_bool("bpf_tc_accept", 0600, ddir, &ns->bpf_tc_accept); - debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ns->ddir, + debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ddir, &ns->bpf_tc_non_bound_accept); ns->bpf_xdpdrv_accept = true; - debugfs_create_bool("bpf_xdpdrv_accept", 0600, ns->ddir, + debugfs_create_bool("bpf_xdpdrv_accept", 0600, ddir, &ns->bpf_xdpdrv_accept); ns->bpf_xdpoffload_accept = true; - debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir, + debugfs_create_bool("bpf_xdpoffload_accept", 0600, ddir, &ns->bpf_xdpoffload_accept); ns->bpf_map_accept = true; - debugfs_create_bool("bpf_map_accept", 0600, ns->ddir, + debugfs_create_bool("bpf_map_accept", 0600, ddir, &ns->bpf_map_accept); return 0; - -err_destroy_bdev: - if (ns->sdev->refcnt == 1) - bpf_offload_dev_destroy(ns->sdev->bpf_dev); - return err; } void nsim_bpf_uninit(struct netdevsim *ns) @@ -637,11 
+650,5 @@ void nsim_bpf_uninit(struct netdevsim *ns) WARN_ON(ns->xdp.prog); WARN_ON(ns->xdp_hw.prog); WARN_ON(ns->bpf_offloaded); - bpf_offload_dev_netdev_unregister(ns->sdev->bpf_dev, ns->netdev); - - if (ns->sdev->refcnt == 1) { - WARN_ON(!list_empty(&ns->sdev->bpf_bound_progs)); - WARN_ON(!list_empty(&ns->sdev->bpf_bound_maps)); - bpf_offload_dev_destroy(ns->sdev->bpf_dev); - } + bpf_offload_dev_netdev_unregister(ns->nsim_dev->bpf_dev, ns->netdev); } diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c new file mode 100644 index 000000000000..70e8c38ddad6 --- /dev/null +++ b/drivers/net/netdevsim/bus.c @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2017 Netronome Systems, Inc. + * Copyright (C) 2019 Mellanox Technologies. All rights reserved + */ + +#include <linux/completion.h> +#include <linux/device.h> +#include <linux/idr.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/refcount.h> +#include <linux/slab.h> +#include <linux/sysfs.h> + +#include "netdevsim.h" + +static DEFINE_IDA(nsim_bus_dev_ids); +static LIST_HEAD(nsim_bus_dev_list); +static DEFINE_MUTEX(nsim_bus_dev_list_lock); +static bool nsim_bus_enable; +static refcount_t nsim_bus_devs; /* Including the bus itself. */ +static DECLARE_COMPLETION(nsim_bus_devs_released); + +static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev) +{ + return container_of(dev, struct nsim_bus_dev, dev); +} + +static ssize_t +nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + unsigned int num_vfs; + int ret; + + ret = kstrtouint(buf, 0, &num_vfs); + if (ret) + return ret; + + device_lock(dev); + ret = -ENOENT; + if (dev_get_drvdata(dev)) + ret = nsim_drv_configure_vfs(nsim_bus_dev, num_vfs); + device_unlock(dev); + + return ret ? ret : count; +} + +static ssize_t +nsim_bus_dev_numvfs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + + return sprintf(buf, "%u\n", nsim_bus_dev->num_vfs); +} + +static struct device_attribute nsim_bus_dev_numvfs_attr = + __ATTR(sriov_numvfs, 0664, nsim_bus_dev_numvfs_show, + nsim_bus_dev_numvfs_store); + +static ssize_t +new_port_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + u8 eth_addr[ETH_ALEN] = {}; + unsigned int port_index; + bool addr_set = false; + int ret; + + /* Prevent to use nsim_bus_dev before initialization. */ + if (!smp_load_acquire(&nsim_bus_dev->init)) + return -EBUSY; + + ret = sscanf(buf, "%u %hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &port_index, + &eth_addr[0], &eth_addr[1], &eth_addr[2], &eth_addr[3], + &eth_addr[4], &eth_addr[5]); + switch (ret) { + case 7: + if (!is_valid_ether_addr(eth_addr)) { + pr_err("The supplied perm_addr is not a valid MAC address\n"); + return -EINVAL; + } + addr_set = true; + fallthrough; + case 1: + break; + default: + pr_err("Format for adding new port is \"id [perm_addr]\" (uint MAC).\n"); + return -EINVAL; + } + + ret = nsim_drv_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index, + addr_set ? eth_addr : NULL); + return ret ?
ret : count; +} + +static struct device_attribute nsim_bus_dev_new_port_attr = __ATTR_WO(new_port); + +static ssize_t +del_port_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + unsigned int port_index; + int ret; + + /* Prevent to use nsim_bus_dev before initialization. */ + if (!smp_load_acquire(&nsim_bus_dev->init)) + return -EBUSY; + ret = kstrtouint(buf, 0, &port_index); + if (ret) + return ret; + + ret = nsim_drv_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); + return ret ? ret : count; +} + +static struct device_attribute nsim_bus_dev_del_port_attr = __ATTR_WO(del_port); + +static struct attribute *nsim_bus_dev_attrs[] = { + &nsim_bus_dev_numvfs_attr.attr, + &nsim_bus_dev_new_port_attr.attr, + &nsim_bus_dev_del_port_attr.attr, + NULL, +}; + +static const struct attribute_group nsim_bus_dev_attr_group = { + .attrs = nsim_bus_dev_attrs, +}; + +static const struct attribute_group *nsim_bus_dev_attr_groups[] = { + &nsim_bus_dev_attr_group, + NULL, +}; + +static void nsim_bus_dev_release(struct device *dev) +{ + struct nsim_bus_dev *nsim_bus_dev; + + nsim_bus_dev = container_of(dev, struct nsim_bus_dev, dev); + kfree(nsim_bus_dev); + if (refcount_dec_and_test(&nsim_bus_devs)) + complete(&nsim_bus_devs_released); +} + +static const struct device_type nsim_bus_dev_type = { + .groups = nsim_bus_dev_attr_groups, + .release = nsim_bus_dev_release, +}; + +static struct nsim_bus_dev * +nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues); + +static ssize_t +new_device_store(const struct bus_type *bus, const char *buf, size_t count) +{ + unsigned int id, port_count, num_queues; + struct nsim_bus_dev *nsim_bus_dev; + int err; + + err = sscanf(buf, "%u %u %u", &id, &port_count, &num_queues); + switch (err) { + case 1: + port_count = 1; + fallthrough; + case 2: + num_queues = 1; + fallthrough; + case 3: + if (id > INT_MAX) { + pr_err("Value of \"id\" is too big.\n"); + return -EINVAL; + } + break; + default: + pr_err("Format for adding new device is \"id port_count num_queues\" (uint uint unit).\n"); + return -EINVAL; + } + + mutex_lock(&nsim_bus_dev_list_lock); + /* Prevent to use resource before initialization. */ + if (!smp_load_acquire(&nsim_bus_enable)) { + err = -EBUSY; + goto err; + } + + nsim_bus_dev = nsim_bus_dev_new(id, port_count, num_queues); + if (IS_ERR(nsim_bus_dev)) { + err = PTR_ERR(nsim_bus_dev); + goto err; + } + + refcount_inc(&nsim_bus_devs); + /* Allow using nsim_bus_dev */ + smp_store_release(&nsim_bus_dev->init, true); + + list_add_tail(&nsim_bus_dev->list, &nsim_bus_dev_list); + mutex_unlock(&nsim_bus_dev_list_lock); + + return count; +err: + mutex_unlock(&nsim_bus_dev_list_lock); + return err; +} +static BUS_ATTR_WO(new_device); + +static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev); + +static ssize_t +del_device_store(const struct bus_type *bus, const char *buf, size_t count) +{ + struct nsim_bus_dev *nsim_bus_dev, *tmp; + unsigned int id; + int err; + + err = sscanf(buf, "%u", &id); + switch (err) { + case 1: + if (id > INT_MAX) { + pr_err("Value of \"id\" is too big.\n"); + return -EINVAL; + } + break; + default: + pr_err("Format for deleting device is \"id\" (uint).\n"); + return -EINVAL; + } + + err = -ENOENT; + mutex_lock(&nsim_bus_dev_list_lock); + /* Prevent to use resource before initialization. 
*/ + if (!smp_load_acquire(&nsim_bus_enable)) { + mutex_unlock(&nsim_bus_dev_list_lock); + return -EBUSY; + } + list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) { + if (nsim_bus_dev->dev.id != id) + continue; + list_del(&nsim_bus_dev->list); + nsim_bus_dev_del(nsim_bus_dev); + err = 0; + break; + } + mutex_unlock(&nsim_bus_dev_list_lock); + return !err ? count : err; +} +static BUS_ATTR_WO(del_device); + +static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count) +{ + struct netdevsim *nsim_a, *nsim_b, *peer; + struct net_device *dev_a, *dev_b; + unsigned int ifidx_a, ifidx_b; + int netnsfd_a, netnsfd_b, err; + struct net *ns_a, *ns_b; + + err = sscanf(buf, "%d:%u %d:%u", &netnsfd_a, &ifidx_a, &netnsfd_b, + &ifidx_b); + if (err != 4) { + pr_err("Format for linking two devices is \"netnsfd_a:ifidx_a netnsfd_b:ifidx_b\" (int uint int uint).\n"); + return -EINVAL; + } + + ns_a = get_net_ns_by_fd(netnsfd_a); + if (IS_ERR(ns_a)) { + pr_err("Could not find netns with fd: %d\n", netnsfd_a); + return -EINVAL; + } + + ns_b = get_net_ns_by_fd(netnsfd_b); + if (IS_ERR(ns_b)) { + pr_err("Could not find netns with fd: %d\n", netnsfd_b); + put_net(ns_a); + return -EINVAL; + } + + err = -EINVAL; + rtnl_lock(); + dev_a = __dev_get_by_index(ns_a, ifidx_a); + if (!dev_a) { + pr_err("Could not find device with ifindex %u in netnsfd %d\n", + ifidx_a, netnsfd_a); + goto out_err; + } + + if (!netdev_is_nsim(dev_a)) { + pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", + ifidx_a, netnsfd_a); + goto out_err; + } + + dev_b = __dev_get_by_index(ns_b, ifidx_b); + if (!dev_b) { + pr_err("Could not find device with ifindex %u in netnsfd %d\n", + ifidx_b, netnsfd_b); + goto out_err; + } + + if (!netdev_is_nsim(dev_b)) { + pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", + ifidx_b, netnsfd_b); + goto out_err; + } + + if (dev_a == dev_b) { + pr_err("Cannot link a netdevsim to itself\n"); + goto out_err; + } + + err = -EBUSY; + nsim_a = netdev_priv(dev_a); + peer = rtnl_dereference(nsim_a->peer); + if (peer) { + pr_err("Netdevsim %d:%u is already linked\n", netnsfd_a, + ifidx_a); + goto out_err; + } + + nsim_b = netdev_priv(dev_b); + peer = rtnl_dereference(nsim_b->peer); + if (peer) { + pr_err("Netdevsim %d:%u is already linked\n", netnsfd_b, + ifidx_b); + goto out_err; + } + + err = 0; + rcu_assign_pointer(nsim_a->peer, nsim_b); + rcu_assign_pointer(nsim_b->peer, nsim_a); + +out_err: + put_net(ns_b); + put_net(ns_a); + rtnl_unlock(); + + return !err ? 
count : err; +} +static BUS_ATTR_WO(link_device); + +static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, size_t count) +{ + struct netdevsim *nsim, *peer; + struct net_device *dev; + unsigned int ifidx; + int netnsfd, err; + struct net *ns; + + err = sscanf(buf, "%u:%u", &netnsfd, &ifidx); + if (err != 2) { + pr_err("Format for unlinking a device is \"netnsfd:ifidx\" (int uint).\n"); + return -EINVAL; + } + + ns = get_net_ns_by_fd(netnsfd); + if (IS_ERR(ns)) { + pr_err("Could not find netns with fd: %d\n", netnsfd); + return -EINVAL; + } + + err = -EINVAL; + rtnl_lock(); + dev = __dev_get_by_index(ns, ifidx); + if (!dev) { + pr_err("Could not find device with ifindex %u in netnsfd %d\n", + ifidx, netnsfd); + goto out_put_netns; + } + + if (!netdev_is_nsim(dev)) { + pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", + ifidx, netnsfd); + goto out_put_netns; + } + + nsim = netdev_priv(dev); + peer = rtnl_dereference(nsim->peer); + if (!peer) + goto out_put_netns; + + err = 0; + RCU_INIT_POINTER(nsim->peer, NULL); + RCU_INIT_POINTER(peer->peer, NULL); + synchronize_net(); + netif_tx_wake_all_queues(dev); + netif_tx_wake_all_queues(peer->netdev); + +out_put_netns: + put_net(ns); + rtnl_unlock(); + + return !err ? count : err; +} +static BUS_ATTR_WO(unlink_device); + +static struct attribute *nsim_bus_attrs[] = { + &bus_attr_new_device.attr, + &bus_attr_del_device.attr, + &bus_attr_link_device.attr, + &bus_attr_unlink_device.attr, + NULL +}; +ATTRIBUTE_GROUPS(nsim_bus); + +static int nsim_bus_probe(struct device *dev) +{ + struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + + return nsim_drv_probe(nsim_bus_dev); +} + +static void nsim_bus_remove(struct device *dev) +{ + struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + + nsim_drv_remove(nsim_bus_dev); +} + +static int nsim_num_vf(struct device *dev) +{ + struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + + return nsim_bus_dev->num_vfs; +} + +static const struct bus_type nsim_bus = { + .name = DRV_NAME, + .dev_name = DRV_NAME, + .bus_groups = nsim_bus_groups, + .probe = nsim_bus_probe, + .remove = nsim_bus_remove, + .num_vf = nsim_num_vf, +}; + +#define NSIM_BUS_DEV_MAX_VFS 4 + +static struct nsim_bus_dev * +nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues) +{ + struct nsim_bus_dev *nsim_bus_dev; + int err; + + nsim_bus_dev = kzalloc(sizeof(*nsim_bus_dev), GFP_KERNEL); + if (!nsim_bus_dev) + return ERR_PTR(-ENOMEM); + + err = ida_alloc_range(&nsim_bus_dev_ids, id, id, GFP_KERNEL); + if (err < 0) + goto err_nsim_bus_dev_free; + nsim_bus_dev->dev.id = err; + nsim_bus_dev->dev.bus = &nsim_bus; + nsim_bus_dev->dev.type = &nsim_bus_dev_type; + nsim_bus_dev->port_count = port_count; + nsim_bus_dev->num_queues = num_queues; + nsim_bus_dev->initial_net = current->nsproxy->net_ns; + nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS; + /* Disallow using nsim_bus_dev */ + smp_store_release(&nsim_bus_dev->init, false); + + err = device_register(&nsim_bus_dev->dev); + if (err) + goto err_nsim_bus_dev_id_free; + + return nsim_bus_dev; + +err_nsim_bus_dev_id_free: + ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id); + put_device(&nsim_bus_dev->dev); + nsim_bus_dev = NULL; +err_nsim_bus_dev_free: + kfree(nsim_bus_dev); + return ERR_PTR(err); +} + +static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev) +{ + /* Disallow using nsim_bus_dev */ + smp_store_release(&nsim_bus_dev->init, false); + ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id); + 
device_unregister(&nsim_bus_dev->dev); +} + +static struct device_driver nsim_driver = { + .name = DRV_NAME, + .bus = &nsim_bus, + .owner = THIS_MODULE, +}; + +int nsim_bus_init(void) +{ + int err; + + err = bus_register(&nsim_bus); + if (err) + return err; + err = driver_register(&nsim_driver); + if (err) + goto err_bus_unregister; + refcount_set(&nsim_bus_devs, 1); + /* Allow using resources */ + smp_store_release(&nsim_bus_enable, true); + return 0; + +err_bus_unregister: + bus_unregister(&nsim_bus); + return err; +} + +void nsim_bus_exit(void) +{ + struct nsim_bus_dev *nsim_bus_dev, *tmp; + + /* Disallow using resources */ + smp_store_release(&nsim_bus_enable, false); + if (refcount_dec_and_test(&nsim_bus_devs)) + complete(&nsim_bus_devs_released); + + mutex_lock(&nsim_bus_dev_list_lock); + list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) { + list_del(&nsim_bus_dev->list); + nsim_bus_dev_del(nsim_bus_dev); + } + mutex_unlock(&nsim_bus_dev_list_lock); + + wait_for_completion(&nsim_bus_devs_released); + + driver_unregister(&nsim_driver); + bus_unregister(&nsim_bus); +} diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c new file mode 100644 index 000000000000..2683a989873e --- /dev/null +++ b/drivers/net/netdevsim/dev.c @@ -0,0 +1,1885 @@ +/* + * Copyright (c) 2018 Cumulus Networks. All rights reserved. + * Copyright (c) 2018 David Ahern <dsa@cumulusnetworks.com> + * Copyright (c) 2019 Mellanox Technologies. All rights reserved. + * + * This software is licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree. + * + * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE + * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME + * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
+ */ + +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/etherdevice.h> +#include <linux/inet.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/random.h> +#include <linux/rtnetlink.h> +#include <linux/workqueue.h> +#include <net/devlink.h> +#include <net/ip.h> +#include <net/flow_offload.h> +#include <uapi/linux/devlink.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/udp.h> + +#include "netdevsim.h" + +static unsigned int +nsim_dev_port_index(enum nsim_dev_port_type type, unsigned int port_index) +{ + switch (type) { + case NSIM_DEV_PORT_TYPE_VF: + port_index = NSIM_DEV_VF_PORT_INDEX_BASE + port_index; + break; + case NSIM_DEV_PORT_TYPE_PF: + break; + } + + return port_index; +} + +static inline unsigned int nsim_dev_port_index_to_vf_index(unsigned int port_index) +{ + return port_index - NSIM_DEV_VF_PORT_INDEX_BASE; +} + +static struct dentry *nsim_dev_ddir; + +unsigned int nsim_dev_get_vfs(struct nsim_dev *nsim_dev) +{ + WARN_ON(!lockdep_rtnl_is_held() && + !devl_lock_is_held(priv_to_devlink(nsim_dev))); + + return nsim_dev->nsim_bus_dev->num_vfs; +} + +static void +nsim_bus_dev_set_vfs(struct nsim_bus_dev *nsim_bus_dev, unsigned int num_vfs) +{ + rtnl_lock(); + nsim_bus_dev->num_vfs = num_vfs; + rtnl_unlock(); +} + +#define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32) + +static int +nsim_dev_take_snapshot(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u8 **data) +{ + void *dummy_data; + + dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL); + if (!dummy_data) + return -ENOMEM; + + get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE); + + *data = dummy_data; + + return 0; +} + +static ssize_t nsim_dev_take_snapshot_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev *nsim_dev = file->private_data; + struct devlink *devlink; + u8 *dummy_data; + int err; + u32 id; + + devlink = priv_to_devlink(nsim_dev); + + err = nsim_dev_take_snapshot(devlink, NULL, NULL, &dummy_data); + if (err) + return err; + + err = devlink_region_snapshot_id_get(devlink, &id); + if (err) { + pr_err("Failed to get snapshot id\n"); + kfree(dummy_data); + return err; + } + err = devlink_region_snapshot_create(nsim_dev->dummy_region, + dummy_data, id); + devlink_region_snapshot_id_put(devlink, id); + if (err) { + pr_err("Failed to create region snapshot\n"); + kfree(dummy_data); + return err; + } + + return count; +} + +static const struct file_operations nsim_dev_take_snapshot_fops = { + .open = simple_open, + .write = nsim_dev_take_snapshot_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + +static ssize_t nsim_dev_trap_fa_cookie_read(struct file *file, + char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev *nsim_dev = file->private_data; + struct flow_action_cookie *fa_cookie; + unsigned int buf_len; + ssize_t ret; + char *buf; + + spin_lock(&nsim_dev->fa_cookie_lock); + fa_cookie = nsim_dev->fa_cookie; + if (!fa_cookie) { + ret = -EINVAL; + goto errout; + } + buf_len = fa_cookie->cookie_len * 2; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (!buf) { + ret = -ENOMEM; + goto errout; + } + bin2hex(buf, fa_cookie->cookie, fa_cookie->cookie_len); + spin_unlock(&nsim_dev->fa_cookie_lock); + + ret = simple_read_from_buffer(data, count, ppos, buf, buf_len); + + kfree(buf); + return ret; + +errout: + spin_unlock(&nsim_dev->fa_cookie_lock); + return ret; +} + +static ssize_t 
nsim_dev_trap_fa_cookie_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev *nsim_dev = file->private_data; + struct flow_action_cookie *fa_cookie; + size_t cookie_len; + ssize_t ret; + char *buf; + + if (*ppos != 0) + return -EINVAL; + cookie_len = (count - 1) / 2; + if ((count - 1) % 2) + return -EINVAL; + + buf = memdup_user(data, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + fa_cookie = kmalloc(sizeof(*fa_cookie) + cookie_len, + GFP_KERNEL | __GFP_NOWARN); + if (!fa_cookie) { + ret = -ENOMEM; + goto free_buf; + } + + fa_cookie->cookie_len = cookie_len; + ret = hex2bin(fa_cookie->cookie, buf, cookie_len); + if (ret) + goto free_fa_cookie; + kfree(buf); + + spin_lock(&nsim_dev->fa_cookie_lock); + kfree(nsim_dev->fa_cookie); + nsim_dev->fa_cookie = fa_cookie; + spin_unlock(&nsim_dev->fa_cookie_lock); + + return count; + +free_fa_cookie: + kfree(fa_cookie); +free_buf: + kfree(buf); + return ret; +} + +static const struct file_operations nsim_dev_trap_fa_cookie_fops = { + .open = simple_open, + .read = nsim_dev_trap_fa_cookie_read, + .write = nsim_dev_trap_fa_cookie_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + +static ssize_t nsim_bus_dev_max_vfs_read(struct file *file, char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev *nsim_dev = file->private_data; + char buf[11]; + ssize_t len; + + len = scnprintf(buf, sizeof(buf), "%u\n", + READ_ONCE(nsim_dev->nsim_bus_dev->max_vfs)); + + return simple_read_from_buffer(data, count, ppos, buf, len); +} + +static ssize_t nsim_bus_dev_max_vfs_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_vf_config *vfconfigs; + struct nsim_dev *nsim_dev; + char buf[10]; + ssize_t ret; + u32 val; + + if (*ppos != 0) + return 0; + + if (count >= sizeof(buf)) + return -ENOSPC; + + ret = copy_from_user(buf, data, count); + if (ret) + return -EFAULT; + buf[count] = '\0'; + + ret = kstrtouint(buf, 10, &val); + if (ret) + return -EINVAL; + + /* max_vfs limited by the maximum number of provided port indexes */ + if (val > NSIM_DEV_VF_PORT_INDEX_MAX - NSIM_DEV_VF_PORT_INDEX_BASE) + return -ERANGE; + + vfconfigs = kcalloc(val, sizeof(struct nsim_vf_config), + GFP_KERNEL | __GFP_NOWARN); + if (!vfconfigs) + return -ENOMEM; + + nsim_dev = file->private_data; + devl_lock(priv_to_devlink(nsim_dev)); + /* Reject if VFs are configured */ + if (nsim_dev_get_vfs(nsim_dev)) { + ret = -EBUSY; + } else { + swap(nsim_dev->vfconfigs, vfconfigs); + WRITE_ONCE(nsim_dev->nsim_bus_dev->max_vfs, val); + *ppos += count; + ret = count; + } + devl_unlock(priv_to_devlink(nsim_dev)); + + kfree(vfconfigs); + return ret; +} + +static const struct file_operations nsim_dev_max_vfs_fops = { + .open = simple_open, + .read = nsim_bus_dev_max_vfs_read, + .write = nsim_bus_dev_max_vfs_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + +static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) +{ + char dev_ddir_name[sizeof(DRV_NAME) + 10]; + int err; + + sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id); + nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir); + if (IS_ERR(nsim_dev->ddir)) + return PTR_ERR(nsim_dev->ddir); + nsim_dev->ports_ddir = debugfs_create_dir("ports", nsim_dev->ddir); + if (IS_ERR(nsim_dev->ports_ddir)) { + err = PTR_ERR(nsim_dev->ports_ddir); + goto err_ddir; + } + debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir, + &nsim_dev->fw_update_status); + 
debugfs_create_u32("fw_update_overwrite_mask", 0600, nsim_dev->ddir, + &nsim_dev->fw_update_overwrite_mask); + debugfs_create_u32("fw_update_flash_chunk_time_ms", 0600, nsim_dev->ddir, + &nsim_dev->fw_update_flash_chunk_time_ms); + debugfs_create_u32("max_macs", 0600, nsim_dev->ddir, + &nsim_dev->max_macs); + debugfs_create_bool("test1", 0600, nsim_dev->ddir, + &nsim_dev->test1); + debugfs_create_u32("test2", 0600, nsim_dev->ddir, + &nsim_dev->test2); + nsim_dev->take_snapshot = debugfs_create_file("take_snapshot", + 0200, + nsim_dev->ddir, + nsim_dev, + &nsim_dev_take_snapshot_fops); + debugfs_create_bool("dont_allow_reload", 0600, nsim_dev->ddir, + &nsim_dev->dont_allow_reload); + debugfs_create_bool("fail_reload", 0600, nsim_dev->ddir, + &nsim_dev->fail_reload); + debugfs_create_file("trap_flow_action_cookie", 0600, nsim_dev->ddir, + nsim_dev, &nsim_dev_trap_fa_cookie_fops); + debugfs_create_bool("fail_trap_group_set", 0600, + nsim_dev->ddir, + &nsim_dev->fail_trap_group_set); + debugfs_create_bool("fail_trap_policer_set", 0600, + nsim_dev->ddir, + &nsim_dev->fail_trap_policer_set); + debugfs_create_bool("fail_trap_policer_counter_get", 0600, + nsim_dev->ddir, + &nsim_dev->fail_trap_policer_counter_get); + /* caution, dev_max_vfs write takes devlink lock */ + debugfs_create_file("max_vfs", 0600, nsim_dev->ddir, + nsim_dev, &nsim_dev_max_vfs_fops); + + nsim_dev->nodes_ddir = debugfs_create_dir("rate_nodes", nsim_dev->ddir); + if (IS_ERR(nsim_dev->nodes_ddir)) { + err = PTR_ERR(nsim_dev->nodes_ddir); + goto err_ports_ddir; + } + debugfs_create_bool("fail_trap_drop_counter_get", 0600, + nsim_dev->ddir, + &nsim_dev->fail_trap_drop_counter_get); + nsim_udp_tunnels_debugfs_create(nsim_dev); + return 0; + +err_ports_ddir: + debugfs_remove_recursive(nsim_dev->ports_ddir); +err_ddir: + debugfs_remove_recursive(nsim_dev->ddir); + return err; +} + +static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev) +{ + debugfs_remove_recursive(nsim_dev->nodes_ddir); + debugfs_remove_recursive(nsim_dev->ports_ddir); + debugfs_remove_recursive(nsim_dev->ddir); +} + +static ssize_t nsim_dev_rate_parent_read(struct file *file, + char __user *data, + size_t count, loff_t *ppos) +{ + char **name_ptr = file->private_data; + size_t len; + + if (!*name_ptr) + return 0; + + len = strlen(*name_ptr); + return simple_read_from_buffer(data, count, ppos, *name_ptr, len); +} + +static const struct file_operations nsim_dev_rate_parent_fops = { + .open = simple_open, + .read = nsim_dev_rate_parent_read, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + +static void nsim_dev_tc_bw_debugfs_init(struct dentry *ddir, u32 *tc_bw) +{ + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) { + char name[16]; + + snprintf(name, sizeof(name), "tc%d_bw", i); + debugfs_create_u32(name, 0400, ddir, &tc_bw[i]); + } +} +static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, + struct nsim_dev_port *nsim_dev_port) +{ + struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; + unsigned int port_index = nsim_dev_port->port_index; + char port_ddir_name[16]; + char dev_link_name[32]; + + sprintf(port_ddir_name, "%u", port_index); + nsim_dev_port->ddir = debugfs_create_dir(port_ddir_name, + nsim_dev->ports_ddir); + if (IS_ERR(nsim_dev_port->ddir)) + return PTR_ERR(nsim_dev_port->ddir); + + sprintf(dev_link_name, "../../../" DRV_NAME "%u", nsim_bus_dev->dev.id); + if (nsim_dev_port_is_vf(nsim_dev_port)) { + unsigned int vf_id = nsim_dev_port_index_to_vf_index(port_index); + + debugfs_create_u16("tx_share", 0400, 
nsim_dev_port->ddir, + &nsim_dev->vfconfigs[vf_id].min_tx_rate); + debugfs_create_u16("tx_max", 0400, nsim_dev_port->ddir, + &nsim_dev->vfconfigs[vf_id].max_tx_rate); + nsim_dev_port->rate_parent = debugfs_create_file("rate_parent", + 0400, + nsim_dev_port->ddir, + &nsim_dev_port->parent_name, + &nsim_dev_rate_parent_fops); + nsim_dev_tc_bw_debugfs_init(nsim_dev_port->ddir, + nsim_dev_port->tc_bw); + } + debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); + + return 0; +} + +static void nsim_dev_port_debugfs_exit(struct nsim_dev_port *nsim_dev_port) +{ + debugfs_remove_recursive(nsim_dev_port->ddir); +} + +static int nsim_dev_resources_register(struct devlink *devlink) +{ + struct devlink_resource_size_params params = { + .size_max = (u64)-1, + .size_granularity = 1, + .unit = DEVLINK_RESOURCE_UNIT_ENTRY + }; + int err; + + /* Resources for IPv4 */ + err = devl_resource_register(devlink, "IPv4", (u64)-1, + NSIM_RESOURCE_IPV4, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &params); + if (err) { + pr_err("Failed to register IPv4 top resource\n"); + goto err_out; + } + + err = devl_resource_register(devlink, "fib", (u64)-1, + NSIM_RESOURCE_IPV4_FIB, + NSIM_RESOURCE_IPV4, &params); + if (err) { + pr_err("Failed to register IPv4 FIB resource\n"); + goto err_out; + } + + err = devl_resource_register(devlink, "fib-rules", (u64)-1, + NSIM_RESOURCE_IPV4_FIB_RULES, + NSIM_RESOURCE_IPV4, &params); + if (err) { + pr_err("Failed to register IPv4 FIB rules resource\n"); + goto err_out; + } + + /* Resources for IPv6 */ + err = devl_resource_register(devlink, "IPv6", (u64)-1, + NSIM_RESOURCE_IPV6, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &params); + if (err) { + pr_err("Failed to register IPv6 top resource\n"); + goto err_out; + } + + err = devl_resource_register(devlink, "fib", (u64)-1, + NSIM_RESOURCE_IPV6_FIB, + NSIM_RESOURCE_IPV6, &params); + if (err) { + pr_err("Failed to register IPv6 FIB resource\n"); + goto err_out; + } + + err = devl_resource_register(devlink, "fib-rules", (u64)-1, + NSIM_RESOURCE_IPV6_FIB_RULES, + NSIM_RESOURCE_IPV6, &params); + if (err) { + pr_err("Failed to register IPv6 FIB rules resource\n"); + goto err_out; + } + + /* Resources for nexthops */ + err = devl_resource_register(devlink, "nexthops", (u64)-1, + NSIM_RESOURCE_NEXTHOPS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &params); + if (err) { + pr_err("Failed to register NEXTHOPS resource\n"); + goto err_out; + } + return 0; + +err_out: + devl_resources_unregister(devlink); + return err; +} + +enum nsim_devlink_param_id { + NSIM_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + NSIM_DEVLINK_PARAM_ID_TEST1, + NSIM_DEVLINK_PARAM_ID_TEST2, +}; + +static int +nsim_devlink_param_test2_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + ctx->val.vu32 = nsim_dev->test2; + return 0; +} + +static int +nsim_devlink_param_test2_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + nsim_dev->test2 = ctx->val.vu32; + return 0; +} + +#define NSIM_DEV_TEST2_DEFAULT 1234 + +static int +nsim_devlink_param_test2_get_default(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + ctx->val.vu32 = NSIM_DEV_TEST2_DEFAULT; + return 0; +} + +static int +nsim_devlink_param_test2_reset_default(struct devlink *devlink, u32 id, + enum devlink_param_cmode cmode, + struct netlink_ext_ack *extack) +{
+ struct nsim_dev *nsim_dev = devlink_priv(devlink); + + nsim_dev->test2 = NSIM_DEV_TEST2_DEFAULT; + return 0; +} + +static const struct devlink_param nsim_devlink_params[] = { + DEVLINK_PARAM_GENERIC(MAX_MACS, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), + DEVLINK_PARAM_DRIVER(NSIM_DEVLINK_PARAM_ID_TEST1, + "test1", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), + DEVLINK_PARAM_DRIVER_WITH_DEFAULTS(NSIM_DEVLINK_PARAM_ID_TEST2, + "test2", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + nsim_devlink_param_test2_get, + nsim_devlink_param_test2_set, + NULL, + nsim_devlink_param_test2_get_default, + nsim_devlink_param_test2_reset_default), +}; + +static void nsim_devlink_set_params_init_values(struct nsim_dev *nsim_dev, + struct devlink *devlink) +{ + union devlink_param_value value; + + value.vu32 = nsim_dev->max_macs; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); + value.vbool = nsim_dev->test1; + devl_param_driverinit_value_set(devlink, + NSIM_DEVLINK_PARAM_ID_TEST1, + value); +} + +static void nsim_devlink_param_load_driverinit_values(struct devlink *devlink) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + union devlink_param_value saved_value; + int err; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &saved_value); + if (!err) + nsim_dev->max_macs = saved_value.vu32; + err = devl_param_driverinit_value_get(devlink, + NSIM_DEVLINK_PARAM_ID_TEST1, + &saved_value); + if (!err) + nsim_dev->test1 = saved_value.vbool; +} + +#define NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX 16 + +static const struct devlink_region_ops dummy_region_ops = { + .name = "dummy", + .destructor = &kfree, + .snapshot = nsim_dev_take_snapshot, +}; + +static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev, + struct devlink *devlink) +{ + nsim_dev->dummy_region = + devl_region_create(devlink, &dummy_region_ops, + NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX, + NSIM_DEV_DUMMY_REGION_SIZE); + return PTR_ERR_OR_ZERO(nsim_dev->dummy_region); +} + +static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev) +{ + devl_region_destroy(nsim_dev->dummy_region); +} + +static int +__nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, + unsigned int port_index, u8 perm_addr[ETH_ALEN]); +static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port); + +static int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, + struct netlink_ext_ack *extack) +{ + struct devlink *devlink = priv_to_devlink(nsim_dev); + struct nsim_dev_port *nsim_dev_port, *tmp; + + devl_rate_nodes_destroy(devlink); + list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) + if (nsim_dev_port_is_vf(nsim_dev_port)) + __nsim_dev_port_del(nsim_dev_port); + nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; + return 0; +} + +static int nsim_esw_switchdev_enable(struct nsim_dev *nsim_dev, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port, *tmp; + int i, err; + + for (i = 0; i < nsim_dev_get_vfs(nsim_dev); i++) { + err = __nsim_dev_port_add(nsim_dev, NSIM_DEV_PORT_TYPE_VF, i, NULL); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize VFs' netdevsim ports"); + pr_err("Failed to initialize VF id=%d. 
%d.\n", i, err); + goto err_port_add_vfs; + } + } + nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + return 0; + +err_port_add_vfs: + list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) + if (nsim_dev_port_is_vf(nsim_dev_port)) + __nsim_dev_port_del(nsim_dev_port); + return err; +} + +static int nsim_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + if (mode == nsim_dev->esw_mode) + return 0; + + if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + return nsim_esw_legacy_enable(nsim_dev, extack); + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + return nsim_esw_switchdev_enable(nsim_dev, extack); + + return -EINVAL; +} + +static int nsim_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + *mode = nsim_dev->esw_mode; + return 0; +} + +struct nsim_trap_item { + void *trap_ctx; + enum devlink_trap_action action; +}; + +struct nsim_trap_data { + struct delayed_work trap_report_dw; + struct nsim_trap_item *trap_items_arr; + u64 *trap_policers_cnt_arr; + u64 trap_pkt_cnt; + struct nsim_dev *nsim_dev; + spinlock_t trap_lock; /* Protects trap_items_arr */ +}; + +/* All driver-specific traps must be documented in + * Documentation/networking/devlink/netdevsim.rst + */ +enum { + NSIM_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX, + NSIM_TRAP_ID_FID_MISS, +}; + +#define NSIM_TRAP_NAME_FID_MISS "fid_miss" + +#define NSIM_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT + +#define NSIM_TRAP_DROP(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + NSIM_TRAP_METADATA) +#define NSIM_TRAP_DROP_EXT(_id, _group_id, _metadata) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + NSIM_TRAP_METADATA | (_metadata)) +#define NSIM_TRAP_EXCEPTION(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + NSIM_TRAP_METADATA) +#define NSIM_TRAP_CONTROL(_id, _group_id, _action) \ + DEVLINK_TRAP_GENERIC(CONTROL, _action, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + NSIM_TRAP_METADATA) +#define NSIM_TRAP_DRIVER_EXCEPTION(_id, _group_id) \ + DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, NSIM_TRAP_ID_##_id, \ + NSIM_TRAP_NAME_##_id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + NSIM_TRAP_METADATA) + +#define NSIM_DEV_TRAP_POLICER_MIN_RATE 1 +#define NSIM_DEV_TRAP_POLICER_MAX_RATE 8000 +#define NSIM_DEV_TRAP_POLICER_MIN_BURST 8 +#define NSIM_DEV_TRAP_POLICER_MAX_BURST 65536 + +#define NSIM_TRAP_POLICER(_id, _rate, _burst) \ + DEVLINK_TRAP_POLICER(_id, _rate, _burst, \ + NSIM_DEV_TRAP_POLICER_MAX_RATE, \ + NSIM_DEV_TRAP_POLICER_MIN_RATE, \ + NSIM_DEV_TRAP_POLICER_MAX_BURST, \ + NSIM_DEV_TRAP_POLICER_MIN_BURST) + +static const struct devlink_trap_policer nsim_trap_policers_arr[] = { + NSIM_TRAP_POLICER(1, 1000, 128), + NSIM_TRAP_POLICER(2, 2000, 256), + NSIM_TRAP_POLICER(3, 3000, 512), +}; + +static const struct devlink_trap_group nsim_trap_groups_arr[] = { + DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0), + DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1), + DEVLINK_TRAP_GROUP_GENERIC(L3_EXCEPTIONS, 1), + DEVLINK_TRAP_GROUP_GENERIC(BUFFER_DROPS, 2), + DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 3), + DEVLINK_TRAP_GROUP_GENERIC(MC_SNOOPING, 3), +}; + +static const struct devlink_trap nsim_traps_arr[] = { + NSIM_TRAP_DROP(SMAC_MC, L2_DROPS), + NSIM_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), + 
NSIM_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), + NSIM_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS), + NSIM_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS), + NSIM_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS), + NSIM_TRAP_DRIVER_EXCEPTION(FID_MISS, L2_DROPS), + NSIM_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS), + NSIM_TRAP_EXCEPTION(TTL_ERROR, L3_EXCEPTIONS), + NSIM_TRAP_DROP(TAIL_DROP, BUFFER_DROPS), + NSIM_TRAP_DROP_EXT(INGRESS_FLOW_ACTION_DROP, ACL_DROPS, + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), + NSIM_TRAP_DROP_EXT(EGRESS_FLOW_ACTION_DROP, ACL_DROPS, + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), + NSIM_TRAP_CONTROL(IGMP_QUERY, MC_SNOOPING, MIRROR), + NSIM_TRAP_CONTROL(IGMP_V1_REPORT, MC_SNOOPING, TRAP), +}; + +#define NSIM_TRAP_L4_DATA_LEN 100 + +static struct sk_buff *nsim_dev_trap_skb_build(void) +{ + int tot_len, data_len = NSIM_TRAP_L4_DATA_LEN; + struct sk_buff *skb; + struct udphdr *udph; + struct ethhdr *eth; + struct iphdr *iph; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NULL; + tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len; + + skb_reset_mac_header(skb); + eth = skb_put(skb, sizeof(struct ethhdr)); + eth_random_addr(eth->h_dest); + eth_random_addr(eth->h_source); + eth->h_proto = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); + + skb_set_network_header(skb, skb->len); + iph = skb_put(skb, sizeof(struct iphdr)); + iph->protocol = IPPROTO_UDP; + iph->saddr = in_aton("192.0.2.1"); + iph->daddr = in_aton("198.51.100.1"); + iph->version = 0x4; + iph->frag_off = 0; + iph->ihl = 0x5; + iph->tot_len = htons(tot_len); + iph->ttl = 100; + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + skb_set_transport_header(skb, skb->len); + udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len); + get_random_bytes(&udph->source, sizeof(u16)); + get_random_bytes(&udph->dest, sizeof(u16)); + udph->len = htons(sizeof(struct udphdr) + data_len); + + return skb; +} + +static void nsim_dev_trap_report(struct nsim_dev_port *nsim_dev_port) +{ + struct nsim_dev *nsim_dev = nsim_dev_port->ns->nsim_dev; + struct devlink *devlink = priv_to_devlink(nsim_dev); + struct nsim_trap_data *nsim_trap_data; + int i; + + nsim_trap_data = nsim_dev->trap_data; + + spin_lock(&nsim_trap_data->trap_lock); + for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) { + struct flow_action_cookie *fa_cookie = NULL; + struct nsim_trap_item *nsim_trap_item; + struct sk_buff *skb; + bool has_fa_cookie; + + has_fa_cookie = nsim_traps_arr[i].metadata_cap & + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE; + + nsim_trap_item = &nsim_trap_data->trap_items_arr[i]; + if (nsim_trap_item->action == DEVLINK_TRAP_ACTION_DROP) + continue; + + skb = nsim_dev_trap_skb_build(); + if (!skb) + continue; + skb->dev = nsim_dev_port->ns->netdev; + + /* Trapped packets are usually passed to devlink in softIRQ, + * but in this case they are generated in a workqueue. Disable + * softIRQs to prevent lockdep from complaining about + * "incosistent lock state". + */ + + spin_lock_bh(&nsim_dev->fa_cookie_lock); + fa_cookie = has_fa_cookie ? 
nsim_dev->fa_cookie : NULL; + devlink_trap_report(devlink, skb, nsim_trap_item->trap_ctx, + &nsim_dev_port->devlink_port, fa_cookie); + spin_unlock_bh(&nsim_dev->fa_cookie_lock); + consume_skb(skb); + } + spin_unlock(&nsim_trap_data->trap_lock); +} + +#define NSIM_TRAP_REPORT_INTERVAL_MS 100 + +static void nsim_dev_trap_report_work(struct work_struct *work) +{ + struct nsim_trap_data *nsim_trap_data; + struct nsim_dev_port *nsim_dev_port; + struct nsim_dev *nsim_dev; + + nsim_trap_data = container_of(work, struct nsim_trap_data, + trap_report_dw.work); + nsim_dev = nsim_trap_data->nsim_dev; + + if (!devl_trylock(priv_to_devlink(nsim_dev))) { + queue_delayed_work(system_dfl_wq, + &nsim_dev->trap_data->trap_report_dw, 1); + return; + } + + /* For each running port and enabled packet trap, generate a UDP + * packet with a random 5-tuple and report it. + */ + list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) { + if (!netif_running(nsim_dev_port->ns->netdev)) + continue; + + nsim_dev_trap_report(nsim_dev_port); + cond_resched(); + } + devl_unlock(priv_to_devlink(nsim_dev)); + queue_delayed_work(system_dfl_wq, + &nsim_dev->trap_data->trap_report_dw, + msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); +} + +static int nsim_dev_traps_init(struct devlink *devlink) +{ + size_t policers_count = ARRAY_SIZE(nsim_trap_policers_arr); + struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_trap_data *nsim_trap_data; + int err; + + nsim_trap_data = kzalloc(sizeof(*nsim_trap_data), GFP_KERNEL); + if (!nsim_trap_data) + return -ENOMEM; + + nsim_trap_data->trap_items_arr = kcalloc(ARRAY_SIZE(nsim_traps_arr), + sizeof(struct nsim_trap_item), + GFP_KERNEL); + if (!nsim_trap_data->trap_items_arr) { + err = -ENOMEM; + goto err_trap_data_free; + } + + nsim_trap_data->trap_policers_cnt_arr = kcalloc(policers_count, + sizeof(u64), + GFP_KERNEL); + if (!nsim_trap_data->trap_policers_cnt_arr) { + err = -ENOMEM; + goto err_trap_items_free; + } + + /* The lock is used to protect the action state of the registered + * traps. The value is written by user and read in delayed work when + * iterating over all the traps. 
+ */ + spin_lock_init(&nsim_trap_data->trap_lock); + nsim_trap_data->nsim_dev = nsim_dev; + nsim_dev->trap_data = nsim_trap_data; + + err = devl_trap_policers_register(devlink, nsim_trap_policers_arr, + policers_count); + if (err) + goto err_trap_policers_cnt_free; + + err = devl_trap_groups_register(devlink, nsim_trap_groups_arr, + ARRAY_SIZE(nsim_trap_groups_arr)); + if (err) + goto err_trap_policers_unregister; + + err = devl_traps_register(devlink, nsim_traps_arr, + ARRAY_SIZE(nsim_traps_arr), NULL); + if (err) + goto err_trap_groups_unregister; + + INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw, + nsim_dev_trap_report_work); + queue_delayed_work(system_dfl_wq, + &nsim_dev->trap_data->trap_report_dw, + msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); + + return 0; + +err_trap_groups_unregister: + devl_trap_groups_unregister(devlink, nsim_trap_groups_arr, + ARRAY_SIZE(nsim_trap_groups_arr)); +err_trap_policers_unregister: + devl_trap_policers_unregister(devlink, nsim_trap_policers_arr, + ARRAY_SIZE(nsim_trap_policers_arr)); +err_trap_policers_cnt_free: + kfree(nsim_trap_data->trap_policers_cnt_arr); +err_trap_items_free: + kfree(nsim_trap_data->trap_items_arr); +err_trap_data_free: + kfree(nsim_trap_data); + return err; +} + +static void nsim_dev_traps_exit(struct devlink *devlink) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + /* caution, trap work takes devlink lock */ + cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw); + devl_traps_unregister(devlink, nsim_traps_arr, + ARRAY_SIZE(nsim_traps_arr)); + devl_trap_groups_unregister(devlink, nsim_trap_groups_arr, + ARRAY_SIZE(nsim_trap_groups_arr)); + devl_trap_policers_unregister(devlink, nsim_trap_policers_arr, + ARRAY_SIZE(nsim_trap_policers_arr)); + kfree(nsim_dev->trap_data->trap_policers_cnt_arr); + kfree(nsim_dev->trap_data->trap_items_arr); + kfree(nsim_dev->trap_data); +} + +static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, + struct netlink_ext_ack *extack); +static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev); + +static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + if (nsim_dev->dont_allow_reload) { + /* For testing purposes, user set debugfs dont_allow_reload + * value to true. So forbid it. + */ + NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes"); + return -EOPNOTSUPP; + } + + nsim_dev_reload_destroy(nsim_dev); + return 0; +} + +static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + if (nsim_dev->fail_reload) { + /* For testing purposes, user set debugfs fail_reload + * value to true. Fail right away. 
+ */ + NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes"); + return -EINVAL; + } + + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); + + return nsim_dev_reload_create(nsim_dev, extack); +} + +static int nsim_dev_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + int err; + + err = devlink_info_version_stored_put_ext(req, "fw.mgmt", "10.20.30", + DEVLINK_INFO_VERSION_TYPE_COMPONENT); + if (err) + return err; + return devlink_info_version_running_put_ext(req, "fw.mgmt", "10.20.30", + DEVLINK_INFO_VERSION_TYPE_COMPONENT); +} + +#define NSIM_DEV_FLASH_SIZE 50000 +#define NSIM_DEV_FLASH_CHUNK_SIZE 1000 +#define NSIM_DEV_FLASH_CHUNK_TIME_MS_DEFAULT 100 + +static int nsim_dev_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + int i; + + if ((params->overwrite_mask & ~nsim_dev->fw_update_overwrite_mask) != 0) + return -EOPNOTSUPP; + + if (nsim_dev->fw_update_status) { + devlink_flash_update_status_notify(devlink, + "Preparing to flash", + params->component, 0, 0); + } + + for (i = 0; i < NSIM_DEV_FLASH_SIZE / NSIM_DEV_FLASH_CHUNK_SIZE; i++) { + if (nsim_dev->fw_update_status) + devlink_flash_update_status_notify(devlink, "Flashing", + params->component, + i * NSIM_DEV_FLASH_CHUNK_SIZE, + NSIM_DEV_FLASH_SIZE); + msleep(nsim_dev->fw_update_flash_chunk_time_ms ?: 1); + } + + if (nsim_dev->fw_update_status) { + devlink_flash_update_status_notify(devlink, "Flashing", + params->component, + NSIM_DEV_FLASH_SIZE, + NSIM_DEV_FLASH_SIZE); + devlink_flash_update_timeout_notify(devlink, "Flash select", + params->component, 81); + devlink_flash_update_status_notify(devlink, "Flashing done", + params->component, 0, 0); + } + + return 0; +} + +static struct nsim_trap_item * +nsim_dev_trap_item_lookup(struct nsim_dev *nsim_dev, u16 trap_id) +{ + struct nsim_trap_data *nsim_trap_data = nsim_dev->trap_data; + int i; + + for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) { + if (nsim_traps_arr[i].id == trap_id) + return &nsim_trap_data->trap_items_arr[i]; + } + + return NULL; +} + +static int nsim_dev_devlink_trap_init(struct devlink *devlink, + const struct devlink_trap *trap, + void *trap_ctx) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_trap_item *nsim_trap_item; + + nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id); + if (WARN_ON(!nsim_trap_item)) + return -ENOENT; + + nsim_trap_item->trap_ctx = trap_ctx; + nsim_trap_item->action = trap->init_action; + + return 0; +} + +static int +nsim_dev_devlink_trap_action_set(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_trap_item *nsim_trap_item; + + nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id); + if (WARN_ON(!nsim_trap_item)) + return -ENOENT; + + spin_lock(&nsim_dev->trap_data->trap_lock); + nsim_trap_item->action = action; + spin_unlock(&nsim_dev->trap_data->trap_lock); + + return 0; +} + +static int +nsim_dev_devlink_trap_group_set(struct devlink *devlink, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + if (nsim_dev->fail_trap_group_set) + return -EINVAL; + + return 0; +} + +static int 
+nsim_dev_devlink_trap_policer_set(struct devlink *devlink, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + if (nsim_dev->fail_trap_policer_set) { + NL_SET_ERR_MSG_MOD(extack, "User setup the operation to fail for testing purposes"); + return -EINVAL; + } + + return 0; +} + +static int +nsim_dev_devlink_trap_policer_counter_get(struct devlink *devlink, + const struct devlink_trap_policer *policer, + u64 *p_drops) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + u64 *cnt; + + if (nsim_dev->fail_trap_policer_counter_get) + return -EINVAL; + + cnt = &nsim_dev->trap_data->trap_policers_cnt_arr[policer->id - 1]; + *p_drops = (*cnt)++; + + return 0; +} + +#define NSIM_LINK_SPEED_MAX 5000 /* Mbps */ +#define NSIM_LINK_SPEED_UNIT 125000 /* 1 Mbps given in bytes/sec to avoid + * u64 overflow during conversion from + * bytes to bits. + */ + +static int nsim_rate_bytes_to_units(char *name, u64 *rate, struct netlink_ext_ack *extack) +{ + u64 val; + u32 rem; + + val = div_u64_rem(*rate, NSIM_LINK_SPEED_UNIT, &rem); + if (rem) { + pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", + name, *rate); + NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps."); + return -EINVAL; + } + + if (val > NSIM_LINK_SPEED_MAX) { + pr_err("%s rate value %lluMbps exceed link maximum speed 5000Mbps.\n", + name, val); + NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed 5000Mbps."); + return -EINVAL; + } + *rate = val; + return 0; +} + +static int nsim_leaf_tc_bw_set(struct devlink_rate *devlink_rate, + void *priv, u32 *tc_bw, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv; + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) + nsim_dev_port->tc_bw[i] = tc_bw[i]; + + return 0; +} + +static int nsim_leaf_tx_share_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv; + struct nsim_dev *nsim_dev = nsim_dev_port->ns->nsim_dev; + int vf_id = nsim_dev_port_index_to_vf_index(nsim_dev_port->port_index); + int err; + + err = nsim_rate_bytes_to_units("tx_share", &tx_share, extack); + if (err) + return err; + + nsim_dev->vfconfigs[vf_id].min_tx_rate = tx_share; + return 0; +} + +static int nsim_leaf_tx_max_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv; + struct nsim_dev *nsim_dev = nsim_dev_port->ns->nsim_dev; + int vf_id = nsim_dev_port_index_to_vf_index(nsim_dev_port->port_index); + int err; + + err = nsim_rate_bytes_to_units("tx_max", &tx_max, extack); + if (err) + return err; + + nsim_dev->vfconfigs[vf_id].max_tx_rate = tx_max; + return 0; +} + +struct nsim_rate_node { + struct dentry *ddir; + struct dentry *rate_parent; + char *parent_name; + u16 tx_share; + u16 tx_max; + u32 tc_bw[DEVLINK_RATE_TCS_MAX]; +}; + +static int nsim_node_tc_bw_set(struct devlink_rate *devlink_rate, void *priv, + u32 *tc_bw, struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) + nsim_node->tc_bw[i] = tc_bw[i]; + + return 0; +} + +static int nsim_node_tx_share_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + int err; + + err = nsim_rate_bytes_to_units("tx_share", &tx_share, 
extack); + if (err) + return err; + + nsim_node->tx_share = tx_share; + return 0; +} + +static int nsim_node_tx_max_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + int err; + + err = nsim_rate_bytes_to_units("tx_max", &tx_max, extack); + if (err) + return err; + + nsim_node->tx_max = tx_max; + return 0; +} + +static int nsim_rate_node_new(struct devlink_rate *node, void **priv, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(node->devlink); + struct nsim_rate_node *nsim_node; + + if (!nsim_esw_mode_is_switchdev(nsim_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Node creation allowed only in switchdev mode."); + return -EOPNOTSUPP; + } + + nsim_node = kzalloc(sizeof(*nsim_node), GFP_KERNEL); + if (!nsim_node) + return -ENOMEM; + + nsim_node->ddir = debugfs_create_dir(node->name, nsim_dev->nodes_ddir); + + debugfs_create_u16("tx_share", 0400, nsim_node->ddir, &nsim_node->tx_share); + debugfs_create_u16("tx_max", 0400, nsim_node->ddir, &nsim_node->tx_max); + nsim_node->rate_parent = debugfs_create_file("rate_parent", 0400, + nsim_node->ddir, + &nsim_node->parent_name, + &nsim_dev_rate_parent_fops); + + nsim_dev_tc_bw_debugfs_init(nsim_node->ddir, nsim_node->tc_bw); + + *priv = nsim_node; + return 0; +} + +static int nsim_rate_node_del(struct devlink_rate *node, void *priv, + struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + + debugfs_remove(nsim_node->rate_parent); + debugfs_remove_recursive(nsim_node->ddir); + kfree(nsim_node); + return 0; +} + +static int nsim_rate_leaf_parent_set(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv_child; + + if (parent) + nsim_dev_port->parent_name = parent->name; + else + nsim_dev_port->parent_name = NULL; + return 0; +} + +static int nsim_rate_node_parent_set(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv_child; + + if (parent) + nsim_node->parent_name = parent->name; + else + nsim_node->parent_name = NULL; + return 0; +} + +static int +nsim_dev_devlink_trap_drop_counter_get(struct devlink *devlink, + const struct devlink_trap *trap, + u64 *p_drops) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + u64 *cnt; + + if (nsim_dev->fail_trap_drop_counter_get) + return -EINVAL; + + cnt = &nsim_dev->trap_data->trap_pkt_cnt; + *p_drops = (*cnt)++; + + return 0; +} + +static const struct devlink_ops nsim_dev_devlink_ops = { + .eswitch_mode_set = nsim_devlink_eswitch_mode_set, + .eswitch_mode_get = nsim_devlink_eswitch_mode_get, + .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), + .reload_down = nsim_dev_reload_down, + .reload_up = nsim_dev_reload_up, + .info_get = nsim_dev_info_get, + .flash_update = nsim_dev_flash_update, + .trap_init = nsim_dev_devlink_trap_init, + .trap_action_set = nsim_dev_devlink_trap_action_set, + .trap_group_set = nsim_dev_devlink_trap_group_set, + .trap_policer_set = nsim_dev_devlink_trap_policer_set, + .trap_policer_counter_get = nsim_dev_devlink_trap_policer_counter_get, + .rate_leaf_tx_share_set = nsim_leaf_tx_share_set, + .rate_leaf_tx_max_set = nsim_leaf_tx_max_set, + .rate_leaf_tc_bw_set = nsim_leaf_tc_bw_set, + 
.rate_node_tx_share_set = nsim_node_tx_share_set, + .rate_node_tx_max_set = nsim_node_tx_max_set, + .rate_node_tc_bw_set = nsim_node_tc_bw_set, + .rate_node_new = nsim_rate_node_new, + .rate_node_del = nsim_rate_node_del, + .rate_leaf_parent_set = nsim_rate_leaf_parent_set, + .rate_node_parent_set = nsim_rate_node_parent_set, + .trap_drop_counter_get = nsim_dev_devlink_trap_drop_counter_get, +}; + +#define NSIM_DEV_MAX_MACS_DEFAULT 32 +#define NSIM_DEV_TEST1_DEFAULT true + +static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, + unsigned int port_index, u8 perm_addr[ETH_ALEN]) +{ + struct devlink_port_attrs attrs = {}; + struct nsim_dev_port *nsim_dev_port; + struct devlink_port *devlink_port; + int err; + + if (type == NSIM_DEV_PORT_TYPE_VF && !nsim_dev_get_vfs(nsim_dev)) + return -EINVAL; + + nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL); + if (!nsim_dev_port) + return -ENOMEM; + nsim_dev_port->port_index = nsim_dev_port_index(type, port_index); + nsim_dev_port->port_type = type; + + devlink_port = &nsim_dev_port->devlink_port; + if (nsim_dev_port_is_pf(nsim_dev_port)) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = port_index + 1; + } else { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; + attrs.pci_vf.pf = 0; + attrs.pci_vf.vf = port_index; + } + memcpy(attrs.switch_id.id, nsim_dev->switch_id.id, nsim_dev->switch_id.id_len); + attrs.switch_id.id_len = nsim_dev->switch_id.id_len; + devlink_port_attrs_set(devlink_port, &attrs); + err = devl_port_register(priv_to_devlink(nsim_dev), devlink_port, + nsim_dev_port->port_index); + if (err) + goto err_port_free; + + err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port); + if (err) + goto err_dl_port_unregister; + + nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port, perm_addr); + if (IS_ERR(nsim_dev_port->ns)) { + err = PTR_ERR(nsim_dev_port->ns); + goto err_port_debugfs_exit; + } + + if (nsim_dev_port_is_vf(nsim_dev_port)) { + err = devl_rate_leaf_create(&nsim_dev_port->devlink_port, + nsim_dev_port, NULL); + if (err) + goto err_nsim_destroy; + } + + list_add(&nsim_dev_port->list, &nsim_dev->port_list); + + return 0; + +err_nsim_destroy: + nsim_destroy(nsim_dev_port->ns); +err_port_debugfs_exit: + nsim_dev_port_debugfs_exit(nsim_dev_port); +err_dl_port_unregister: + devl_port_unregister(devlink_port); +err_port_free: + kfree(nsim_dev_port); + return err; +} + +static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) +{ + struct devlink_port *devlink_port = &nsim_dev_port->devlink_port; + + list_del(&nsim_dev_port->list); + if (nsim_dev_port_is_vf(nsim_dev_port)) + devl_rate_leaf_destroy(&nsim_dev_port->devlink_port); + nsim_destroy(nsim_dev_port->ns); + nsim_dev_port_debugfs_exit(nsim_dev_port); + devl_port_unregister(devlink_port); + kfree(nsim_dev_port); +} + +static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_port *nsim_dev_port, *tmp; + + list_for_each_entry_safe(nsim_dev_port, tmp, + &nsim_dev->port_list, list) + __nsim_dev_port_del(nsim_dev_port); +} + +static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev, + unsigned int port_count) +{ + int i, err; + + for (i = 0; i < port_count; i++) { + err = __nsim_dev_port_add(nsim_dev, NSIM_DEV_PORT_TYPE_PF, i, NULL); + if (err) + goto err_port_del_all; + } + return 0; + +err_port_del_all: + nsim_dev_port_del_all(nsim_dev); + return err; +} + +static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, + struct netlink_ext_ack *extack) +{ + struct nsim_bus_dev 
*nsim_bus_dev = nsim_dev->nsim_bus_dev; + struct devlink *devlink; + int err; + + devlink = priv_to_devlink(nsim_dev); + nsim_dev = devlink_priv(devlink); + INIT_LIST_HEAD(&nsim_dev->port_list); + nsim_dev->fw_update_status = true; + nsim_dev->fw_update_overwrite_mask = 0; + + nsim_devlink_param_load_driverinit_values(devlink); + + err = nsim_dev_dummy_region_init(nsim_dev, devlink); + if (err) + return err; + + err = nsim_dev_traps_init(devlink); + if (err) + goto err_dummy_region_exit; + + nsim_dev->fib_data = nsim_fib_create(devlink, extack); + if (IS_ERR(nsim_dev->fib_data)) { + err = PTR_ERR(nsim_dev->fib_data); + goto err_traps_exit; + } + + err = nsim_dev_health_init(nsim_dev, devlink); + if (err) + goto err_fib_destroy; + + err = nsim_dev_psample_init(nsim_dev); + if (err) + goto err_health_exit; + + err = nsim_dev_hwstats_init(nsim_dev); + if (err) + goto err_psample_exit; + + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_hwstats_exit; + + nsim_dev->take_snapshot = debugfs_create_file("take_snapshot", + 0200, + nsim_dev->ddir, + nsim_dev, + &nsim_dev_take_snapshot_fops); + return 0; + +err_hwstats_exit: + nsim_dev_hwstats_exit(nsim_dev); +err_psample_exit: + nsim_dev_psample_exit(nsim_dev); +err_health_exit: + nsim_dev_health_exit(nsim_dev); +err_fib_destroy: + nsim_fib_destroy(devlink, nsim_dev->fib_data); +err_traps_exit: + nsim_dev_traps_exit(devlink); +err_dummy_region_exit: + nsim_dev_dummy_region_exit(nsim_dev); + return err; +} + +int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) +{ + struct nsim_dev *nsim_dev; + struct devlink *devlink; + int err; + + devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev), + nsim_bus_dev->initial_net, &nsim_bus_dev->dev); + if (!devlink) + return -ENOMEM; + devl_lock(devlink); + nsim_dev = devlink_priv(devlink); + nsim_dev->nsim_bus_dev = nsim_bus_dev; + nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id); + get_random_bytes(nsim_dev->switch_id.id, nsim_dev->switch_id.id_len); + INIT_LIST_HEAD(&nsim_dev->port_list); + nsim_dev->fw_update_status = true; + nsim_dev->fw_update_overwrite_mask = 0; + nsim_dev->fw_update_flash_chunk_time_ms = NSIM_DEV_FLASH_CHUNK_TIME_MS_DEFAULT; + nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT; + nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT; + nsim_dev->test2 = NSIM_DEV_TEST2_DEFAULT; + spin_lock_init(&nsim_dev->fa_cookie_lock); + + dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev); + + nsim_dev->vfconfigs = kcalloc(nsim_bus_dev->max_vfs, + sizeof(struct nsim_vf_config), + GFP_KERNEL | __GFP_NOWARN); + if (!nsim_dev->vfconfigs) { + err = -ENOMEM; + goto err_devlink_unlock; + } + + err = devl_register(devlink); + if (err) + goto err_vfc_free; + + err = nsim_dev_resources_register(devlink); + if (err) + goto err_dl_unregister; + + err = devl_params_register(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); + if (err) + goto err_resource_unregister; + nsim_devlink_set_params_init_values(nsim_dev, devlink); + + err = nsim_dev_dummy_region_init(nsim_dev, devlink); + if (err) + goto err_params_unregister; + + err = nsim_dev_traps_init(devlink); + if (err) + goto err_dummy_region_exit; + + err = nsim_dev_debugfs_init(nsim_dev); + if (err) + goto err_traps_exit; + + nsim_dev->fib_data = nsim_fib_create(devlink, NULL); + if (IS_ERR(nsim_dev->fib_data)) { + err = PTR_ERR(nsim_dev->fib_data); + goto err_debugfs_exit; + } + + err = nsim_dev_health_init(nsim_dev, devlink); + if (err) + goto err_fib_destroy; + + err = nsim_bpf_dev_init(nsim_dev); + if 
(err) + goto err_health_exit; + + err = nsim_dev_psample_init(nsim_dev); + if (err) + goto err_bpf_dev_exit; + + err = nsim_dev_hwstats_init(nsim_dev); + if (err) + goto err_psample_exit; + + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_hwstats_exit; + + nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; + devl_unlock(devlink); + return 0; + +err_hwstats_exit: + nsim_dev_hwstats_exit(nsim_dev); +err_psample_exit: + nsim_dev_psample_exit(nsim_dev); +err_bpf_dev_exit: + nsim_bpf_dev_exit(nsim_dev); +err_health_exit: + nsim_dev_health_exit(nsim_dev); +err_fib_destroy: + nsim_fib_destroy(devlink, nsim_dev->fib_data); +err_debugfs_exit: + nsim_dev_debugfs_exit(nsim_dev); +err_traps_exit: + nsim_dev_traps_exit(devlink); +err_dummy_region_exit: + nsim_dev_dummy_region_exit(nsim_dev); +err_params_unregister: + devl_params_unregister(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); +err_resource_unregister: + devl_resources_unregister(devlink); +err_dl_unregister: + devl_unregister(devlink); +err_vfc_free: + kfree(nsim_dev->vfconfigs); +err_devlink_unlock: + devl_unlock(devlink); + devlink_free(devlink); + dev_set_drvdata(&nsim_bus_dev->dev, NULL); + return err; +} + +static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) +{ + struct devlink *devlink = priv_to_devlink(nsim_dev); + + if (devlink_is_reload_failed(devlink)) + return; + debugfs_remove(nsim_dev->take_snapshot); + + if (nsim_dev_get_vfs(nsim_dev)) { + nsim_bus_dev_set_vfs(nsim_dev->nsim_bus_dev, 0); + if (nsim_esw_mode_is_switchdev(nsim_dev)) + nsim_esw_legacy_enable(nsim_dev, NULL); + } + + nsim_dev_port_del_all(nsim_dev); + nsim_dev_hwstats_exit(nsim_dev); + nsim_dev_psample_exit(nsim_dev); + nsim_dev_health_exit(nsim_dev); + nsim_fib_destroy(devlink, nsim_dev->fib_data); + nsim_dev_traps_exit(devlink); + nsim_dev_dummy_region_exit(nsim_dev); +} + +void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) +{ + struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); + struct devlink *devlink = priv_to_devlink(nsim_dev); + + devl_lock(devlink); + nsim_dev_reload_destroy(nsim_dev); + + nsim_bpf_dev_exit(nsim_dev); + nsim_dev_debugfs_exit(nsim_dev); + devl_params_unregister(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); + devl_resources_unregister(devlink); + devl_unregister(devlink); + kfree(nsim_dev->vfconfigs); + kfree(nsim_dev->fa_cookie); + devl_unlock(devlink); + devlink_free(devlink); + dev_set_drvdata(&nsim_bus_dev->dev, NULL); +} + +static struct nsim_dev_port * +__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, + unsigned int port_index) +{ + struct nsim_dev_port *nsim_dev_port; + + port_index = nsim_dev_port_index(type, port_index); + list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) + if (nsim_dev_port->port_index == port_index) + return nsim_dev_port; + return NULL; +} + +int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type, + unsigned int port_index, u8 perm_addr[ETH_ALEN]) +{ + struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); + int err; + + devl_lock(priv_to_devlink(nsim_dev)); + if (__nsim_dev_port_lookup(nsim_dev, type, port_index)) + err = -EEXIST; + else + err = __nsim_dev_port_add(nsim_dev, type, port_index, perm_addr); + devl_unlock(priv_to_devlink(nsim_dev)); + return err; +} + +int nsim_drv_port_del(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type, + unsigned int port_index) +{ + struct nsim_dev *nsim_dev = 
dev_get_drvdata(&nsim_bus_dev->dev); + struct nsim_dev_port *nsim_dev_port; + int err = 0; + + devl_lock(priv_to_devlink(nsim_dev)); + nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, type, port_index); + if (!nsim_dev_port) + err = -ENOENT; + else + __nsim_dev_port_del(nsim_dev_port); + devl_unlock(priv_to_devlink(nsim_dev)); + return err; +} + +int nsim_drv_configure_vfs(struct nsim_bus_dev *nsim_bus_dev, + unsigned int num_vfs) +{ + struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); + struct devlink *devlink = priv_to_devlink(nsim_dev); + int ret = 0; + + devl_lock(devlink); + if (nsim_bus_dev->num_vfs == num_vfs) + goto exit_unlock; + if (nsim_bus_dev->num_vfs && num_vfs) { + ret = -EBUSY; + goto exit_unlock; + } + if (nsim_bus_dev->max_vfs < num_vfs) { + ret = -ENOMEM; + goto exit_unlock; + } + + nsim_bus_dev_set_vfs(nsim_bus_dev, num_vfs); + if (nsim_esw_mode_is_switchdev(nsim_dev)) { + if (num_vfs) { + ret = nsim_esw_switchdev_enable(nsim_dev, NULL); + if (ret) { + nsim_bus_dev_set_vfs(nsim_bus_dev, 0); + goto exit_unlock; + } + } else { + nsim_esw_legacy_enable(nsim_dev, NULL); + } + } + +exit_unlock: + devl_unlock(devlink); + + return ret; +} + +int nsim_dev_init(void) +{ + nsim_dev_ddir = debugfs_create_dir(DRV_NAME, NULL); + return PTR_ERR_OR_ZERO(nsim_dev_ddir); +} + +void nsim_dev_exit(void) +{ + debugfs_remove_recursive(nsim_dev_ddir); +} diff --git a/drivers/net/netdevsim/devlink.c b/drivers/net/netdevsim/devlink.c deleted file mode 100644 index 5135fc371f01..000000000000 --- a/drivers/net/netdevsim/devlink.c +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright (c) 2018 Cumulus Networks. All rights reserved. - * Copyright (c) 2018 David Ahern <dsa@cumulusnetworks.com> - * - * This software is licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree. - * - * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" - * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE - * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME - * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
- */ - -#include <linux/device.h> -#include <net/devlink.h> -#include <net/netns/generic.h> - -#include "netdevsim.h" - -static unsigned int nsim_devlink_id; - -/* place holder until devlink and namespaces is sorted out */ -static struct net *nsim_devlink_net(struct devlink *devlink) -{ - return &init_net; -} - -/* IPv4 - */ -static u64 nsim_ipv4_fib_resource_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false); -} - -static u64 nsim_ipv4_fib_rules_res_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false); -} - -/* IPv6 - */ -static u64 nsim_ipv6_fib_resource_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false); -} - -static u64 nsim_ipv6_fib_rules_res_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false); -} - -static int devlink_resources_register(struct devlink *devlink) -{ - struct devlink_resource_size_params params = { - .size_max = (u64)-1, - .size_granularity = 1, - .unit = DEVLINK_RESOURCE_UNIT_ENTRY - }; - struct net *net = nsim_devlink_net(devlink); - int err; - u64 n; - - /* Resources for IPv4 */ - err = devlink_resource_register(devlink, "IPv4", (u64)-1, - NSIM_RESOURCE_IPV4, - DEVLINK_RESOURCE_ID_PARENT_TOP, - ¶ms); - if (err) { - pr_err("Failed to register IPv4 top resource\n"); - goto out; - } - - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true); - err = devlink_resource_register(devlink, "fib", n, - NSIM_RESOURCE_IPV4_FIB, - NSIM_RESOURCE_IPV4, ¶ms); - if (err) { - pr_err("Failed to register IPv4 FIB resource\n"); - return err; - } - - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true); - err = devlink_resource_register(devlink, "fib-rules", n, - NSIM_RESOURCE_IPV4_FIB_RULES, - NSIM_RESOURCE_IPV4, ¶ms); - if (err) { - pr_err("Failed to register IPv4 FIB rules resource\n"); - return err; - } - - /* Resources for IPv6 */ - err = devlink_resource_register(devlink, "IPv6", (u64)-1, - NSIM_RESOURCE_IPV6, - DEVLINK_RESOURCE_ID_PARENT_TOP, - ¶ms); - if (err) { - pr_err("Failed to register IPv6 top resource\n"); - goto out; - } - - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true); - err = devlink_resource_register(devlink, "fib", n, - NSIM_RESOURCE_IPV6_FIB, - NSIM_RESOURCE_IPV6, ¶ms); - if (err) { - pr_err("Failed to register IPv6 FIB resource\n"); - return err; - } - - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true); - err = devlink_resource_register(devlink, "fib-rules", n, - NSIM_RESOURCE_IPV6_FIB_RULES, - NSIM_RESOURCE_IPV6, ¶ms); - if (err) { - pr_err("Failed to register IPv6 FIB rules resource\n"); - return err; - } - - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV4_FIB, - nsim_ipv4_fib_resource_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV4_FIB_RULES, - nsim_ipv4_fib_rules_res_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV6_FIB, - nsim_ipv6_fib_resource_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV6_FIB_RULES, - nsim_ipv6_fib_rules_res_occ_get, - net); -out: - return err; -} - -static int nsim_devlink_reload(struct devlink *devlink, - struct netlink_ext_ack *extack) -{ - enum nsim_resource_id res_ids[] = { - NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, - NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES - }; - struct net *net = nsim_devlink_net(devlink); - 
int i; - - for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { - int err; - u64 val; - - err = devlink_resource_size_get(devlink, res_ids[i], &val); - if (!err) { - err = nsim_fib_set_max(net, res_ids[i], val, extack); - if (err) - return err; - } - } - - return 0; -} - -static void nsim_devlink_net_reset(struct net *net) -{ - enum nsim_resource_id res_ids[] = { - NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, - NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES - }; - int i; - - for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { - if (nsim_fib_set_max(net, res_ids[i], (u64)-1, NULL)) { - pr_err("Failed to reset limit for resource %u\n", - res_ids[i]); - } - } -} - -static const struct devlink_ops nsim_devlink_ops = { - .reload = nsim_devlink_reload, -}; - -/* once devlink / namespace issues are sorted out - * this needs to be net in which a devlink instance - * is to be created. e.g., dev_net(ns->netdev) - */ -static struct net *nsim_to_net(struct netdevsim *ns) -{ - return &init_net; -} - -void nsim_devlink_teardown(struct netdevsim *ns) -{ - if (ns->devlink) { - struct net *net = nsim_to_net(ns); - bool *reg_devlink = net_generic(net, nsim_devlink_id); - - devlink_resources_unregister(ns->devlink, NULL); - devlink_unregister(ns->devlink); - devlink_free(ns->devlink); - ns->devlink = NULL; - - nsim_devlink_net_reset(net); - *reg_devlink = true; - } -} - -int nsim_devlink_setup(struct netdevsim *ns) -{ - struct net *net = nsim_to_net(ns); - bool *reg_devlink = net_generic(net, nsim_devlink_id); - struct devlink *devlink; - int err; - - /* only one device per namespace controls devlink */ - if (!*reg_devlink) { - ns->devlink = NULL; - return 0; - } - - devlink = devlink_alloc(&nsim_devlink_ops, 0); - if (!devlink) - return -ENOMEM; - - err = devlink_register(devlink, &ns->dev); - if (err) - goto err_devlink_free; - - err = devlink_resources_register(devlink); - if (err) - goto err_dl_unregister; - - ns->devlink = devlink; - - *reg_devlink = false; - - return 0; - -err_dl_unregister: - devlink_unregister(devlink); -err_devlink_free: - devlink_free(devlink); - - return err; -} - -/* Initialize per network namespace state */ -static int __net_init nsim_devlink_netns_init(struct net *net) -{ - bool *reg_devlink = net_generic(net, nsim_devlink_id); - - *reg_devlink = true; - - return 0; -} - -static struct pernet_operations nsim_devlink_net_ops = { - .init = nsim_devlink_netns_init, - .id = &nsim_devlink_id, - .size = sizeof(bool), -}; - -void nsim_devlink_exit(void) -{ - unregister_pernet_subsys(&nsim_devlink_net_ops); - nsim_fib_exit(); -} - -int nsim_devlink_init(void) -{ - int err; - - err = nsim_fib_init(); - if (err) - goto err_out; - - err = register_pernet_subsys(&nsim_devlink_net_ops); - if (err) - nsim_fib_exit(); - -err_out: - return err; -} diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c new file mode 100644 index 000000000000..36a201533aae --- /dev/null +++ b/drivers/net/netdevsim/ethtool.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/debugfs.h> +#include <linux/random.h> +#include <net/netdev_queues.h> + +#include "netdevsim.h" + +static void +nsim_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (ns->ethtool.pauseparam.report_stats_rx) + pause_stats->rx_pause_frames = 1; + if (ns->ethtool.pauseparam.report_stats_tx) + pause_stats->tx_pause_frames = 2; +} + +static void +nsim_get_pauseparam(struct net_device 
*dev, struct ethtool_pauseparam *pause) +{ + struct netdevsim *ns = netdev_priv(dev); + + pause->autoneg = 0; /* We don't support ksettings, so can't pretend */ + pause->rx_pause = ns->ethtool.pauseparam.rx; + pause->tx_pause = ns->ethtool.pauseparam.tx; +} + +static int +nsim_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (pause->autoneg) + return -EINVAL; + + ns->ethtool.pauseparam.rx = pause->rx_pause; + ns->ethtool.pauseparam.tx = pause->tx_pause; + return 0; +} + +static int nsim_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct netdevsim *ns = netdev_priv(dev); + + memcpy(coal, &ns->ethtool.coalesce, sizeof(ns->ethtool.coalesce)); + return 0; +} + +static int nsim_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct netdevsim *ns = netdev_priv(dev); + + memcpy(&ns->ethtool.coalesce, coal, sizeof(ns->ethtool.coalesce)); + return 0; +} + +static void nsim_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct netdevsim *ns = netdev_priv(dev); + + memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); + kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; + + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; +} + +static int nsim_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct netdevsim *ns = netdev_priv(dev); + + ns->ethtool.ring.rx_pending = ring->rx_pending; + ns->ethtool.ring.rx_jumbo_pending = ring->rx_jumbo_pending; + ns->ethtool.ring.rx_mini_pending = ring->rx_mini_pending; + ns->ethtool.ring.tx_pending = ring->tx_pending; + return 0; +} + +static void +nsim_get_channels(struct net_device *dev, struct ethtool_channels *ch) +{ + struct netdevsim *ns = netdev_priv(dev); + + ch->max_combined = ns->nsim_bus_dev->num_queues; + ch->combined_count = ns->ethtool.channels; +} + +static void +nsim_wake_queues(struct net_device *dev) +{ + struct netdevsim *ns = netdev_priv(dev); + struct netdevsim *peer; + + synchronize_net(); + netif_tx_wake_all_queues(dev); + + rcu_read_lock(); + peer = rcu_dereference(ns->peer); + if (peer) + netif_tx_wake_all_queues(peer->netdev); + rcu_read_unlock(); +} + +static int +nsim_set_channels(struct net_device *dev, struct ethtool_channels *ch) +{ + struct netdevsim *ns = netdev_priv(dev); + int err; + + err = netif_set_real_num_queues(dev, ch->combined_count, + ch->combined_count); + if (err) + return err; + + ns->ethtool.channels = ch->combined_count; + + /* Only wake up queues if devices are linked */ + if (rcu_access_pointer(ns->peer)) + nsim_wake_queues(dev); + + return 0; +} + +static int +nsim_get_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (ns->ethtool.get_err) + return -ns->ethtool.get_err; + memcpy(fecparam, &ns->ethtool.fec, sizeof(ns->ethtool.fec)); + return 0; +} + +static int +nsim_set_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam) +{ + struct netdevsim *ns = netdev_priv(dev); + u32 fec; + + if (ns->ethtool.set_err) + return -ns->ethtool.set_err; + 
memcpy(&ns->ethtool.fec, fecparam, sizeof(ns->ethtool.fec)); + fec = fecparam->fec; + if (fec == ETHTOOL_FEC_AUTO) + fec |= ETHTOOL_FEC_OFF; + fec |= ETHTOOL_FEC_NONE; + ns->ethtool.fec.active_fec = 1 << (fls(fec) - 1); + return 0; +} + +static const struct ethtool_fec_hist_range netdevsim_fec_ranges[] = { + { 0, 0}, + { 1, 3}, + { 4, 7}, + { 0, 0} +}; + +static void +nsim_get_fec_stats(struct net_device *dev, struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) +{ + struct ethtool_fec_hist_value *values = hist->values; + + hist->ranges = netdevsim_fec_ranges; + + fec_stats->corrected_blocks.total = 123; + fec_stats->uncorrectable_blocks.total = 4; + + values[0].per_lane[0] = 125; + values[0].per_lane[1] = 120; + values[0].per_lane[2] = 100; + values[0].per_lane[3] = 100; + values[1].sum = 12; + values[2].sum = 2; + values[2].per_lane[0] = 2; + values[2].per_lane[1] = 0; + values[2].per_lane[2] = 0; + values[2].per_lane[3] = 0; +} + +static int nsim_get_ts_info(struct net_device *dev, + struct kernel_ethtool_ts_info *info) +{ + struct netdevsim *ns = netdev_priv(dev); + + info->phc_index = mock_phc_index(ns->phc); + + return 0; +} + +static const struct ethtool_ops nsim_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_ALL_PARAMS, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | + ETHTOOL_RING_USE_HDS_THRS, + .get_pause_stats = nsim_get_pause_stats, + .get_pauseparam = nsim_get_pauseparam, + .set_pauseparam = nsim_set_pauseparam, + .set_coalesce = nsim_set_coalesce, + .get_coalesce = nsim_get_coalesce, + .get_ringparam = nsim_get_ringparam, + .set_ringparam = nsim_set_ringparam, + .get_channels = nsim_get_channels, + .set_channels = nsim_set_channels, + .get_fecparam = nsim_get_fecparam, + .set_fecparam = nsim_set_fecparam, + .get_fec_stats = nsim_get_fec_stats, + .get_ts_info = nsim_get_ts_info, +}; + +static void nsim_ethtool_ring_init(struct netdevsim *ns) +{ + ns->ethtool.ring.rx_pending = 512; + ns->ethtool.ring.rx_max_pending = 4096; + ns->ethtool.ring.rx_jumbo_max_pending = 4096; + ns->ethtool.ring.rx_mini_max_pending = 4096; + ns->ethtool.ring.tx_pending = 512; + ns->ethtool.ring.tx_max_pending = 4096; +} + +void nsim_ethtool_init(struct netdevsim *ns) +{ + struct dentry *ethtool, *dir; + + ns->netdev->ethtool_ops = &nsim_ethtool_ops; + + nsim_ethtool_ring_init(ns); + + ns->ethtool.pauseparam.report_stats_rx = true; + ns->ethtool.pauseparam.report_stats_tx = true; + + ns->ethtool.fec.fec = ETHTOOL_FEC_NONE; + ns->ethtool.fec.active_fec = ETHTOOL_FEC_NONE; + + ns->ethtool.channels = ns->nsim_bus_dev->num_queues; + + ethtool = debugfs_create_dir("ethtool", ns->nsim_dev_port->ddir); + + debugfs_create_u32("get_err", 0600, ethtool, &ns->ethtool.get_err); + debugfs_create_u32("set_err", 0600, ethtool, &ns->ethtool.set_err); + + dir = debugfs_create_dir("pause", ethtool); + debugfs_create_bool("report_stats_rx", 0600, dir, + &ns->ethtool.pauseparam.report_stats_rx); + debugfs_create_bool("report_stats_tx", 0600, dir, + &ns->ethtool.pauseparam.report_stats_tx); + + dir = debugfs_create_dir("ring", ethtool); + debugfs_create_u32("rx_max_pending", 0600, dir, + &ns->ethtool.ring.rx_max_pending); + debugfs_create_u32("rx_jumbo_max_pending", 0600, dir, + &ns->ethtool.ring.rx_jumbo_max_pending); + debugfs_create_u32("rx_mini_max_pending", 0600, dir, + &ns->ethtool.ring.rx_mini_max_pending); + debugfs_create_u32("tx_max_pending", 0600, dir, + &ns->ethtool.ring.tx_max_pending); +} diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c 
index f61d094746c0..16c382c42227 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -14,17 +14,27 @@ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. */ +#include <linux/bitmap.h> +#include <linux/in6.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/rhashtable.h> +#include <linux/spinlock_types.h> +#include <linux/types.h> #include <net/fib_notifier.h> +#include <net/inet_dscp.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/fib_rules.h> -#include <net/netns/generic.h> +#include <net/net_namespace.h> +#include <net/nexthop.h> +#include <linux/debugfs.h> #include "netdevsim.h" struct nsim_fib_entry { u64 max; - u64 num; + atomic64_t num; }; struct nsim_per_fib_data { @@ -33,15 +43,99 @@ struct nsim_per_fib_data { }; struct nsim_fib_data { + struct notifier_block fib_nb; struct nsim_per_fib_data ipv4; struct nsim_per_fib_data ipv6; + struct nsim_fib_entry nexthops; + struct rhashtable fib_rt_ht; + struct list_head fib_rt_list; + struct mutex fib_lock; /* Protects FIB HT and list */ + struct notifier_block nexthop_nb; + struct rhashtable nexthop_ht; + struct devlink *devlink; + struct work_struct fib_event_work; + struct work_struct fib_flush_work; + struct list_head fib_event_queue; + spinlock_t fib_event_queue_lock; /* Protects fib event queue list */ + struct mutex nh_lock; /* Protects NH HT */ + struct dentry *ddir; + bool fail_route_offload; + bool fail_res_nexthop_group_replace; + bool fail_nexthop_bucket_replace; + bool fail_route_delete; }; -static unsigned int nsim_fib_net_id; +struct nsim_fib_rt_key { + unsigned char addr[sizeof(struct in6_addr)]; + unsigned char prefix_len; + int family; + u32 tb_id; +}; + +struct nsim_fib_rt { + struct nsim_fib_rt_key key; + struct rhash_head ht_node; + struct list_head list; /* Member of fib_rt_list */ +}; + +struct nsim_fib4_rt { + struct nsim_fib_rt common; + struct fib_info *fi; + dscp_t dscp; + u8 type; +}; + +struct nsim_fib6_rt { + struct nsim_fib_rt common; + struct list_head nh_list; + unsigned int nhs; +}; + +struct nsim_fib6_rt_nh { + struct list_head list; /* Member of nh_list */ + struct fib6_info *rt; +}; -u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max) +struct nsim_fib6_event { + struct fib6_info **rt_arr; + unsigned int nrt6; +}; + +struct nsim_fib_event { + struct list_head list; /* node in fib queue */ + union { + struct fib_entry_notifier_info fen_info; + struct nsim_fib6_event fib6_event; + }; + struct nsim_fib_data *data; + unsigned long event; + int family; +}; + +static const struct rhashtable_params nsim_fib_rt_ht_params = { + .key_offset = offsetof(struct nsim_fib_rt, key), + .head_offset = offsetof(struct nsim_fib_rt, ht_node), + .key_len = sizeof(struct nsim_fib_rt_key), + .automatic_shrinking = true, +}; + +struct nsim_nexthop { + struct rhash_head ht_node; + u64 occ; + u32 id; + bool is_resilient; +}; + +static const struct rhashtable_params nsim_nexthop_ht_params = { + .key_offset = offsetof(struct nsim_nexthop, id), + .head_offset = offsetof(struct nsim_nexthop, ht_node), + .key_len = sizeof(u32), + .automatic_shrinking = true, +}; + +u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, + enum nsim_resource_id res_id, bool max) { - struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id); struct nsim_fib_entry *entry; switch (res_id) { @@ -57,19 +151,20 @@ u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max) case NSIM_RESOURCE_IPV6_FIB_RULES: entry = &fib_data->ipv6.rules; 
break; + case NSIM_RESOURCE_NEXTHOPS: + entry = &fib_data->nexthops; + break; default: return 0; } - return max ? entry->max : entry->num; + return max ? entry->max : atomic64_read(&entry->num); } -int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, - struct netlink_ext_ack *extack) +static void nsim_fib_set_max(struct nsim_fib_data *fib_data, + enum nsim_resource_id res_id, u64 val) { - struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id); struct nsim_fib_entry *entry; - int err = 0; switch (res_id) { case NSIM_RESOURCE_IPV4_FIB: @@ -84,21 +179,14 @@ int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, case NSIM_RESOURCE_IPV6_FIB_RULES: entry = &fib_data->ipv6.rules; break; + case NSIM_RESOURCE_NEXTHOPS: + entry = &fib_data->nexthops; + break; default: - return 0; - } - - /* not allowing a new max to be less than curren occupancy - * --> no means of evicting entries - */ - if (val < entry->num) { - NL_SET_ERR_MSG_MOD(extack, "New size is less than current occupancy"); - err = -EINVAL; - } else { - entry->max = val; + WARN_ON(1); + return; } - - return err; + entry->max = val; } static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, @@ -107,22 +195,20 @@ static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, int err = 0; if (add) { - if (entry->num < entry->max) { - entry->num++; - } else { + if (!atomic64_add_unless(&entry->num, 1, entry->max)) { err = -ENOSPC; NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib rule entries"); } } else { - entry->num--; + atomic64_dec_if_positive(&entry->num); } return err; } -static int nsim_fib_rule_event(struct fib_notifier_info *info, bool add) +static int nsim_fib_rule_event(struct nsim_fib_data *data, + struct fib_notifier_info *info, bool add) { - struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id); struct netlink_ext_ack *extack = info->extack; int err = 0; @@ -138,129 +224,1447 @@ static int nsim_fib_rule_event(struct fib_notifier_info *info, bool add) return err; } -static int nsim_fib_account(struct nsim_fib_entry *entry, bool add, - struct netlink_ext_ack *extack) +static int nsim_fib_account(struct nsim_fib_entry *entry, bool add) { int err = 0; if (add) { - if (entry->num < entry->max) { - entry->num++; - } else { + if (!atomic64_add_unless(&entry->num, 1, entry->max)) err = -ENOSPC; - NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries"); - } } else { - entry->num--; + atomic64_dec_if_positive(&entry->num); } return err; } -static int nsim_fib_event(struct fib_notifier_info *info, bool add) +static void nsim_fib_rt_init(struct nsim_fib_data *data, + struct nsim_fib_rt *fib_rt, const void *addr, + size_t addr_len, unsigned int prefix_len, + int family, u32 tb_id) +{ + memcpy(fib_rt->key.addr, addr, addr_len); + fib_rt->key.prefix_len = prefix_len; + fib_rt->key.family = family; + fib_rt->key.tb_id = tb_id; + list_add(&fib_rt->list, &data->fib_rt_list); +} + +static void nsim_fib_rt_fini(struct nsim_fib_rt *fib_rt) +{ + list_del(&fib_rt->list); +} + +static struct nsim_fib_rt *nsim_fib_rt_lookup(struct rhashtable *fib_rt_ht, + const void *addr, size_t addr_len, + unsigned int prefix_len, + int family, u32 tb_id) +{ + struct nsim_fib_rt_key key; + + memset(&key, 0, sizeof(key)); + memcpy(key.addr, addr, addr_len); + key.prefix_len = prefix_len; + key.family = family; + key.tb_id = tb_id; + + return rhashtable_lookup_fast(fib_rt_ht, &key, nsim_fib_rt_ht_params); +} + +static struct nsim_fib4_rt * 
+nsim_fib4_rt_create(struct nsim_fib_data *data, + struct fib_entry_notifier_info *fen_info) +{ + struct nsim_fib4_rt *fib4_rt; + + fib4_rt = kzalloc(sizeof(*fib4_rt), GFP_KERNEL); + if (!fib4_rt) + return NULL; + + nsim_fib_rt_init(data, &fib4_rt->common, &fen_info->dst, sizeof(u32), + fen_info->dst_len, AF_INET, fen_info->tb_id); + + fib4_rt->fi = fen_info->fi; + fib_info_hold(fib4_rt->fi); + fib4_rt->dscp = fen_info->dscp; + fib4_rt->type = fen_info->type; + + return fib4_rt; +} + +static void nsim_fib4_rt_destroy(struct nsim_fib4_rt *fib4_rt) +{ + fib_info_put(fib4_rt->fi); + nsim_fib_rt_fini(&fib4_rt->common); + kfree(fib4_rt); +} + +static struct nsim_fib4_rt * +nsim_fib4_rt_lookup(struct rhashtable *fib_rt_ht, + const struct fib_entry_notifier_info *fen_info) +{ + struct nsim_fib_rt *fib_rt; + + fib_rt = nsim_fib_rt_lookup(fib_rt_ht, &fen_info->dst, sizeof(u32), + fen_info->dst_len, AF_INET, + fen_info->tb_id); + if (!fib_rt) + return NULL; + + return container_of(fib_rt, struct nsim_fib4_rt, common); +} + +static void +nsim_fib4_rt_offload_failed_flag_set(struct net *net, + struct fib_entry_notifier_info *fen_info) +{ + u32 *p_dst = (u32 *)&fen_info->dst; + struct fib_rt_info fri; + + fri.fi = fen_info->fi; + fri.tb_id = fen_info->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = fen_info->dst_len; + fri.dscp = fen_info->dscp; + fri.type = fen_info->type; + fri.offload = false; + fri.trap = false; + fri.offload_failed = true; + fib_alias_hw_flags_set(net, &fri); +} + +static void nsim_fib4_rt_hw_flags_set(struct net *net, + const struct nsim_fib4_rt *fib4_rt, + bool trap) +{ + u32 *p_dst = (u32 *) fib4_rt->common.key.addr; + int dst_len = fib4_rt->common.key.prefix_len; + struct fib_rt_info fri; + + fri.fi = fib4_rt->fi; + fri.tb_id = fib4_rt->common.key.tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.dscp = fib4_rt->dscp; + fri.type = fib4_rt->type; + fri.offload = false; + fri.trap = trap; + fri.offload_failed = false; + fib_alias_hw_flags_set(net, &fri); +} + +static int nsim_fib4_rt_add(struct nsim_fib_data *data, + struct nsim_fib4_rt *fib4_rt) +{ + struct net *net = devlink_net(data->devlink); + int err; + + err = rhashtable_insert_fast(&data->fib_rt_ht, + &fib4_rt->common.ht_node, + nsim_fib_rt_ht_params); + if (err) + goto err_fib_dismiss; + + /* Simulate hardware programming latency. */ + msleep(1); + nsim_fib4_rt_hw_flags_set(net, fib4_rt, true); + + return 0; + +err_fib_dismiss: + /* Drop the accounting that was increased from the notification + * context when FIB_EVENT_ENTRY_REPLACE was triggered. + */ + nsim_fib_account(&data->ipv4.fib, false); + return err; +} + +static int nsim_fib4_rt_replace(struct nsim_fib_data *data, + struct nsim_fib4_rt *fib4_rt, + struct nsim_fib4_rt *fib4_rt_old) +{ + struct net *net = devlink_net(data->devlink); + int err; + + /* We are replacing a route, so need to remove the accounting which + * was increased when FIB_EVENT_ENTRY_REPLACE was triggered. 
+ */ + err = nsim_fib_account(&data->ipv4.fib, false); + if (err) + return err; + err = rhashtable_replace_fast(&data->fib_rt_ht, + &fib4_rt_old->common.ht_node, + &fib4_rt->common.ht_node, + nsim_fib_rt_ht_params); + if (err) + return err; + + msleep(1); + nsim_fib4_rt_hw_flags_set(net, fib4_rt, true); + + nsim_fib4_rt_hw_flags_set(net, fib4_rt_old, false); + nsim_fib4_rt_destroy(fib4_rt_old); + + return 0; +} + +static int nsim_fib4_rt_insert(struct nsim_fib_data *data, + struct fib_entry_notifier_info *fen_info) +{ + struct nsim_fib4_rt *fib4_rt, *fib4_rt_old; + int err; + + if (data->fail_route_offload) { + /* For testing purposes, user set debugfs fail_route_offload + * value to true. Simulate hardware programming latency and then + * fail. + */ + msleep(1); + return -EINVAL; + } + + fib4_rt = nsim_fib4_rt_create(data, fen_info); + if (!fib4_rt) + return -ENOMEM; + + fib4_rt_old = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info); + if (!fib4_rt_old) + err = nsim_fib4_rt_add(data, fib4_rt); + else + err = nsim_fib4_rt_replace(data, fib4_rt, fib4_rt_old); + + if (err) + nsim_fib4_rt_destroy(fib4_rt); + + return err; +} + +static void nsim_fib4_rt_remove(struct nsim_fib_data *data, + const struct fib_entry_notifier_info *fen_info) +{ + struct nsim_fib4_rt *fib4_rt; + + fib4_rt = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info); + if (!fib4_rt) + return; + + rhashtable_remove_fast(&data->fib_rt_ht, &fib4_rt->common.ht_node, + nsim_fib_rt_ht_params); + nsim_fib4_rt_destroy(fib4_rt); +} + +static int nsim_fib4_event(struct nsim_fib_data *data, + struct fib_entry_notifier_info *fen_info, + unsigned long event) { - struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id); - struct netlink_ext_ack *extack = info->extack; int err = 0; - switch (info->family) { + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + err = nsim_fib4_rt_insert(data, fen_info); + if (err) { + struct net *net = devlink_net(data->devlink); + + nsim_fib4_rt_offload_failed_flag_set(net, fen_info); + } + break; + case FIB_EVENT_ENTRY_DEL: + nsim_fib4_rt_remove(data, fen_info); + break; + default: + break; + } + + return err; +} + +static struct nsim_fib6_rt_nh * +nsim_fib6_rt_nh_find(const struct nsim_fib6_rt *fib6_rt, + const struct fib6_info *rt) +{ + struct nsim_fib6_rt_nh *fib6_rt_nh; + + list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list) { + if (fib6_rt_nh->rt == rt) + return fib6_rt_nh; + } + + return NULL; +} + +static int nsim_fib6_rt_nh_add(struct nsim_fib6_rt *fib6_rt, + struct fib6_info *rt) +{ + struct nsim_fib6_rt_nh *fib6_rt_nh; + + fib6_rt_nh = kzalloc(sizeof(*fib6_rt_nh), GFP_KERNEL); + if (!fib6_rt_nh) + return -ENOMEM; + + fib6_info_hold(rt); + fib6_rt_nh->rt = rt; + list_add_tail(&fib6_rt_nh->list, &fib6_rt->nh_list); + fib6_rt->nhs++; + + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void nsim_rt6_release(struct fib6_info *rt) +{ + fib6_info_release(rt); +} +#else +static void nsim_rt6_release(struct fib6_info *rt) +{ +} +#endif + +static void nsim_fib6_rt_nh_del(struct nsim_fib6_rt *fib6_rt, + const struct fib6_info *rt) +{ + struct nsim_fib6_rt_nh *fib6_rt_nh; + + fib6_rt_nh = nsim_fib6_rt_nh_find(fib6_rt, rt); + if (!fib6_rt_nh) + return; + + fib6_rt->nhs--; + list_del(&fib6_rt_nh->list); + nsim_rt6_release(fib6_rt_nh->rt); + kfree(fib6_rt_nh); +} + +static struct nsim_fib6_rt * +nsim_fib6_rt_create(struct nsim_fib_data *data, + struct fib6_info **rt_arr, unsigned int nrt6) +{ + struct fib6_info *rt = rt_arr[0]; + struct nsim_fib6_rt *fib6_rt; + int i = 0; + int err; + + fib6_rt 
= kzalloc(sizeof(*fib6_rt), GFP_KERNEL); + if (!fib6_rt) + return ERR_PTR(-ENOMEM); + + nsim_fib_rt_init(data, &fib6_rt->common, &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), rt->fib6_dst.plen, AF_INET6, + rt->fib6_table->tb6_id); + + /* We consider a multipath IPv6 route as one entry, but it can be made + * up from several fib6_info structs (one for each nexthop), so we + * add them all to the same list under the entry. + */ + INIT_LIST_HEAD(&fib6_rt->nh_list); + + for (i = 0; i < nrt6; i++) { + err = nsim_fib6_rt_nh_add(fib6_rt, rt_arr[i]); + if (err) + goto err_fib6_rt_nh_del; + } + + return fib6_rt; + +err_fib6_rt_nh_del: + for (i--; i >= 0; i--) { + nsim_fib6_rt_nh_del(fib6_rt, rt_arr[i]); + } + nsim_fib_rt_fini(&fib6_rt->common); + kfree(fib6_rt); + return ERR_PTR(err); +} + +static void nsim_fib6_rt_destroy(struct nsim_fib6_rt *fib6_rt) +{ + struct nsim_fib6_rt_nh *iter, *tmp; + + list_for_each_entry_safe(iter, tmp, &fib6_rt->nh_list, list) + nsim_fib6_rt_nh_del(fib6_rt, iter->rt); + WARN_ON_ONCE(!list_empty(&fib6_rt->nh_list)); + nsim_fib_rt_fini(&fib6_rt->common); + kfree(fib6_rt); +} + +static struct nsim_fib6_rt * +nsim_fib6_rt_lookup(struct rhashtable *fib_rt_ht, const struct fib6_info *rt) +{ + struct nsim_fib_rt *fib_rt; + + fib_rt = nsim_fib_rt_lookup(fib_rt_ht, &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen, AF_INET6, + rt->fib6_table->tb6_id); + if (!fib_rt) + return NULL; + + return container_of(fib_rt, struct nsim_fib6_rt, common); +} + +static int nsim_fib6_rt_append(struct nsim_fib_data *data, + struct nsim_fib6_event *fib6_event) +{ + struct fib6_info *rt = fib6_event->rt_arr[0]; + struct nsim_fib6_rt *fib6_rt; + int i, err; + + if (data->fail_route_offload) { + /* For testing purposes, user set debugfs fail_route_offload + * value to true. Simulate hardware programming latency and then + * fail. 
+ */ + msleep(1); + return -EINVAL; + } + + fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt); + if (!fib6_rt) + return -EINVAL; + + for (i = 0; i < fib6_event->nrt6; i++) { + err = nsim_fib6_rt_nh_add(fib6_rt, fib6_event->rt_arr[i]); + if (err) + goto err_fib6_rt_nh_del; + + WRITE_ONCE(fib6_event->rt_arr[i]->trap, true); + } + + return 0; + +err_fib6_rt_nh_del: + for (i--; i >= 0; i--) { + WRITE_ONCE(fib6_event->rt_arr[i]->trap, false); + nsim_fib6_rt_nh_del(fib6_rt, fib6_event->rt_arr[i]); + } + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void nsim_fib6_rt_offload_failed_flag_set(struct nsim_fib_data *data, + struct fib6_info **rt_arr, + unsigned int nrt6) + +{ + struct net *net = devlink_net(data->devlink); + int i; + + for (i = 0; i < nrt6; i++) + fib6_info_hw_flags_set(net, rt_arr[i], false, false, true); +} +#else +static void nsim_fib6_rt_offload_failed_flag_set(struct nsim_fib_data *data, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) +static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, + const struct nsim_fib6_rt *fib6_rt, + bool trap) +{ + struct net *net = devlink_net(data->devlink); + struct nsim_fib6_rt_nh *fib6_rt_nh; + + list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list) + fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap, false); +} +#else +static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, + const struct nsim_fib6_rt *fib6_rt, + bool trap) +{ +} +#endif + +static int nsim_fib6_rt_add(struct nsim_fib_data *data, + struct nsim_fib6_rt *fib6_rt) +{ + int err; + + err = rhashtable_insert_fast(&data->fib_rt_ht, + &fib6_rt->common.ht_node, + nsim_fib_rt_ht_params); + + if (err) + goto err_fib_dismiss; + + msleep(1); + nsim_fib6_rt_hw_flags_set(data, fib6_rt, true); + + return 0; + +err_fib_dismiss: + /* Drop the accounting that was increased from the notification + * context when FIB_EVENT_ENTRY_REPLACE was triggered. + */ + nsim_fib_account(&data->ipv6.fib, false); + return err; +} + +static int nsim_fib6_rt_replace(struct nsim_fib_data *data, + struct nsim_fib6_rt *fib6_rt, + struct nsim_fib6_rt *fib6_rt_old) +{ + int err; + + /* We are replacing a route, so need to remove the accounting which + * was increased when FIB_EVENT_ENTRY_REPLACE was triggered. + */ + err = nsim_fib_account(&data->ipv6.fib, false); + if (err) + return err; + + err = rhashtable_replace_fast(&data->fib_rt_ht, + &fib6_rt_old->common.ht_node, + &fib6_rt->common.ht_node, + nsim_fib_rt_ht_params); + + if (err) + return err; + + msleep(1); + nsim_fib6_rt_hw_flags_set(data, fib6_rt, true); + + nsim_fib6_rt_hw_flags_set(data, fib6_rt_old, false); + nsim_fib6_rt_destroy(fib6_rt_old); + + return 0; +} + +static int nsim_fib6_rt_insert(struct nsim_fib_data *data, + struct nsim_fib6_event *fib6_event) +{ + struct fib6_info *rt = fib6_event->rt_arr[0]; + struct nsim_fib6_rt *fib6_rt, *fib6_rt_old; + int err; + + if (data->fail_route_offload) { + /* For testing purposes, user set debugfs fail_route_offload + * value to true. Simulate hardware programming latency and then + * fail. 
+ */ + msleep(1); + return -EINVAL; + } + + fib6_rt = nsim_fib6_rt_create(data, fib6_event->rt_arr, + fib6_event->nrt6); + if (IS_ERR(fib6_rt)) + return PTR_ERR(fib6_rt); + + fib6_rt_old = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt); + if (!fib6_rt_old) + err = nsim_fib6_rt_add(data, fib6_rt); + else + err = nsim_fib6_rt_replace(data, fib6_rt, fib6_rt_old); + + if (err) + nsim_fib6_rt_destroy(fib6_rt); + + return err; +} + +static void nsim_fib6_rt_remove(struct nsim_fib_data *data, + struct nsim_fib6_event *fib6_event) +{ + struct fib6_info *rt = fib6_event->rt_arr[0]; + struct nsim_fib6_rt *fib6_rt; + int i; + + /* Multipath routes are first added to the FIB trie and only then + * notified. If we vetoed the addition, we will get a delete + * notification for a route we do not have. Therefore, do not warn if + * route was not found. + */ + fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt); + if (!fib6_rt) + return; + + /* If not all the nexthops are deleted, then only reduce the nexthop + * group. + */ + if (fib6_event->nrt6 != fib6_rt->nhs) { + for (i = 0; i < fib6_event->nrt6; i++) + nsim_fib6_rt_nh_del(fib6_rt, fib6_event->rt_arr[i]); + return; + } + + rhashtable_remove_fast(&data->fib_rt_ht, &fib6_rt->common.ht_node, + nsim_fib_rt_ht_params); + nsim_fib6_rt_destroy(fib6_rt); +} + +static int nsim_fib6_event_init(struct nsim_fib6_event *fib6_event, + struct fib6_entry_notifier_info *fen6_info) +{ + struct fib6_info *rt = fen6_info->rt; + struct fib6_info **rt_arr; + struct fib6_info *iter; + unsigned int nrt6; + int i = 0; + + nrt6 = fen6_info->nsiblings + 1; + + rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC); + if (!rt_arr) + return -ENOMEM; + + fib6_event->rt_arr = rt_arr; + fib6_event->nrt6 = nrt6; + + rt_arr[0] = rt; + fib6_info_hold(rt); + + if (!fen6_info->nsiblings) + return 0; + + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + if (i == fen6_info->nsiblings) + break; + + rt_arr[i + 1] = iter; + fib6_info_hold(iter); + i++; + } + WARN_ON_ONCE(i != fen6_info->nsiblings); + + return 0; +} + +static void nsim_fib6_event_fini(struct nsim_fib6_event *fib6_event) +{ + int i; + + for (i = 0; i < fib6_event->nrt6; i++) + nsim_rt6_release(fib6_event->rt_arr[i]); + kfree(fib6_event->rt_arr); +} + +static int nsim_fib6_event(struct nsim_fib_data *data, + struct nsim_fib6_event *fib6_event, + unsigned long event) +{ + int err; + + if (fib6_event->rt_arr[0]->fib6_src.plen) + return 0; + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + err = nsim_fib6_rt_insert(data, fib6_event); + if (err) + goto err_rt_offload_failed_flag_set; + break; + case FIB_EVENT_ENTRY_APPEND: + err = nsim_fib6_rt_append(data, fib6_event); + if (err) + goto err_rt_offload_failed_flag_set; + break; + case FIB_EVENT_ENTRY_DEL: + nsim_fib6_rt_remove(data, fib6_event); + break; + default: + break; + } + + return 0; + +err_rt_offload_failed_flag_set: + nsim_fib6_rt_offload_failed_flag_set(data, fib6_event->rt_arr, + fib6_event->nrt6); + return err; +} + +static void nsim_fib_event(struct nsim_fib_event *fib_event) +{ + switch (fib_event->family) { case AF_INET: - err = nsim_fib_account(&data->ipv4.fib, add, extack); + nsim_fib4_event(fib_event->data, &fib_event->fen_info, + fib_event->event); + fib_info_put(fib_event->fen_info.fi); break; case AF_INET6: - err = nsim_fib_account(&data->ipv6.fib, add, extack); + nsim_fib6_event(fib_event->data, &fib_event->fib6_event, + fib_event->event); + nsim_fib6_event_fini(&fib_event->fib6_event); + break; + } +} + +static int 
nsim_fib4_prepare_event(struct fib_notifier_info *info, + struct nsim_fib_event *fib_event, + unsigned long event) +{ + struct nsim_fib_data *data = fib_event->data; + struct fib_entry_notifier_info *fen_info; + struct netlink_ext_ack *extack; + int err = 0; + + fen_info = container_of(info, struct fib_entry_notifier_info, + info); + fib_event->fen_info = *fen_info; + extack = info->extack; + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + err = nsim_fib_account(&data->ipv4.fib, true); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries"); + return err; + } + break; + case FIB_EVENT_ENTRY_DEL: + if (data->fail_route_delete) { + NL_SET_ERR_MSG_MOD(extack, "Failed to process route deletion"); + return -EINVAL; + } + nsim_fib_account(&data->ipv4.fib, false); + break; + } + + /* Take reference on fib_info to prevent it from being + * freed while event is queued. Release it afterwards. + */ + fib_info_hold(fib_event->fen_info.fi); + + return 0; +} + +static int nsim_fib6_prepare_event(struct fib_notifier_info *info, + struct nsim_fib_event *fib_event, + unsigned long event) +{ + struct nsim_fib_data *data = fib_event->data; + struct fib6_entry_notifier_info *fen6_info; + struct netlink_ext_ack *extack; + int err = 0; + + fen6_info = container_of(info, struct fib6_entry_notifier_info, + info); + + err = nsim_fib6_event_init(&fib_event->fib6_event, fen6_info); + if (err) + return err; + + extack = info->extack; + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + err = nsim_fib_account(&data->ipv6.fib, true); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries"); + goto err_fib6_event_fini; + } + break; + case FIB_EVENT_ENTRY_DEL: + if (data->fail_route_delete) { + err = -EINVAL; + NL_SET_ERR_MSG_MOD(extack, "Failed to process route deletion"); + goto err_fib6_event_fini; + } + nsim_fib_account(&data->ipv6.fib, false); break; } + return 0; + +err_fib6_event_fini: + nsim_fib6_event_fini(&fib_event->fib6_event); return err; } +static int nsim_fib_event_schedule_work(struct nsim_fib_data *data, + struct fib_notifier_info *info, + unsigned long event) +{ + struct nsim_fib_event *fib_event; + int err; + + if (info->family != AF_INET && info->family != AF_INET6) + /* netdevsim does not support 'RTNL_FAMILY_IP6MR' and + * 'RTNL_FAMILY_IPMR' and should ignore them. 
+ */ + return NOTIFY_DONE; + + fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC); + if (!fib_event) + goto err_fib_event_alloc; + + fib_event->data = data; + fib_event->event = event; + fib_event->family = info->family; + + switch (info->family) { + case AF_INET: + err = nsim_fib4_prepare_event(info, fib_event, event); + break; + case AF_INET6: + err = nsim_fib6_prepare_event(info, fib_event, event); + break; + } + + if (err) + goto err_fib_prepare_event; + + /* Enqueue the event and trigger the work */ + spin_lock_bh(&data->fib_event_queue_lock); + list_add_tail(&fib_event->list, &data->fib_event_queue); + spin_unlock_bh(&data->fib_event_queue_lock); + schedule_work(&data->fib_event_work); + + return NOTIFY_DONE; + +err_fib_prepare_event: + kfree(fib_event); +err_fib_event_alloc: + if (event == FIB_EVENT_ENTRY_DEL) + schedule_work(&data->fib_flush_work); + return NOTIFY_BAD; +} + static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, void *ptr) { + struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, + fib_nb); struct fib_notifier_info *info = ptr; - int err = 0; + int err; switch (event) { - case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_ADD: case FIB_EVENT_RULE_DEL: - err = nsim_fib_rule_event(info, event == FIB_EVENT_RULE_ADD); + err = nsim_fib_rule_event(data, info, + event == FIB_EVENT_RULE_ADD); + return notifier_from_errno(err); + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_APPEND: + case FIB_EVENT_ENTRY_DEL: + return nsim_fib_event_schedule_work(data, info, event); + } + + return NOTIFY_DONE; +} + +static void nsim_fib4_rt_free(struct nsim_fib_rt *fib_rt, + struct nsim_fib_data *data) +{ + struct devlink *devlink = data->devlink; + struct nsim_fib4_rt *fib4_rt; + + fib4_rt = container_of(fib_rt, struct nsim_fib4_rt, common); + nsim_fib4_rt_hw_flags_set(devlink_net(devlink), fib4_rt, false); + nsim_fib_account(&data->ipv4.fib, false); + nsim_fib4_rt_destroy(fib4_rt); +} + +static void nsim_fib6_rt_free(struct nsim_fib_rt *fib_rt, + struct nsim_fib_data *data) +{ + struct nsim_fib6_rt *fib6_rt; + + fib6_rt = container_of(fib_rt, struct nsim_fib6_rt, common); + nsim_fib6_rt_hw_flags_set(data, fib6_rt, false); + nsim_fib_account(&data->ipv6.fib, false); + nsim_fib6_rt_destroy(fib6_rt); +} + +static void nsim_fib_rt_free(void *ptr, void *arg) +{ + struct nsim_fib_rt *fib_rt = ptr; + struct nsim_fib_data *data = arg; + + switch (fib_rt->key.family) { + case AF_INET: + nsim_fib4_rt_free(fib_rt, data); + break; + case AF_INET6: + nsim_fib6_rt_free(fib_rt, data); break; + default: + WARN_ON_ONCE(1); + } +} - case FIB_EVENT_ENTRY_ADD: /* fall through */ - case FIB_EVENT_ENTRY_DEL: - err = nsim_fib_event(info, event == FIB_EVENT_ENTRY_ADD); +/* inconsistent dump, trying again */ +static void nsim_fib_dump_inconsistent(struct notifier_block *nb) +{ + struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, + fib_nb); + struct nsim_fib_rt *fib_rt, *fib_rt_tmp; + + /* Flush the work to make sure there is no race with notifications. */ + flush_work(&data->fib_event_work); + + /* The notifier block is still not registered, so we do not need to + * take any locks here. 
+ */ + list_for_each_entry_safe(fib_rt, fib_rt_tmp, &data->fib_rt_list, list) { + rhashtable_remove_fast(&data->fib_rt_ht, &fib_rt->ht_node, + nsim_fib_rt_ht_params); + nsim_fib_rt_free(fib_rt, data); + } + + atomic64_set(&data->ipv4.rules.num, 0ULL); + atomic64_set(&data->ipv6.rules.num, 0ULL); +} + +static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop; + u64 occ = 0; + int i; + + nexthop = kzalloc(sizeof(*nexthop), GFP_KERNEL); + if (!nexthop) + return ERR_PTR(-ENOMEM); + + nexthop->id = info->id; + + /* Determine the number of nexthop entries the new nexthop will + * occupy. + */ + + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + occ = 1; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + for (i = 0; i < info->nh_grp->num_nh; i++) + occ += info->nh_grp->nh_entries[i].weight; break; + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + occ = info->nh_res_table->num_nh_buckets; + nexthop->is_resilient = true; + break; + default: + NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type"); + kfree(nexthop); + return ERR_PTR(-EOPNOTSUPP); } + nexthop->occ = occ; + return nexthop; +} + +static void nsim_nexthop_destroy(struct nsim_nexthop *nexthop) +{ + kfree(nexthop); +} + +static int nsim_nexthop_account(struct nsim_fib_data *data, u64 occ, + bool add, struct netlink_ext_ack *extack) +{ + int i, err = 0; + + if (add) { + for (i = 0; i < occ; i++) + if (!atomic64_add_unless(&data->nexthops.num, 1, + data->nexthops.max)) { + err = -ENOSPC; + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported nexthops"); + goto err_num_decrease; + } + } else { + if (WARN_ON(occ > atomic64_read(&data->nexthops.num))) + return -EINVAL; + atomic64_sub(occ, &data->nexthops.num); + } + + return err; + +err_num_decrease: + atomic64_sub(i, &data->nexthops.num); + return err; + +} + +static void nsim_nexthop_hw_flags_set(struct net *net, + const struct nsim_nexthop *nexthop, + bool trap) +{ + int i; + + nexthop_set_hw_flags(net, nexthop->id, false, trap); + + if (!nexthop->is_resilient) + return; + + for (i = 0; i < nexthop->occ; i++) + nexthop_bucket_set_hw_flags(net, nexthop->id, i, false, trap); +} + +static int nsim_nexthop_add(struct nsim_fib_data *data, + struct nsim_nexthop *nexthop, + struct netlink_ext_ack *extack) +{ + struct net *net = devlink_net(data->devlink); + int err; + + err = nsim_nexthop_account(data, nexthop->occ, true, extack); + if (err) + return err; + + err = rhashtable_insert_fast(&data->nexthop_ht, &nexthop->ht_node, + nsim_nexthop_ht_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to insert nexthop"); + goto err_nexthop_dismiss; + } + + nsim_nexthop_hw_flags_set(net, nexthop, true); + + return 0; + +err_nexthop_dismiss: + nsim_nexthop_account(data, nexthop->occ, false, extack); + return err; +} + +static int nsim_nexthop_replace(struct nsim_fib_data *data, + struct nsim_nexthop *nexthop, + struct nsim_nexthop *nexthop_old, + struct netlink_ext_ack *extack) +{ + struct net *net = devlink_net(data->devlink); + int err; + + err = nsim_nexthop_account(data, nexthop->occ, true, extack); + if (err) + return err; + + err = rhashtable_replace_fast(&data->nexthop_ht, + &nexthop_old->ht_node, &nexthop->ht_node, + nsim_nexthop_ht_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to replace nexthop"); + goto err_nexthop_dismiss; + } + + nsim_nexthop_hw_flags_set(net, nexthop, true); + nsim_nexthop_account(data, nexthop_old->occ, false, extack); + nsim_nexthop_destroy(nexthop_old); + + 
return 0; + +err_nexthop_dismiss: + nsim_nexthop_account(data, nexthop->occ, false, extack); + return err; +} + +static int nsim_nexthop_insert(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop, *nexthop_old; + int err; + + nexthop = nsim_nexthop_create(data, info); + if (IS_ERR(nexthop)) + return PTR_ERR(nexthop); + + nexthop_old = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, + nsim_nexthop_ht_params); + if (!nexthop_old) + err = nsim_nexthop_add(data, nexthop, info->extack); + else + err = nsim_nexthop_replace(data, nexthop, nexthop_old, + info->extack); + + if (err) + nsim_nexthop_destroy(nexthop); + + return err; +} + +static void nsim_nexthop_remove(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop; + + nexthop = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, + nsim_nexthop_ht_params); + if (!nexthop) + return; + + rhashtable_remove_fast(&data->nexthop_ht, &nexthop->ht_node, + nsim_nexthop_ht_params); + nsim_nexthop_account(data, nexthop->occ, false, info->extack); + nsim_nexthop_destroy(nexthop); +} + +static int nsim_nexthop_res_table_pre_replace(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + if (data->fail_res_nexthop_group_replace) { + NL_SET_ERR_MSG_MOD(info->extack, "Failed to replace a resilient nexthop group"); + return -EINVAL; + } + + return 0; +} + +static int nsim_nexthop_bucket_replace(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + if (data->fail_nexthop_bucket_replace) { + NL_SET_ERR_MSG_MOD(info->extack, "Failed to replace nexthop bucket"); + return -EINVAL; + } + + nexthop_bucket_set_hw_flags(info->net, info->id, + info->nh_res_bucket->bucket_index, + false, true); + + return 0; +} + +static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, + nexthop_nb); + struct nh_notifier_info *info = ptr; + int err = 0; + + mutex_lock(&data->nh_lock); + switch (event) { + case NEXTHOP_EVENT_REPLACE: + err = nsim_nexthop_insert(data, info); + break; + case NEXTHOP_EVENT_DEL: + nsim_nexthop_remove(data, info); + break; + case NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE: + err = nsim_nexthop_res_table_pre_replace(data, info); + break; + case NEXTHOP_EVENT_BUCKET_REPLACE: + err = nsim_nexthop_bucket_replace(data, info); + break; + default: + break; + } + + mutex_unlock(&data->nh_lock); return notifier_from_errno(err); } -/* inconsistent dump, trying again */ -static void nsim_fib_dump_inconsistent(struct notifier_block *nb) +static void nsim_nexthop_free(void *ptr, void *arg) { - struct nsim_fib_data *data; + struct nsim_nexthop *nexthop = ptr; + struct nsim_fib_data *data = arg; struct net *net; - rcu_read_lock(); - for_each_net_rcu(net) { - data = net_generic(net, nsim_fib_net_id); + net = devlink_net(data->devlink); + nsim_nexthop_hw_flags_set(net, nexthop, false); + nsim_nexthop_account(data, nexthop->occ, false, NULL); + nsim_nexthop_destroy(nexthop); +} - data->ipv4.fib.num = 0ULL; - data->ipv4.rules.num = 0ULL; +static ssize_t nsim_nexthop_bucket_activity_write(struct file *file, + const char __user *user_buf, + size_t size, loff_t *ppos) +{ + struct nsim_fib_data *data = file->private_data; + struct net *net = devlink_net(data->devlink); + struct nsim_nexthop *nexthop; + unsigned long *activity; + loff_t pos = *ppos; + u16 bucket_index; + char buf[128]; + int err = 0; + u32 nhid; + + if (pos != 0) + return -EINVAL; + if (size > 
sizeof(buf) - 1) + return -EINVAL; + if (copy_from_user(buf, user_buf, size)) + return -EFAULT; + buf[size] = 0; + + if (sscanf(buf, "%u %hu", &nhid, &bucket_index) != 2) + return -EINVAL; - data->ipv6.fib.num = 0ULL; - data->ipv6.rules.num = 0ULL; + rtnl_lock(); + + nexthop = rhashtable_lookup_fast(&data->nexthop_ht, &nhid, + nsim_nexthop_ht_params); + if (!nexthop || !nexthop->is_resilient || + bucket_index >= nexthop->occ) { + err = -EINVAL; + goto out; } - rcu_read_unlock(); + + activity = bitmap_zalloc(nexthop->occ, GFP_KERNEL); + if (!activity) { + err = -ENOMEM; + goto out; + } + + bitmap_set(activity, bucket_index, 1); + nexthop_res_grp_activity_update(net, nhid, nexthop->occ, activity); + bitmap_free(activity); + +out: + rtnl_unlock(); + + *ppos = size; + return err ?: size; } -static struct notifier_block nsim_fib_nb = { - .notifier_call = nsim_fib_event_nb, +static const struct file_operations nsim_nexthop_bucket_activity_fops = { + .open = simple_open, + .write = nsim_nexthop_bucket_activity_write, + .owner = THIS_MODULE, }; -/* Initialize per network namespace state */ -static int __net_init nsim_fib_netns_init(struct net *net) +static u64 nsim_fib_ipv4_resource_occ_get(void *priv) { - struct nsim_fib_data *data = net_generic(net, nsim_fib_net_id); + struct nsim_fib_data *data = priv; - data->ipv4.fib.max = (u64)-1; - data->ipv4.rules.max = (u64)-1; + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV4_FIB, false); +} - data->ipv6.fib.max = (u64)-1; - data->ipv6.rules.max = (u64)-1; +static u64 nsim_fib_ipv4_rules_res_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; - return 0; + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV4_FIB_RULES, false); } -static struct pernet_operations nsim_fib_net_ops = { - .init = nsim_fib_netns_init, - .id = &nsim_fib_net_id, - .size = sizeof(struct nsim_fib_data), -}; +static u64 nsim_fib_ipv6_resource_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; + + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB, false); +} -void nsim_fib_exit(void) +static u64 nsim_fib_ipv6_rules_res_occ_get(void *priv) { - unregister_pernet_subsys(&nsim_fib_net_ops); - unregister_fib_notifier(&nsim_fib_nb); + struct nsim_fib_data *data = priv; + + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB_RULES, false); +} + +static u64 nsim_fib_nexthops_res_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; + + return nsim_fib_get_val(data, NSIM_RESOURCE_NEXTHOPS, false); +} + +static void nsim_fib_set_max_all(struct nsim_fib_data *data, + struct devlink *devlink) +{ + static const enum nsim_resource_id res_ids[] = { + NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, + NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, + NSIM_RESOURCE_NEXTHOPS, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(res_ids); i++) { + int err; + u64 val; + + err = devl_resource_size_get(devlink, res_ids[i], &val); + if (err) + val = (u64) -1; + nsim_fib_set_max(data, res_ids[i], val); + } } -int nsim_fib_init(void) +static void nsim_fib_event_work(struct work_struct *work) { + struct nsim_fib_data *data = container_of(work, struct nsim_fib_data, + fib_event_work); + struct nsim_fib_event *fib_event, *next_fib_event; + + LIST_HEAD(fib_event_queue); + + spin_lock_bh(&data->fib_event_queue_lock); + list_splice_init(&data->fib_event_queue, &fib_event_queue); + spin_unlock_bh(&data->fib_event_queue_lock); + + mutex_lock(&data->fib_lock); + list_for_each_entry_safe(fib_event, next_fib_event, &fib_event_queue, + list) { + nsim_fib_event(fib_event); + 
list_del(&fib_event->list); + kfree(fib_event); + cond_resched(); + } + mutex_unlock(&data->fib_lock); +} + +static void nsim_fib_flush_work(struct work_struct *work) +{ + struct nsim_fib_data *data = container_of(work, struct nsim_fib_data, + fib_flush_work); + struct nsim_fib_rt *fib_rt, *fib_rt_tmp; + + /* Process pending work. */ + flush_work(&data->fib_event_work); + + mutex_lock(&data->fib_lock); + list_for_each_entry_safe(fib_rt, fib_rt_tmp, &data->fib_rt_list, list) { + rhashtable_remove_fast(&data->fib_rt_ht, &fib_rt->ht_node, + nsim_fib_rt_ht_params); + nsim_fib_rt_free(fib_rt, data); + } + mutex_unlock(&data->fib_lock); +} + +static int +nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev) +{ + data->ddir = debugfs_create_dir("fib", nsim_dev->ddir); + if (IS_ERR(data->ddir)) + return PTR_ERR(data->ddir); + + data->fail_route_offload = false; + debugfs_create_bool("fail_route_offload", 0600, data->ddir, + &data->fail_route_offload); + + data->fail_res_nexthop_group_replace = false; + debugfs_create_bool("fail_res_nexthop_group_replace", 0600, data->ddir, + &data->fail_res_nexthop_group_replace); + + data->fail_nexthop_bucket_replace = false; + debugfs_create_bool("fail_nexthop_bucket_replace", 0600, data->ddir, + &data->fail_nexthop_bucket_replace); + + debugfs_create_file("nexthop_bucket_activity", 0200, data->ddir, + data, &nsim_nexthop_bucket_activity_fops); + + data->fail_route_delete = false; + debugfs_create_bool("fail_route_delete", 0600, data->ddir, + &data->fail_route_delete); + return 0; +} + +static void nsim_fib_debugfs_exit(struct nsim_fib_data *data) +{ + debugfs_remove_recursive(data->ddir); +} + +struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + struct nsim_fib_data *data; + struct nsim_dev *nsim_dev; int err; - err = register_pernet_subsys(&nsim_fib_net_ops); - if (err < 0) { - pr_err("Failed to register pernet subsystem\n"); - goto err_out; + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + data->devlink = devlink; + + nsim_dev = devlink_priv(devlink); + err = nsim_fib_debugfs_init(data, nsim_dev); + if (err) + goto err_data_free; + + mutex_init(&data->nh_lock); + err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params); + if (err) + goto err_debugfs_exit; + + mutex_init(&data->fib_lock); + INIT_LIST_HEAD(&data->fib_rt_list); + err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params); + if (err) + goto err_rhashtable_nexthop_destroy; + + INIT_WORK(&data->fib_event_work, nsim_fib_event_work); + INIT_WORK(&data->fib_flush_work, nsim_fib_flush_work); + INIT_LIST_HEAD(&data->fib_event_queue); + spin_lock_init(&data->fib_event_queue_lock); + + nsim_fib_set_max_all(data, devlink); + + data->nexthop_nb.notifier_call = nsim_nexthop_event_nb; + err = register_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb, + extack); + if (err) { + pr_err("Failed to register nexthop notifier\n"); + goto err_rhashtable_fib_destroy; } - err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent); - if (err < 0) { + data->fib_nb.notifier_call = nsim_fib_event_nb; + err = register_fib_notifier(devlink_net(devlink), &data->fib_nb, + nsim_fib_dump_inconsistent, extack); + if (err) { pr_err("Failed to register fib notifier\n"); - goto err_out; + goto err_nexthop_nb_unregister; } -err_out: - return err; + devl_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV4_FIB, + nsim_fib_ipv4_resource_occ_get, + data); + 
devl_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV4_FIB_RULES, + nsim_fib_ipv4_rules_res_occ_get, + data); + devl_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV6_FIB, + nsim_fib_ipv6_resource_occ_get, + data); + devl_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV6_FIB_RULES, + nsim_fib_ipv6_rules_res_occ_get, + data); + devl_resource_occ_get_register(devlink, + NSIM_RESOURCE_NEXTHOPS, + nsim_fib_nexthops_res_occ_get, + data); + return data; + +err_nexthop_nb_unregister: + unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); +err_rhashtable_fib_destroy: + cancel_work_sync(&data->fib_flush_work); + flush_work(&data->fib_event_work); + rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, + data); +err_rhashtable_nexthop_destroy: + rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, + data); + mutex_destroy(&data->fib_lock); +err_debugfs_exit: + mutex_destroy(&data->nh_lock); + nsim_fib_debugfs_exit(data); +err_data_free: + kfree(data); + return ERR_PTR(err); +} + +void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) +{ + devl_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_NEXTHOPS); + devl_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV6_FIB_RULES); + devl_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV6_FIB); + devl_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV4_FIB_RULES); + devl_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV4_FIB); + unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); + unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); + cancel_work_sync(&data->fib_flush_work); + flush_work(&data->fib_event_work); + rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, + data); + rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, + data); + WARN_ON_ONCE(!list_empty(&data->fib_event_queue)); + WARN_ON_ONCE(!list_empty(&data->fib_rt_list)); + mutex_destroy(&data->fib_lock); + mutex_destroy(&data->nh_lock); + nsim_fib_debugfs_exit(data); + kfree(data); } diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c new file mode 100644 index 000000000000..3bd0e7a489c3 --- /dev/null +++ b/drivers/net/netdevsim/health.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#include <linux/debugfs.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "netdevsim.h" + +static int +nsim_dev_empty_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int +nsim_dev_empty_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static const +struct devlink_health_reporter_ops nsim_dev_empty_reporter_ops = { + .name = "empty", + .dump = nsim_dev_empty_reporter_dump, + .diagnose = nsim_dev_empty_reporter_diagnose, +}; + +struct nsim_dev_dummy_reporter_ctx { + char *break_msg; +}; + +static int +nsim_dev_dummy_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + struct nsim_dev_dummy_reporter_ctx *ctx = priv_ctx; + + if (health->fail_recover) { + /* For testing purposes, user set debugfs fail_recover + * value to true. Fail right away. 
+ */ + NL_SET_ERR_MSG_MOD(extack, "User setup the recover to fail for testing purposes"); + return -EINVAL; + } + if (ctx) { + kfree(health->recovered_break_msg); + health->recovered_break_msg = kstrdup(ctx->break_msg, + GFP_KERNEL); + if (!health->recovered_break_msg) + return -ENOMEM; + } + return 0; +} + +static int nsim_dev_dummy_fmsg_put(struct devlink_fmsg *fmsg, u32 binary_len) +{ + char *binary; + int i; + + devlink_fmsg_bool_pair_put(fmsg, "test_bool", true); + devlink_fmsg_u8_pair_put(fmsg, "test_u8", 1); + devlink_fmsg_u32_pair_put(fmsg, "test_u32", 3); + devlink_fmsg_u64_pair_put(fmsg, "test_u64", 4); + devlink_fmsg_string_pair_put(fmsg, "test_string", "somestring"); + + binary = kmalloc(binary_len, GFP_KERNEL | __GFP_NOWARN); + if (!binary) + return -ENOMEM; + get_random_bytes(binary, binary_len); + devlink_fmsg_binary_pair_put(fmsg, "test_binary", binary, binary_len); + kfree(binary); + + devlink_fmsg_pair_nest_start(fmsg, "test_nest"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_bool_pair_put(fmsg, "nested_test_bool", false); + devlink_fmsg_u8_pair_put(fmsg, "nested_test_u8", false); + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); + devlink_fmsg_arr_pair_nest_start(fmsg, "test_u32_array"); + + for (i = 0; i < 10; i++) + devlink_fmsg_u32_put(fmsg, i); + devlink_fmsg_arr_pair_nest_end(fmsg); + devlink_fmsg_arr_pair_nest_start(fmsg, "test_array_of_objects"); + + for (i = 0; i < 10; i++) { + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_bool_pair_put(fmsg, "in_array_nested_test_bool", + false); + devlink_fmsg_u8_pair_put(fmsg, "in_array_nested_test_u8", i); + devlink_fmsg_obj_nest_end(fmsg); + } + devlink_fmsg_arr_pair_nest_end(fmsg); + + return 0; +} + +static int +nsim_dev_dummy_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + struct nsim_dev_dummy_reporter_ctx *ctx = priv_ctx; + + if (ctx) + devlink_fmsg_string_pair_put(fmsg, "break_message", ctx->break_msg); + + return nsim_dev_dummy_fmsg_put(fmsg, health->binary_len); +} + +static int +nsim_dev_dummy_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + + if (health->recovered_break_msg) + devlink_fmsg_string_pair_put(fmsg, "recovered_break_message", + health->recovered_break_msg); + + return nsim_dev_dummy_fmsg_put(fmsg, health->binary_len); +} + +static const +struct devlink_health_reporter_ops nsim_dev_dummy_reporter_ops = { + .name = "dummy", + .recover = nsim_dev_dummy_reporter_recover, + .dump = nsim_dev_dummy_reporter_dump, + .diagnose = nsim_dev_dummy_reporter_diagnose, +}; + +static ssize_t nsim_dev_health_break_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev_health *health = file->private_data; + struct nsim_dev_dummy_reporter_ctx ctx; + char *break_msg; + int err; + + if (count == 0 || count > PAGE_SIZE) + return -EINVAL; + break_msg = memdup_user_nul(data, count); + if (IS_ERR(break_msg)) + return PTR_ERR(break_msg); + + if (break_msg[count - 1] == '\n') + break_msg[count - 1] = '\0'; + + ctx.break_msg = break_msg; + err = devlink_health_report(health->dummy_reporter, break_msg, &ctx); + if (err) + goto out; + +out: + kfree(break_msg); + return err ?: count; +} + +static 
const struct file_operations nsim_dev_health_break_fops = { + .open = simple_open, + .write = nsim_dev_health_break_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + +int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink) +{ + struct nsim_dev_health *health = &nsim_dev->health; + int err; + + health->empty_reporter = + devl_health_reporter_create(devlink, + &nsim_dev_empty_reporter_ops, + health); + if (IS_ERR(health->empty_reporter)) + return PTR_ERR(health->empty_reporter); + + health->dummy_reporter = + devl_health_reporter_create(devlink, + &nsim_dev_dummy_reporter_ops, + health); + if (IS_ERR(health->dummy_reporter)) { + err = PTR_ERR(health->dummy_reporter); + goto err_empty_reporter_destroy; + } + + health->ddir = debugfs_create_dir("health", nsim_dev->ddir); + if (IS_ERR(health->ddir)) { + err = PTR_ERR(health->ddir); + goto err_dummy_reporter_destroy; + } + + health->recovered_break_msg = NULL; + debugfs_create_file("break_health", 0200, health->ddir, health, + &nsim_dev_health_break_fops); + health->binary_len = 16; + debugfs_create_u32("binary_len", 0600, health->ddir, + &health->binary_len); + health->fail_recover = false; + debugfs_create_bool("fail_recover", 0600, health->ddir, + &health->fail_recover); + return 0; + +err_dummy_reporter_destroy: + devl_health_reporter_destroy(health->dummy_reporter); +err_empty_reporter_destroy: + devl_health_reporter_destroy(health->empty_reporter); + return err; +} + +void nsim_dev_health_exit(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_health *health = &nsim_dev->health; + + debugfs_remove_recursive(health->ddir); + kfree(health->recovered_break_msg); + devl_health_reporter_destroy(health->dummy_reporter); + devl_health_reporter_destroy(health->empty_reporter); +} diff --git a/drivers/net/netdevsim/hwstats.c b/drivers/net/netdevsim/hwstats.c new file mode 100644 index 000000000000..1abe48e35ca3 --- /dev/null +++ b/drivers/net/netdevsim/hwstats.c @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/debugfs.h> + +#include "netdevsim.h" + +#define NSIM_DEV_HWSTATS_TRAFFIC_MS 100 + +static struct list_head * +nsim_dev_hwstats_get_list_head(struct nsim_dev_hwstats *hwstats, + enum netdev_offload_xstats_type type) +{ + switch (type) { + case NETDEV_OFFLOAD_XSTATS_TYPE_L3: + return &hwstats->l3_list; + } + + WARN_ON_ONCE(1); + return NULL; +} + +static void nsim_dev_hwstats_traffic_bump(struct nsim_dev_hwstats *hwstats, + enum netdev_offload_xstats_type type) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + struct list_head *hwsdev_list; + + hwsdev_list = nsim_dev_hwstats_get_list_head(hwstats, type); + if (WARN_ON(!hwsdev_list)) + return; + + list_for_each_entry(hwsdev, hwsdev_list, list) { + if (hwsdev->enabled) { + hwsdev->stats.rx_packets += 1; + hwsdev->stats.tx_packets += 2; + hwsdev->stats.rx_bytes += 100; + hwsdev->stats.tx_bytes += 300; + } + } +} + +static void nsim_dev_hwstats_traffic_work(struct work_struct *work) +{ + struct nsim_dev_hwstats *hwstats; + + hwstats = container_of(work, struct nsim_dev_hwstats, traffic_dw.work); + mutex_lock(&hwstats->hwsdev_list_lock); + nsim_dev_hwstats_traffic_bump(hwstats, NETDEV_OFFLOAD_XSTATS_TYPE_L3); + mutex_unlock(&hwstats->hwsdev_list_lock); + + schedule_delayed_work(&hwstats->traffic_dw, + msecs_to_jiffies(NSIM_DEV_HWSTATS_TRAFFIC_MS)); +} + +static struct nsim_dev_hwstats_netdev * +nsim_dev_hwslist_find_hwsdev(struct list_head *hwsdev_list, + int ifindex) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + + 
list_for_each_entry(hwsdev, hwsdev_list, list) { + if (hwsdev->netdev->ifindex == ifindex) + return hwsdev; + } + + return NULL; +} + +static int nsim_dev_hwsdev_enable(struct nsim_dev_hwstats_netdev *hwsdev, + struct netlink_ext_ack *extack) +{ + if (hwsdev->fail_enable) { + hwsdev->fail_enable = false; + NL_SET_ERR_MSG_MOD(extack, "Stats enablement set to fail"); + return -ECANCELED; + } + + hwsdev->enabled = true; + return 0; +} + +static void nsim_dev_hwsdev_disable(struct nsim_dev_hwstats_netdev *hwsdev) +{ + hwsdev->enabled = false; + memset(&hwsdev->stats, 0, sizeof(hwsdev->stats)); +} + +static int +nsim_dev_hwsdev_report_delta(struct nsim_dev_hwstats_netdev *hwsdev, + struct netdev_notifier_offload_xstats_info *info) +{ + netdev_offload_xstats_report_delta(info->report_delta, &hwsdev->stats); + memset(&hwsdev->stats, 0, sizeof(hwsdev->stats)); + return 0; +} + +static void +nsim_dev_hwsdev_report_used(struct nsim_dev_hwstats_netdev *hwsdev, + struct netdev_notifier_offload_xstats_info *info) +{ + if (hwsdev->enabled) + netdev_offload_xstats_report_used(info->report_used); +} + +static int nsim_dev_hwstats_event_off_xstats(struct nsim_dev_hwstats *hwstats, + struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_offload_xstats_info *info; + struct nsim_dev_hwstats_netdev *hwsdev; + struct list_head *hwsdev_list; + int err = 0; + + info = ptr; + hwsdev_list = nsim_dev_hwstats_get_list_head(hwstats, info->type); + if (!hwsdev_list) + return 0; + + mutex_lock(&hwstats->hwsdev_list_lock); + + hwsdev = nsim_dev_hwslist_find_hwsdev(hwsdev_list, dev->ifindex); + if (!hwsdev) + goto out; + + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + err = nsim_dev_hwsdev_enable(hwsdev, info->info.extack); + break; + case NETDEV_OFFLOAD_XSTATS_DISABLE: + nsim_dev_hwsdev_disable(hwsdev); + break; + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + nsim_dev_hwsdev_report_used(hwsdev, info); + break; + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + err = nsim_dev_hwsdev_report_delta(hwsdev, info); + break; + } + +out: + mutex_unlock(&hwstats->hwsdev_list_lock); + return err; +} + +static void nsim_dev_hwsdev_fini(struct nsim_dev_hwstats_netdev *hwsdev) +{ + dev_put(hwsdev->netdev); + kfree(hwsdev); +} + +static void +__nsim_dev_hwstats_event_unregister(struct nsim_dev_hwstats *hwstats, + struct net_device *dev, + enum netdev_offload_xstats_type type) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + struct list_head *hwsdev_list; + + hwsdev_list = nsim_dev_hwstats_get_list_head(hwstats, type); + if (WARN_ON(!hwsdev_list)) + return; + + hwsdev = nsim_dev_hwslist_find_hwsdev(hwsdev_list, dev->ifindex); + if (!hwsdev) + return; + + list_del(&hwsdev->list); + nsim_dev_hwsdev_fini(hwsdev); +} + +static void nsim_dev_hwstats_event_unregister(struct nsim_dev_hwstats *hwstats, + struct net_device *dev) +{ + mutex_lock(&hwstats->hwsdev_list_lock); + __nsim_dev_hwstats_event_unregister(hwstats, dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3); + mutex_unlock(&hwstats->hwsdev_list_lock); +} + +static int nsim_dev_hwstats_event(struct nsim_dev_hwstats *hwstats, + struct net_device *dev, + unsigned long event, void *ptr) +{ + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + case NETDEV_OFFLOAD_XSTATS_DISABLE: + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return nsim_dev_hwstats_event_off_xstats(hwstats, dev, + event, ptr); + case NETDEV_UNREGISTER: + nsim_dev_hwstats_event_unregister(hwstats, dev); + break; + } + + return 0; +} + +static int 
nsim_dev_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nsim_dev_hwstats *hwstats; + int err = 0; + + hwstats = container_of(nb, struct nsim_dev_hwstats, netdevice_nb); + err = nsim_dev_hwstats_event(hwstats, dev, event, ptr); + if (err) + return notifier_from_errno(err); + + return NOTIFY_OK; +} + +static int +nsim_dev_hwstats_enable_ifindex(struct nsim_dev_hwstats *hwstats, + int ifindex, + enum netdev_offload_xstats_type type, + struct list_head *hwsdev_list) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + struct nsim_dev *nsim_dev; + struct net_device *netdev; + struct net *net; + int err = 0; + + nsim_dev = container_of(hwstats, struct nsim_dev, hwstats); + net = nsim_dev_net(nsim_dev); + + rtnl_lock(); + mutex_lock(&hwstats->hwsdev_list_lock); + hwsdev = nsim_dev_hwslist_find_hwsdev(hwsdev_list, ifindex); + if (hwsdev) + goto out_unlock_list; + + netdev = dev_get_by_index(net, ifindex); + if (!netdev) { + err = -ENODEV; + goto out_unlock_list; + } + + hwsdev = kzalloc(sizeof(*hwsdev), GFP_KERNEL); + if (!hwsdev) { + err = -ENOMEM; + goto out_put_netdev; + } + + hwsdev->netdev = netdev; + list_add_tail(&hwsdev->list, hwsdev_list); + mutex_unlock(&hwstats->hwsdev_list_lock); + + if (netdev_offload_xstats_enabled(netdev, type)) { + nsim_dev_hwsdev_enable(hwsdev, NULL); + rtnl_offload_xstats_notify(netdev); + } + + rtnl_unlock(); + return err; + +out_put_netdev: + dev_put(netdev); +out_unlock_list: + mutex_unlock(&hwstats->hwsdev_list_lock); + rtnl_unlock(); + return err; +} + +static int +nsim_dev_hwstats_disable_ifindex(struct nsim_dev_hwstats *hwstats, + int ifindex, + enum netdev_offload_xstats_type type, + struct list_head *hwsdev_list) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + int err = 0; + + rtnl_lock(); + mutex_lock(&hwstats->hwsdev_list_lock); + hwsdev = nsim_dev_hwslist_find_hwsdev(hwsdev_list, ifindex); + if (hwsdev) + list_del(&hwsdev->list); + mutex_unlock(&hwstats->hwsdev_list_lock); + + if (!hwsdev) { + err = -ENOENT; + goto unlock_out; + } + + if (netdev_offload_xstats_enabled(hwsdev->netdev, type)) { + netdev_offload_xstats_push_delta(hwsdev->netdev, type, + &hwsdev->stats); + rtnl_offload_xstats_notify(hwsdev->netdev); + } + nsim_dev_hwsdev_fini(hwsdev); + +unlock_out: + rtnl_unlock(); + return err; +} + +static int +nsim_dev_hwstats_fail_ifindex(struct nsim_dev_hwstats *hwstats, + int ifindex, + enum netdev_offload_xstats_type type, + struct list_head *hwsdev_list) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + int err = 0; + + mutex_lock(&hwstats->hwsdev_list_lock); + + hwsdev = nsim_dev_hwslist_find_hwsdev(hwsdev_list, ifindex); + if (!hwsdev) { + err = -ENOENT; + goto err_hwsdev_list_unlock; + } + + hwsdev->fail_enable = true; + +err_hwsdev_list_unlock: + mutex_unlock(&hwstats->hwsdev_list_lock); + return err; +} + +enum nsim_dev_hwstats_do { + NSIM_DEV_HWSTATS_DO_DISABLE, + NSIM_DEV_HWSTATS_DO_ENABLE, + NSIM_DEV_HWSTATS_DO_FAIL, +}; + +struct nsim_dev_hwstats_fops { + enum nsim_dev_hwstats_do action; + enum netdev_offload_xstats_type type; +}; + +static ssize_t +nsim_dev_hwstats_do_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev_hwstats *hwstats = file->private_data; + const struct nsim_dev_hwstats_fops *hwsfops; + struct list_head *hwsdev_list; + int ifindex; + int err; + + hwsfops = debugfs_get_aux(file); + + err = kstrtoint_from_user(data, count, 0, &ifindex); + if (err) + return err; + + hwsdev_list = 
nsim_dev_hwstats_get_list_head(hwstats, hwsfops->type); + if (WARN_ON(!hwsdev_list)) + return -EINVAL; + + switch (hwsfops->action) { + case NSIM_DEV_HWSTATS_DO_DISABLE: + err = nsim_dev_hwstats_disable_ifindex(hwstats, ifindex, + hwsfops->type, + hwsdev_list); + break; + case NSIM_DEV_HWSTATS_DO_ENABLE: + err = nsim_dev_hwstats_enable_ifindex(hwstats, ifindex, + hwsfops->type, + hwsdev_list); + break; + case NSIM_DEV_HWSTATS_DO_FAIL: + err = nsim_dev_hwstats_fail_ifindex(hwstats, ifindex, + hwsfops->type, + hwsdev_list); + break; + } + if (err) + return err; + + return count; +} + +static struct debugfs_short_fops debugfs_ops = { + .write = nsim_dev_hwstats_do_write, + .llseek = generic_file_llseek, +}; + +#define NSIM_DEV_HWSTATS_FOPS(ACTION, TYPE) \ + { \ + .action = ACTION, \ + .type = TYPE, \ + } + +static const struct nsim_dev_hwstats_fops nsim_dev_hwstats_l3_disable_fops = + NSIM_DEV_HWSTATS_FOPS(NSIM_DEV_HWSTATS_DO_DISABLE, + NETDEV_OFFLOAD_XSTATS_TYPE_L3); + +static const struct nsim_dev_hwstats_fops nsim_dev_hwstats_l3_enable_fops = + NSIM_DEV_HWSTATS_FOPS(NSIM_DEV_HWSTATS_DO_ENABLE, + NETDEV_OFFLOAD_XSTATS_TYPE_L3); + +static const struct nsim_dev_hwstats_fops nsim_dev_hwstats_l3_fail_fops = + NSIM_DEV_HWSTATS_FOPS(NSIM_DEV_HWSTATS_DO_FAIL, + NETDEV_OFFLOAD_XSTATS_TYPE_L3); + +#undef NSIM_DEV_HWSTATS_FOPS + +int nsim_dev_hwstats_init(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_hwstats *hwstats = &nsim_dev->hwstats; + struct net *net = nsim_dev_net(nsim_dev); + int err; + + mutex_init(&hwstats->hwsdev_list_lock); + INIT_LIST_HEAD(&hwstats->l3_list); + + hwstats->netdevice_nb.notifier_call = nsim_dev_netdevice_event; + err = register_netdevice_notifier_net(net, &hwstats->netdevice_nb); + if (err) + goto err_mutex_destroy; + + hwstats->ddir = debugfs_create_dir("hwstats", nsim_dev->ddir); + if (IS_ERR(hwstats->ddir)) { + err = PTR_ERR(hwstats->ddir); + goto err_unregister_notifier; + } + + hwstats->l3_ddir = debugfs_create_dir("l3", hwstats->ddir); + if (IS_ERR(hwstats->l3_ddir)) { + err = PTR_ERR(hwstats->l3_ddir); + goto err_remove_hwstats_recursive; + } + + debugfs_create_file_aux("enable_ifindex", 0200, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_enable_fops, &debugfs_ops); + debugfs_create_file_aux("disable_ifindex", 0200, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_disable_fops, &debugfs_ops); + debugfs_create_file_aux("fail_next_enable", 0200, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_fail_fops, &debugfs_ops); + + INIT_DELAYED_WORK(&hwstats->traffic_dw, + &nsim_dev_hwstats_traffic_work); + schedule_delayed_work(&hwstats->traffic_dw, + msecs_to_jiffies(NSIM_DEV_HWSTATS_TRAFFIC_MS)); + return 0; + +err_remove_hwstats_recursive: + debugfs_remove_recursive(hwstats->ddir); +err_unregister_notifier: + unregister_netdevice_notifier_net(net, &hwstats->netdevice_nb); +err_mutex_destroy: + mutex_destroy(&hwstats->hwsdev_list_lock); + return err; +} + +static void nsim_dev_hwsdev_list_wipe(struct nsim_dev_hwstats *hwstats, + enum netdev_offload_xstats_type type) +{ + struct nsim_dev_hwstats_netdev *hwsdev, *tmp; + struct list_head *hwsdev_list; + + hwsdev_list = nsim_dev_hwstats_get_list_head(hwstats, type); + if (WARN_ON(!hwsdev_list)) + return; + + mutex_lock(&hwstats->hwsdev_list_lock); + list_for_each_entry_safe(hwsdev, tmp, hwsdev_list, list) { + list_del(&hwsdev->list); + nsim_dev_hwsdev_fini(hwsdev); + } + mutex_unlock(&hwstats->hwsdev_list_lock); +} + +void nsim_dev_hwstats_exit(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_hwstats *hwstats = 
&nsim_dev->hwstats; + struct net *net = nsim_dev_net(nsim_dev); + + cancel_delayed_work_sync(&hwstats->traffic_dw); + debugfs_remove_recursive(hwstats->ddir); + unregister_netdevice_notifier_net(net, &hwstats->netdevice_nb); + nsim_dev_hwsdev_list_wipe(hwstats, NETDEV_OFFLOAD_XSTATS_TYPE_L3); + mutex_destroy(&hwstats->hwsdev_list_lock); +} diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c index 76e11d889bb6..36a1be4923d6 100644 --- a/drivers/net/netdevsim/ipsec.c +++ b/drivers/net/netdevsim/ipsec.c @@ -29,9 +29,9 @@ static ssize_t nsim_dbg_netdev_ops_read(struct file *filp, return -ENOMEM; p = buf; - p += snprintf(p, bufsize - (p - buf), - "SA count=%u tx=%u\n", - ipsec->count, ipsec->tx); + p += scnprintf(p, bufsize - (p - buf), + "SA count=%u tx=%u\n", + ipsec->count, ipsec->tx); for (i = 0; i < NSIM_IPSEC_MAX_SA_COUNT; i++) { struct nsim_sa *sap = &ipsec->sa[i]; @@ -39,18 +39,22 @@ static ssize_t nsim_dbg_netdev_ops_read(struct file *filp, if (!sap->used) continue; - p += snprintf(p, bufsize - (p - buf), - "sa[%i] %cx ipaddr=0x%08x %08x %08x %08x\n", - i, (sap->rx ? 'r' : 't'), sap->ipaddr[0], - sap->ipaddr[1], sap->ipaddr[2], sap->ipaddr[3]); - p += snprintf(p, bufsize - (p - buf), - "sa[%i] spi=0x%08x proto=0x%x salt=0x%08x crypt=%d\n", - i, be32_to_cpu(sap->xs->id.spi), - sap->xs->id.proto, sap->salt, sap->crypt); - p += snprintf(p, bufsize - (p - buf), - "sa[%i] key=0x%08x %08x %08x %08x\n", - i, sap->key[0], sap->key[1], - sap->key[2], sap->key[3]); + if (sap->xs->props.family == AF_INET6) + p += scnprintf(p, bufsize - (p - buf), + "sa[%i] %cx ipaddr=%pI6c\n", + i, (sap->rx ? 'r' : 't'), &sap->ipaddr); + else + p += scnprintf(p, bufsize - (p - buf), + "sa[%i] %cx ipaddr=%pI4\n", + i, (sap->rx ? 'r' : 't'), &sap->ipaddr[3]); + p += scnprintf(p, bufsize - (p - buf), + "sa[%i] spi=0x%08x proto=0x%x salt=0x%08x crypt=%d\n", + i, be32_to_cpu(sap->xs->id.spi), + sap->xs->id.proto, sap->salt, sap->crypt); + p += scnprintf(p, bufsize - (p - buf), + "sa[%i] key=0x%08x %08x %08x %08x\n", + i, sap->key[0], sap->key[1], + sap->key[2], sap->key[3]); } len = simple_read_from_buffer(buffer, count, ppos, buf, p - buf); @@ -81,11 +85,11 @@ static int nsim_ipsec_find_empty_idx(struct nsim_ipsec *ipsec) return -ENOSPC; } -static int nsim_ipsec_parse_proto_keys(struct xfrm_state *xs, +static int nsim_ipsec_parse_proto_keys(struct net_device *dev, + struct xfrm_state *xs, u32 *mykey, u32 *mysalt) { const char aes_gcm_name[] = "rfc4106(gcm(aes))"; - struct net_device *dev = xs->xso.dev; unsigned char *key_data; char *alg_name = NULL; int key_len; @@ -125,34 +129,38 @@ static int nsim_ipsec_parse_proto_keys(struct xfrm_state *xs, return 0; } -static int nsim_ipsec_add_sa(struct xfrm_state *xs) +static int nsim_ipsec_add_sa(struct net_device *dev, + struct xfrm_state *xs, + struct netlink_ext_ack *extack) { struct nsim_ipsec *ipsec; - struct net_device *dev; struct netdevsim *ns; struct nsim_sa sa; u16 sa_idx; int ret; - dev = xs->xso.dev; ns = netdev_priv(dev); ipsec = &ns->ipsec; if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) { - netdev_err(dev, "Unsupported protocol 0x%04x for ipsec offload\n", - xs->id.proto); + NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol for ipsec offload"); return -EINVAL; } if (xs->calg) { - netdev_err(dev, "Compression offload not supported\n"); + NL_SET_ERR_MSG_MOD(extack, "Compression offload not supported"); + return -EINVAL; + } + + if (xs->xso.type != XFRM_DEV_OFFLOAD_CRYPTO) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported ipsec 
offload type"); return -EINVAL; } /* find the first unused index */ ret = nsim_ipsec_find_empty_idx(ipsec); if (ret < 0) { - netdev_err(dev, "No space for SA in Rx table!\n"); + NL_SET_ERR_MSG_MOD(extack, "No space for SA in Rx table!"); return ret; } sa_idx = (u16)ret; @@ -165,20 +173,19 @@ static int nsim_ipsec_add_sa(struct xfrm_state *xs) sa.crypt = xs->ealg || xs->aead; /* get the key and salt */ - ret = nsim_ipsec_parse_proto_keys(xs, sa.key, &sa.salt); + ret = nsim_ipsec_parse_proto_keys(dev, xs, sa.key, &sa.salt); if (ret) { - netdev_err(dev, "Failed to get key data for SA table\n"); + NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for SA table"); return ret; } - if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) { + if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) sa.rx = true; - if (xs->props.family == AF_INET6) - memcpy(sa.ipaddr, &xs->id.daddr.a6, 16); - else - memcpy(&sa.ipaddr[3], &xs->id.daddr.a4, 4); - } + if (xs->props.family == AF_INET6) + memcpy(sa.ipaddr, &xs->id.daddr.a6, 16); + else + memcpy(&sa.ipaddr[3], &xs->id.daddr.a4, 4); /* the preparations worked, so save the info */ memcpy(&ipsec->sa[sa_idx], &sa, sizeof(sa)); @@ -192,9 +199,9 @@ static int nsim_ipsec_add_sa(struct xfrm_state *xs) return 0; } -static void nsim_ipsec_del_sa(struct xfrm_state *xs) +static void nsim_ipsec_del_sa(struct net_device *dev, struct xfrm_state *xs) { - struct netdevsim *ns = netdev_priv(xs->xso.dev); + struct netdevsim *ns = netdev_priv(dev); struct nsim_ipsec *ipsec = &ns->ipsec; u16 sa_idx; @@ -209,20 +216,9 @@ static void nsim_ipsec_del_sa(struct xfrm_state *xs) ipsec->count--; } -static bool nsim_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) -{ - struct netdevsim *ns = netdev_priv(xs->xso.dev); - struct nsim_ipsec *ipsec = &ns->ipsec; - - ipsec->ok++; - - return true; -} - static const struct xfrmdev_ops nsim_xfrmdev_ops = { .xdo_dev_state_add = nsim_ipsec_add_sa, .xdo_dev_state_delete = nsim_ipsec_del_sa, - .xdo_dev_offload_ok = nsim_ipsec_offload_ok, }; bool nsim_ipsec_tx(struct netdevsim *ns, struct sk_buff *skb) @@ -281,9 +277,11 @@ void nsim_ipsec_init(struct netdevsim *ns) NETIF_F_GSO_ESP) ns->netdev->features |= NSIM_ESP_FEATURES; + ns->netdev->hw_features |= NSIM_ESP_FEATURES; ns->netdev->hw_enc_features |= NSIM_ESP_FEATURES; - ns->ipsec.pfile = debugfs_create_file("ipsec", 0400, ns->ddir, ns, + ns->ipsec.pfile = debugfs_create_file("ipsec", 0400, + ns->nsim_dev_port->ddir, ns, &ipsec_dbg_fops); } diff --git a/drivers/net/netdevsim/macsec.c b/drivers/net/netdevsim/macsec.c new file mode 100644 index 000000000000..bdc8020d588e --- /dev/null +++ b/drivers/net/netdevsim/macsec.c @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <net/macsec.h> +#include "netdevsim.h" + +static int nsim_macsec_find_secy(struct netdevsim *ns, sci_t sci) +{ + int i; + + for (i = 0; i < NSIM_MACSEC_MAX_SECY_COUNT; i++) { + if (ns->macsec.nsim_secy[i].sci == sci) + return i; + } + + return -1; +} + +static int nsim_macsec_find_rxsc(struct nsim_secy *ns_secy, sci_t sci) +{ + int i; + + for (i = 0; i < NSIM_MACSEC_MAX_RXSC_COUNT; i++) { + if (ns_secy->nsim_rxsc[i].sci == sci) + return i; + } + + return -1; +} + +static int nsim_macsec_add_secy(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + int idx; + + if (ns->macsec.nsim_secy_count == NSIM_MACSEC_MAX_SECY_COUNT) + return -ENOSPC; + + for (idx = 0; idx < NSIM_MACSEC_MAX_SECY_COUNT; idx++) { + if (!ns->macsec.nsim_secy[idx].used) + break; + } + + if (idx == NSIM_MACSEC_MAX_SECY_COUNT) { + 
netdev_err(ctx->netdev, "%s: nsim_secy_count not full but all SecYs used\n", + __func__); + return -ENOSPC; + } + + netdev_dbg(ctx->netdev, "%s: adding new secy with sci %016llx at index %d\n", + __func__, sci_to_cpu(ctx->secy->sci), idx); + ns->macsec.nsim_secy[idx].used = true; + ns->macsec.nsim_secy[idx].nsim_rxsc_count = 0; + ns->macsec.nsim_secy[idx].sci = ctx->secy->sci; + ns->macsec.nsim_secy_count++; + + return 0; +} + +static int nsim_macsec_upd_secy(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + __func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + + netdev_dbg(ctx->netdev, "%s: updating secy with sci %016llx at index %d\n", + __func__, sci_to_cpu(ctx->secy->sci), idx); + + return 0; +} + +static int nsim_macsec_del_secy(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + __func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + + netdev_dbg(ctx->netdev, "%s: removing SecY with SCI %016llx at index %d\n", + __func__, sci_to_cpu(ctx->secy->sci), idx); + + ns->macsec.nsim_secy[idx].used = false; + memset(&ns->macsec.nsim_secy[idx], 0, sizeof(ns->macsec.nsim_secy[idx])); + ns->macsec.nsim_secy_count--; + + return 0; +} + +static int nsim_macsec_add_rxsc(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + struct nsim_secy *secy; + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + __func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + secy = &ns->macsec.nsim_secy[idx]; + + if (secy->nsim_rxsc_count == NSIM_MACSEC_MAX_RXSC_COUNT) + return -ENOSPC; + + for (idx = 0; idx < NSIM_MACSEC_MAX_RXSC_COUNT; idx++) { + if (!secy->nsim_rxsc[idx].used) + break; + } + + if (idx == NSIM_MACSEC_MAX_RXSC_COUNT) + netdev_err(ctx->netdev, "%s: nsim_rxsc_count not full but all RXSCs used\n", + __func__); + + netdev_dbg(ctx->netdev, "%s: adding new rxsc with sci %016llx at index %d\n", + __func__, sci_to_cpu(ctx->rx_sc->sci), idx); + secy->nsim_rxsc[idx].used = true; + secy->nsim_rxsc[idx].sci = ctx->rx_sc->sci; + secy->nsim_rxsc_count++; + + return 0; +} + +static int nsim_macsec_upd_rxsc(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + struct nsim_secy *secy; + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + __func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + secy = &ns->macsec.nsim_secy[idx]; + + idx = nsim_macsec_find_rxsc(secy, ctx->rx_sc->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in RXSC table\n", + __func__, sci_to_cpu(ctx->rx_sc->sci)); + return -ENOENT; + } + + netdev_dbg(ctx->netdev, "%s: updating RXSC with sci %016llx at index %d\n", + __func__, sci_to_cpu(ctx->rx_sc->sci), idx); + + return 0; +} + +static int nsim_macsec_del_rxsc(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + struct nsim_secy *secy; + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + 
__func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + secy = &ns->macsec.nsim_secy[idx]; + + idx = nsim_macsec_find_rxsc(secy, ctx->rx_sc->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in RXSC table\n", + __func__, sci_to_cpu(ctx->rx_sc->sci)); + return -ENOENT; + } + + netdev_dbg(ctx->netdev, "%s: removing RXSC with sci %016llx at index %d\n", + __func__, sci_to_cpu(ctx->rx_sc->sci), idx); + + secy->nsim_rxsc[idx].used = false; + memset(&secy->nsim_rxsc[idx], 0, sizeof(secy->nsim_rxsc[idx])); + secy->nsim_rxsc_count--; + + return 0; +} + +static int nsim_macsec_add_rxsa(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + struct nsim_secy *secy; + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + __func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + secy = &ns->macsec.nsim_secy[idx]; + + idx = nsim_macsec_find_rxsc(secy, ctx->sa.rx_sa->sc->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in RXSC table\n", + __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci)); + return -ENOENT; + } + + netdev_dbg(ctx->netdev, "%s: RXSC with sci %016llx, AN %u\n", + __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci), ctx->sa.assoc_num); + + return 0; +} + +static int nsim_macsec_upd_rxsa(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + struct nsim_secy *secy; + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + __func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + secy = &ns->macsec.nsim_secy[idx]; + + idx = nsim_macsec_find_rxsc(secy, ctx->sa.rx_sa->sc->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in RXSC table\n", + __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci)); + return -ENOENT; + } + + netdev_dbg(ctx->netdev, "%s: RXSC with sci %016llx, AN %u\n", + __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci), ctx->sa.assoc_num); + + return 0; +} + +static int nsim_macsec_del_rxsa(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + struct nsim_secy *secy; + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + __func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + secy = &ns->macsec.nsim_secy[idx]; + + idx = nsim_macsec_find_rxsc(secy, ctx->sa.rx_sa->sc->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in RXSC table\n", + __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci)); + return -ENOENT; + } + + netdev_dbg(ctx->netdev, "%s: RXSC with sci %016llx, AN %u\n", + __func__, sci_to_cpu(ctx->sa.rx_sa->sc->sci), ctx->sa.assoc_num); + + return 0; +} + +static int nsim_macsec_add_txsa(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + __func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + + netdev_dbg(ctx->netdev, "%s: SECY with sci %016llx, AN %u\n", + __func__, sci_to_cpu(ctx->secy->sci), ctx->sa.assoc_num); + + return 0; +} + +static int nsim_macsec_upd_txsa(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 
0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + __func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + + netdev_dbg(ctx->netdev, "%s: SECY with sci %016llx, AN %u\n", + __func__, sci_to_cpu(ctx->secy->sci), ctx->sa.assoc_num); + + return 0; +} + +static int nsim_macsec_del_txsa(struct macsec_context *ctx) +{ + struct netdevsim *ns = netdev_priv(ctx->netdev); + int idx; + + idx = nsim_macsec_find_secy(ns, ctx->secy->sci); + if (idx < 0) { + netdev_err(ctx->netdev, "%s: sci %016llx not found in secy table\n", + __func__, sci_to_cpu(ctx->secy->sci)); + return -ENOENT; + } + + netdev_dbg(ctx->netdev, "%s: SECY with sci %016llx, AN %u\n", + __func__, sci_to_cpu(ctx->secy->sci), ctx->sa.assoc_num); + + return 0; +} + +static const struct macsec_ops nsim_macsec_ops = { + .mdo_add_secy = nsim_macsec_add_secy, + .mdo_upd_secy = nsim_macsec_upd_secy, + .mdo_del_secy = nsim_macsec_del_secy, + .mdo_add_rxsc = nsim_macsec_add_rxsc, + .mdo_upd_rxsc = nsim_macsec_upd_rxsc, + .mdo_del_rxsc = nsim_macsec_del_rxsc, + .mdo_add_rxsa = nsim_macsec_add_rxsa, + .mdo_upd_rxsa = nsim_macsec_upd_rxsa, + .mdo_del_rxsa = nsim_macsec_del_rxsa, + .mdo_add_txsa = nsim_macsec_add_txsa, + .mdo_upd_txsa = nsim_macsec_upd_txsa, + .mdo_del_txsa = nsim_macsec_del_txsa, +}; + +void nsim_macsec_init(struct netdevsim *ns) +{ + ns->netdev->macsec_ops = &nsim_macsec_ops; + ns->netdev->features |= NETIF_F_HW_MACSEC; + memset(&ns->macsec, 0, sizeof(ns->macsec)); +} + +void nsim_macsec_teardown(struct netdevsim *ns) +{ +} diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 8d8e2b3f263e..6927c1962277 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -15,267 +15,170 @@ #include <linux/debugfs.h> #include <linux/etherdevice.h> +#include <linux/ethtool_netlink.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/slab.h> +#include <net/netdev_queues.h> +#include <net/netdev_rx_queue.h> +#include <net/page_pool/helpers.h> #include <net/netlink.h> +#include <net/net_shaper.h> +#include <net/netdev_lock.h> #include <net/pkt_cls.h> #include <net/rtnetlink.h> -#include <net/switchdev.h> +#include <net/udp_tunnel.h> +#include <net/busy_poll.h> #include "netdevsim.h" -struct nsim_vf_config { - int link_state; - u16 min_tx_rate; - u16 max_tx_rate; - u16 vlan; - __be16 vlan_proto; - u16 qos; - u8 vf_mac[ETH_ALEN]; - bool spoofchk_enabled; - bool trusted; - bool rss_query_enabled; -}; - -static u32 nsim_dev_id; +MODULE_IMPORT_NS("NETDEV_INTERNAL"); -static struct dentry *nsim_ddir; -static struct dentry *nsim_sdev_ddir; +#define NSIM_RING_SIZE 256 -static int nsim_num_vf(struct device *dev) +static void nsim_start_peer_tx_queue(struct net_device *dev, struct nsim_rq *rq) { - struct netdevsim *ns = to_nsim(dev); - - return ns->num_vfs; -} + struct netdevsim *ns = netdev_priv(dev); + struct net_device *peer_dev; + struct netdevsim *peer_ns; + struct netdev_queue *txq; + u16 idx; + + idx = rq->napi.index; + rcu_read_lock(); + peer_ns = rcu_dereference(ns->peer); + if (!peer_ns) + goto out; -static struct bus_type nsim_bus = { - .name = DRV_NAME, - .dev_name = DRV_NAME, - .num_vf = nsim_num_vf, -}; + /* TX device */ + peer_dev = peer_ns->netdev; + if (dev->real_num_tx_queues != peer_dev->num_rx_queues) + goto out; -static int nsim_vfs_enable(struct netdevsim *ns, unsigned int num_vfs) -{ - ns->vfconfigs = kcalloc(num_vfs, sizeof(struct nsim_vf_config), - GFP_KERNEL); - if (!ns->vfconfigs) - return 
-ENOMEM; - ns->num_vfs = num_vfs; + txq = netdev_get_tx_queue(peer_dev, idx); + if (!netif_tx_queue_stopped(txq)) + goto out; - return 0; + netif_tx_wake_queue(txq); +out: + rcu_read_unlock(); } -static void nsim_vfs_disable(struct netdevsim *ns) +static void nsim_stop_tx_queue(struct net_device *tx_dev, + struct net_device *rx_dev, + struct nsim_rq *rq, + u16 idx) { - kfree(ns->vfconfigs); - ns->vfconfigs = NULL; - ns->num_vfs = 0; + /* If different queues size, do not stop, since it is not + * easy to find which TX queue is mapped here + */ + if (rx_dev->real_num_tx_queues != tx_dev->num_rx_queues) + return; + + /* rq is the queue on the receive side */ + netif_subqueue_try_stop(tx_dev, idx, + NSIM_RING_SIZE - skb_queue_len(&rq->skb_queue), + NSIM_RING_SIZE / 2); } -static ssize_t -nsim_numvfs_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static int nsim_napi_rx(struct net_device *tx_dev, struct net_device *rx_dev, + struct nsim_rq *rq, struct sk_buff *skb) { - struct netdevsim *ns = to_nsim(dev); - unsigned int num_vfs; - int ret; - - ret = kstrtouint(buf, 0, &num_vfs); - if (ret) - return ret; - - rtnl_lock(); - if (ns->num_vfs == num_vfs) - goto exit_good; - if (ns->num_vfs && num_vfs) { - ret = -EBUSY; - goto exit_unlock; + if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) { + dev_kfree_skb_any(skb); + return NET_RX_DROP; } - if (num_vfs) { - ret = nsim_vfs_enable(ns, num_vfs); - if (ret) - goto exit_unlock; - } else { - nsim_vfs_disable(ns); - } -exit_good: - ret = count; -exit_unlock: - rtnl_unlock(); - - return ret; -} + skb_queue_tail(&rq->skb_queue, skb); -static ssize_t -nsim_numvfs_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct netdevsim *ns = to_nsim(dev); + /* Stop the peer TX queue avoiding dropping packets later */ + if (skb_queue_len(&rq->skb_queue) >= NSIM_RING_SIZE) + nsim_stop_tx_queue(tx_dev, rx_dev, rq, + skb_get_queue_mapping(skb)); - return sprintf(buf, "%u\n", ns->num_vfs); + return NET_RX_SUCCESS; } -static struct device_attribute nsim_numvfs_attr = - __ATTR(sriov_numvfs, 0664, nsim_numvfs_show, nsim_numvfs_store); - -static struct attribute *nsim_dev_attrs[] = { - &nsim_numvfs_attr.attr, - NULL, -}; - -static const struct attribute_group nsim_dev_attr_group = { - .attrs = nsim_dev_attrs, -}; - -static const struct attribute_group *nsim_dev_attr_groups[] = { - &nsim_dev_attr_group, - NULL, -}; - -static void nsim_dev_release(struct device *dev) +static int nsim_forward_skb(struct net_device *tx_dev, + struct net_device *rx_dev, + struct sk_buff *skb, + struct nsim_rq *rq, + struct skb_ext *psp_ext) { - struct netdevsim *ns = to_nsim(dev); - - nsim_vfs_disable(ns); - free_netdev(ns->netdev); -} + int ret; -static struct device_type nsim_dev_type = { - .groups = nsim_dev_attr_groups, - .release = nsim_dev_release, -}; + ret = __dev_forward_skb(rx_dev, skb); + if (ret) + return ret; -static int -nsim_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) -{ - struct netdevsim *ns = netdev_priv(dev); + nsim_psp_handle_ext(skb, psp_ext); - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(ns->sdev->switch_id); - memcpy(&attr->u.ppid.id, &ns->sdev->switch_id, - attr->u.ppid.id_len); - return 0; - default: - return -EOPNOTSUPP; - } + return nsim_napi_rx(tx_dev, rx_dev, rq, skb); } -static const struct switchdev_ops nsim_switchdev_ops = { - .switchdev_port_attr_get = nsim_port_attr_get, -}; - -static int nsim_init(struct net_device *dev) 
+static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { - char sdev_ddir_name[10], sdev_link_name[32]; struct netdevsim *ns = netdev_priv(dev); - int err; - - ns->netdev = dev; - ns->ddir = debugfs_create_dir(netdev_name(dev), nsim_ddir); - if (IS_ERR_OR_NULL(ns->ddir)) - return -ENOMEM; + struct skb_ext *psp_ext = NULL; + struct net_device *peer_dev; + unsigned int len = skb->len; + struct netdevsim *peer_ns; + struct netdev_config *cfg; + struct nsim_rq *rq; + int rxq; + int dr; + + rcu_read_lock(); + if (!nsim_ipsec_tx(ns, skb)) + goto out_drop_any; - if (!ns->sdev) { - ns->sdev = kzalloc(sizeof(*ns->sdev), GFP_KERNEL); - if (!ns->sdev) { - err = -ENOMEM; - goto err_debugfs_destroy; - } - ns->sdev->refcnt = 1; - ns->sdev->switch_id = nsim_dev_id; - sprintf(sdev_ddir_name, "%u", ns->sdev->switch_id); - ns->sdev->ddir = debugfs_create_dir(sdev_ddir_name, - nsim_sdev_ddir); - if (IS_ERR_OR_NULL(ns->sdev->ddir)) { - err = PTR_ERR_OR_ZERO(ns->sdev->ddir) ?: -EINVAL; - goto err_sdev_free; - } + /* Check if loopback mode is enabled */ + if (dev->features & NETIF_F_LOOPBACK) { + peer_ns = ns; + peer_dev = dev; } else { - sprintf(sdev_ddir_name, "%u", ns->sdev->switch_id); - ns->sdev->refcnt++; - } - - sprintf(sdev_link_name, "../../" DRV_NAME "_sdev/%s", sdev_ddir_name); - debugfs_create_symlink("sdev", ns->ddir, sdev_link_name); - - err = nsim_bpf_init(ns); - if (err) - goto err_sdev_destroy; - - ns->dev.id = nsim_dev_id++; - ns->dev.bus = &nsim_bus; - ns->dev.type = &nsim_dev_type; - err = device_register(&ns->dev); - if (err) - goto err_bpf_uninit; - - SET_NETDEV_DEV(dev, &ns->dev); - SWITCHDEV_SET_OPS(dev, &nsim_switchdev_ops); - - err = nsim_devlink_setup(ns); - if (err) - goto err_unreg_dev; - - nsim_ipsec_init(ns); - - return 0; - -err_unreg_dev: - device_unregister(&ns->dev); -err_bpf_uninit: - nsim_bpf_uninit(ns); -err_sdev_destroy: - if (!--ns->sdev->refcnt) { - debugfs_remove_recursive(ns->sdev->ddir); -err_sdev_free: - kfree(ns->sdev); - } -err_debugfs_destroy: - debugfs_remove_recursive(ns->ddir); - return err; -} - -static void nsim_uninit(struct net_device *dev) -{ - struct netdevsim *ns = netdev_priv(dev); - - nsim_ipsec_teardown(ns); - nsim_devlink_teardown(ns); - debugfs_remove_recursive(ns->ddir); - nsim_bpf_uninit(ns); - if (!--ns->sdev->refcnt) { - debugfs_remove_recursive(ns->sdev->ddir); - kfree(ns->sdev); + peer_ns = rcu_dereference(ns->peer); + if (!peer_ns) + goto out_drop_any; + peer_dev = peer_ns->netdev; } -} -static void nsim_free(struct net_device *dev) -{ - struct netdevsim *ns = netdev_priv(dev); + dr = nsim_do_psp(skb, ns, peer_ns, &psp_ext); + if (dr) + goto out_drop_free; - device_unregister(&ns->dev); - /* netdev and vf state will be freed out of device_release() */ -} + rxq = skb_get_queue_mapping(skb); + if (rxq >= peer_dev->num_rx_queues) + rxq = rxq % peer_dev->num_rx_queues; + rq = peer_ns->rq[rxq]; -static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct netdevsim *ns = netdev_priv(dev); + cfg = peer_dev->cfg; + if (skb_is_nonlinear(skb) && + (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || + (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + cfg->hds_thresh > len))) + skb_linearize(skb); - if (!nsim_ipsec_tx(ns, skb)) - goto out; + skb_tx_timestamp(skb); + if (unlikely(nsim_forward_skb(dev, peer_dev, + skb, rq, psp_ext) == NET_RX_DROP)) + goto out_drop_cnt; - u64_stats_update_begin(&ns->syncp); - ns->tx_packets++; - ns->tx_bytes += skb->len; - u64_stats_update_end(&ns->syncp); + 
if (!hrtimer_active(&rq->napi_timer)) + hrtimer_start(&rq->napi_timer, us_to_ktime(5), HRTIMER_MODE_REL); -out: - dev_kfree_skb(skb); + rcu_read_unlock(); + dev_dstats_tx_add(dev, len); + return NETDEV_TX_OK; +out_drop_any: + dr = SKB_DROP_REASON_NOT_SPECIFIED; +out_drop_free: + kfree_skb_reason(skb, dr); +out_drop_cnt: + rcu_read_unlock(); + dev_dstats_tx_dropped(dev); return NETDEV_TX_OK; } @@ -287,61 +190,30 @@ static int nsim_change_mtu(struct net_device *dev, int new_mtu) { struct netdevsim *ns = netdev_priv(dev); - if (ns->xdp.prog && new_mtu > NSIM_XDP_MAX_MTU) + if (ns->xdp.prog && !ns->xdp.prog->aux->xdp_has_frags && + new_mtu > NSIM_XDP_MAX_MTU) return -EBUSY; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } -static void -nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) -{ - struct netdevsim *ns = netdev_priv(dev); - unsigned int start; - - do { - start = u64_stats_fetch_begin(&ns->syncp); - stats->tx_bytes = ns->tx_bytes; - stats->tx_packets = ns->tx_packets; - } while (u64_stats_fetch_retry(&ns->syncp, start)); -} - static int nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv); } -static int -nsim_setup_tc_block(struct net_device *dev, struct tc_block_offload *f) -{ - struct netdevsim *ns = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, nsim_setup_tc_block_cb, - ns, ns, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, nsim_setup_tc_block_cb, ns); - return 0; - default: - return -EOPNOTSUPP; - } -} - static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct netdevsim *ns = netdev_priv(dev); + struct nsim_dev *nsim_dev = ns->nsim_dev; /* Only refuse multicast addresses, zero address can mean unset/any. */ - if (vf >= ns->num_vfs || is_multicast_ether_addr(mac)) + if (vf >= nsim_dev_get_vfs(nsim_dev) || is_multicast_ether_addr(mac)) return -EINVAL; - memcpy(ns->vfconfigs[vf].vf_mac, mac, ETH_ALEN); + memcpy(nsim_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN); return 0; } @@ -350,13 +222,14 @@ static int nsim_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) { struct netdevsim *ns = netdev_priv(dev); + struct nsim_dev *nsim_dev = ns->nsim_dev; - if (vf >= ns->num_vfs || vlan > 4095 || qos > 7) + if (vf >= nsim_dev_get_vfs(nsim_dev) || vlan > 4095 || qos > 7) return -EINVAL; - ns->vfconfigs[vf].vlan = vlan; - ns->vfconfigs[vf].qos = qos; - ns->vfconfigs[vf].vlan_proto = vlan_proto; + nsim_dev->vfconfigs[vf].vlan = vlan; + nsim_dev->vfconfigs[vf].qos = qos; + nsim_dev->vfconfigs[vf].vlan_proto = vlan_proto; return 0; } @@ -364,12 +237,18 @@ static int nsim_set_vf_vlan(struct net_device *dev, int vf, static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max) { struct netdevsim *ns = netdev_priv(dev); + struct nsim_dev *nsim_dev = ns->nsim_dev; - if (vf >= ns->num_vfs) + if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) { + pr_err("Not supported in switchdev mode. 
Please use devlink API.\n"); + return -EOPNOTSUPP; + } + + if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; - ns->vfconfigs[vf].min_tx_rate = min; - ns->vfconfigs[vf].max_tx_rate = max; + nsim_dev->vfconfigs[vf].min_tx_rate = min; + nsim_dev->vfconfigs[vf].max_tx_rate = max; return 0; } @@ -377,10 +256,11 @@ static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max) static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val) { struct netdevsim *ns = netdev_priv(dev); + struct nsim_dev *nsim_dev = ns->nsim_dev; - if (vf >= ns->num_vfs) + if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; - ns->vfconfigs[vf].spoofchk_enabled = val; + nsim_dev->vfconfigs[vf].spoofchk_enabled = val; return 0; } @@ -388,10 +268,11 @@ static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val) static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val) { struct netdevsim *ns = netdev_priv(dev); + struct nsim_dev *nsim_dev = ns->nsim_dev; - if (vf >= ns->num_vfs) + if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; - ns->vfconfigs[vf].rss_query_enabled = val; + nsim_dev->vfconfigs[vf].rss_query_enabled = val; return 0; } @@ -399,10 +280,11 @@ static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val) static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val) { struct netdevsim *ns = netdev_priv(dev); + struct nsim_dev *nsim_dev = ns->nsim_dev; - if (vf >= ns->num_vfs) + if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; - ns->vfconfigs[vf].trusted = val; + nsim_dev->vfconfigs[vf].trusted = val; return 0; } @@ -411,21 +293,22 @@ static int nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi) { struct netdevsim *ns = netdev_priv(dev); + struct nsim_dev *nsim_dev = ns->nsim_dev; - if (vf >= ns->num_vfs) + if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; ivi->vf = vf; - ivi->linkstate = ns->vfconfigs[vf].link_state; - ivi->min_tx_rate = ns->vfconfigs[vf].min_tx_rate; - ivi->max_tx_rate = ns->vfconfigs[vf].max_tx_rate; - ivi->vlan = ns->vfconfigs[vf].vlan; - ivi->vlan_proto = ns->vfconfigs[vf].vlan_proto; - ivi->qos = ns->vfconfigs[vf].qos; - memcpy(&ivi->mac, ns->vfconfigs[vf].vf_mac, ETH_ALEN); - ivi->spoofchk = ns->vfconfigs[vf].spoofchk_enabled; - ivi->trusted = ns->vfconfigs[vf].trusted; - ivi->rss_query_en = ns->vfconfigs[vf].rss_query_enabled; + ivi->linkstate = nsim_dev->vfconfigs[vf].link_state; + ivi->min_tx_rate = nsim_dev->vfconfigs[vf].min_tx_rate; + ivi->max_tx_rate = nsim_dev->vfconfigs[vf].max_tx_rate; + ivi->vlan = nsim_dev->vfconfigs[vf].vlan; + ivi->vlan_proto = nsim_dev->vfconfigs[vf].vlan_proto; + ivi->qos = nsim_dev->vfconfigs[vf].qos; + memcpy(&ivi->mac, nsim_dev->vfconfigs[vf].vf_mac, ETH_ALEN); + ivi->spoofchk = nsim_dev->vfconfigs[vf].spoofchk_enabled; + ivi->trusted = nsim_dev->vfconfigs[vf].trusted; + ivi->rss_query_en = nsim_dev->vfconfigs[vf].rss_query_enabled; return 0; } @@ -433,8 +316,9 @@ nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi) static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state) { struct netdevsim *ns = netdev_priv(dev); + struct nsim_dev *nsim_dev = ns->nsim_dev; - if (vf >= ns->num_vfs) + if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; switch (state) { @@ -446,17 +330,51 @@ static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state) return -EINVAL; } - ns->vfconfigs[vf].link_state = state; + nsim_dev->vfconfigs[vf].link_state = state; return 0; } +static void 
nsim_taprio_stats(struct tc_taprio_qopt_stats *stats) +{ + stats->window_drops = 0; + stats->tx_overruns = 0; +} + +static int nsim_setup_tc_taprio(struct net_device *dev, + struct tc_taprio_qopt_offload *offload) +{ + int err = 0; + + switch (offload->cmd) { + case TAPRIO_CMD_REPLACE: + case TAPRIO_CMD_DESTROY: + break; + case TAPRIO_CMD_STATS: + nsim_taprio_stats(&offload->stats); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static LIST_HEAD(nsim_block_cb_list); + static int nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct netdevsim *ns = netdev_priv(dev); + switch (type) { + case TC_SETUP_QDISC_TAPRIO: + return nsim_setup_tc_taprio(dev, type_data); case TC_SETUP_BLOCK: - return nsim_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &nsim_block_cb_list, + nsim_setup_tc_block_cb, + ns, ns, true); default: return -EOPNOTSUPP; } @@ -473,15 +391,260 @@ nsim_set_features(struct net_device *dev, netdev_features_t features) return 0; } +static int nsim_get_iflink(const struct net_device *dev) +{ + struct netdevsim *nsim, *peer; + int iflink; + + nsim = netdev_priv(dev); + + rcu_read_lock(); + peer = rcu_dereference(nsim->peer); + iflink = peer ? READ_ONCE(peer->netdev->ifindex) : + READ_ONCE(dev->ifindex); + rcu_read_unlock(); + + return iflink; +} + +static int nsim_rcv(struct nsim_rq *rq, int budget) +{ + struct net_device *dev = rq->napi.dev; + struct bpf_prog *xdp_prog; + struct netdevsim *ns; + struct sk_buff *skb; + unsigned int skblen; + int i, ret; + + ns = netdev_priv(dev); + xdp_prog = READ_ONCE(ns->xdp.prog); + + for (i = 0; i < budget; i++) { + if (skb_queue_empty(&rq->skb_queue)) + break; + + skb = skb_dequeue(&rq->skb_queue); + + if (xdp_prog) { + /* skb might be freed directly by XDP, save the len */ + skblen = skb->len; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb_checksum_help(skb); + ret = do_xdp_generic(xdp_prog, &skb); + if (ret != XDP_PASS) { + dev_dstats_rx_add(dev, skblen); + continue; + } + } + + /* skb might be discard at netif_receive_skb, save the len */ + dev_dstats_rx_add(dev, skb->len); + napi_gro_receive(&rq->napi, skb); + } + + nsim_start_peer_tx_queue(dev, rq); + return i; +} + +static int nsim_poll(struct napi_struct *napi, int budget) +{ + struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi); + int done; + + done = nsim_rcv(rq, budget); + if (done < budget) + napi_complete_done(napi, done); + + return done; +} + +static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi) +{ + struct page_pool_params params = { + .order = 0, + .pool_size = NSIM_RING_SIZE, + .nid = NUMA_NO_NODE, + .dev = &napi->dev->dev, + .napi = napi, + .dma_dir = DMA_BIDIRECTIONAL, + .netdev = napi->dev, + }; + struct page_pool *pool; + + pool = page_pool_create(¶ms); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + *p = pool; + return 0; +} + +static int nsim_init_napi(struct netdevsim *ns) +{ + struct net_device *dev = ns->netdev; + struct nsim_rq *rq; + int err, i; + + for (i = 0; i < dev->num_rx_queues; i++) { + rq = ns->rq[i]; + + netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i); + } + + for (i = 0; i < dev->num_rx_queues; i++) { + rq = ns->rq[i]; + + err = nsim_create_page_pool(&rq->page_pool, &rq->napi); + if (err) + goto err_pp_destroy; + } + + return 0; + +err_pp_destroy: + while (i--) { + page_pool_destroy(ns->rq[i]->page_pool); + ns->rq[i]->page_pool = NULL; + } + + for (i = 0; i < dev->num_rx_queues; i++) + 
__netif_napi_del_locked(&ns->rq[i]->napi); + + return err; +} + +static enum hrtimer_restart nsim_napi_schedule(struct hrtimer *timer) +{ + struct nsim_rq *rq; + + rq = container_of(timer, struct nsim_rq, napi_timer); + napi_schedule(&rq->napi); + + return HRTIMER_NORESTART; +} + +static void nsim_rq_timer_init(struct nsim_rq *rq) +{ + hrtimer_setup(&rq->napi_timer, nsim_napi_schedule, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); +} + +static void nsim_enable_napi(struct netdevsim *ns) +{ + struct net_device *dev = ns->netdev; + int i; + + for (i = 0; i < dev->num_rx_queues; i++) { + struct nsim_rq *rq = ns->rq[i]; + + netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi); + napi_enable_locked(&rq->napi); + } +} + +static int nsim_open(struct net_device *dev) +{ + struct netdevsim *ns = netdev_priv(dev); + struct netdevsim *peer; + int err; + + netdev_assert_locked(dev); + + err = nsim_init_napi(ns); + if (err) + return err; + + nsim_enable_napi(ns); + + peer = rtnl_dereference(ns->peer); + if (peer && netif_running(peer->netdev)) { + netif_carrier_on(dev); + netif_carrier_on(peer->netdev); + } + + return 0; +} + +static void nsim_del_napi(struct netdevsim *ns) +{ + struct net_device *dev = ns->netdev; + int i; + + for (i = 0; i < dev->num_rx_queues; i++) { + struct nsim_rq *rq = ns->rq[i]; + + napi_disable_locked(&rq->napi); + __netif_napi_del_locked(&rq->napi); + } + synchronize_net(); + + for (i = 0; i < dev->num_rx_queues; i++) { + page_pool_destroy(ns->rq[i]->page_pool); + ns->rq[i]->page_pool = NULL; + } +} + +static int nsim_stop(struct net_device *dev) +{ + struct netdevsim *ns = netdev_priv(dev); + struct netdevsim *peer; + + netdev_assert_locked(dev); + + netif_carrier_off(dev); + peer = rtnl_dereference(ns->peer); + if (peer) + netif_carrier_off(peer->netdev); + + nsim_del_napi(ns); + + return 0; +} + +static int nsim_shaper_set(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int nsim_shaper_del(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int nsim_shaper_group(struct net_shaper_binding *binding, + int leaves_count, + const struct net_shaper *leaves, + const struct net_shaper *root, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static void nsim_shaper_cap(struct net_shaper_binding *binding, + enum net_shaper_scope scope, + unsigned long *flags) +{ + *flags = ULONG_MAX; +} + +static const struct net_shaper_ops nsim_shaper_ops = { + .set = nsim_shaper_set, + .delete = nsim_shaper_del, + .group = nsim_shaper_group, + .capabilities = nsim_shaper_cap, +}; + static const struct net_device_ops nsim_netdev_ops = { - .ndo_init = nsim_init, - .ndo_uninit = nsim_uninit, .ndo_start_xmit = nsim_start_xmit, .ndo_set_rx_mode = nsim_set_rx_mode, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = nsim_change_mtu, - .ndo_get_stats64 = nsim_get_stats64, .ndo_set_vf_mac = nsim_set_vf_mac, .ndo_set_vf_vlan = nsim_set_vf_vlan, .ndo_set_vf_rate = nsim_set_vf_rate, @@ -492,7 +655,313 @@ static const struct net_device_ops nsim_netdev_ops = { .ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en, .ndo_setup_tc = nsim_setup_tc, .ndo_set_features = nsim_set_features, + .ndo_get_iflink = nsim_get_iflink, .ndo_bpf = nsim_bpf, + .ndo_open = nsim_open, + .ndo_stop = nsim_stop, + .net_shaper_ops = &nsim_shaper_ops, +}; + +static const struct net_device_ops nsim_vf_netdev_ops = { + 
.ndo_start_xmit = nsim_start_xmit, + .ndo_set_rx_mode = nsim_set_rx_mode, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = nsim_change_mtu, + .ndo_setup_tc = nsim_setup_tc, + .ndo_set_features = nsim_set_features, +}; + +/* We don't have true per-queue stats, yet, so do some random fakery here. + * Only report stuff for queue 0. + */ +static void nsim_get_queue_stats_rx(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *stats) +{ + struct rtnl_link_stats64 rtstats = {}; + + if (!idx) + dev_get_stats(dev, &rtstats); + + stats->packets = rtstats.rx_packets - !!rtstats.rx_packets; + stats->bytes = rtstats.rx_bytes; +} + +static void nsim_get_queue_stats_tx(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *stats) +{ + struct rtnl_link_stats64 rtstats = {}; + + if (!idx) + dev_get_stats(dev, &rtstats); + + stats->packets = rtstats.tx_packets - !!rtstats.tx_packets; + stats->bytes = rtstats.tx_bytes; +} + +static void nsim_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx) +{ + struct rtnl_link_stats64 rtstats = {}; + + dev_get_stats(dev, &rtstats); + + rx->packets = !!rtstats.rx_packets; + rx->bytes = 0; + tx->packets = !!rtstats.tx_packets; + tx->bytes = 0; +} + +static const struct netdev_stat_ops nsim_stat_ops = { + .get_queue_stats_tx = nsim_get_queue_stats_tx, + .get_queue_stats_rx = nsim_get_queue_stats_rx, + .get_base_stats = nsim_get_base_stats, +}; + +static struct nsim_rq *nsim_queue_alloc(void) +{ + struct nsim_rq *rq; + + rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT); + if (!rq) + return NULL; + + skb_queue_head_init(&rq->skb_queue); + nsim_rq_timer_init(rq); + return rq; +} + +static void nsim_queue_free(struct net_device *dev, struct nsim_rq *rq) +{ + hrtimer_cancel(&rq->napi_timer); + + if (rq->skb_queue.qlen) { + local_bh_disable(); + dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen); + local_bh_enable(); + } + + skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE); + kfree(rq); +} + +/* Queue reset mode is controlled by ns->rq_reset_mode. 
+ * - normal - new NAPI new pool (old NAPI enabled when new added) + * - mode 1 - allocate new pool (NAPI is only disabled / enabled) + * - mode 2 - new NAPI new pool (old NAPI removed before new added) + * - mode 3 - new NAPI new pool (old NAPI disabled when new added) + */ +struct nsim_queue_mem { + struct nsim_rq *rq; + struct page_pool *pp; +}; + +static int +nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + int err; + + if (ns->rq_reset_mode > 3) + return -EINVAL; + + if (ns->rq_reset_mode == 1) { + if (!netif_running(ns->netdev)) + return -ENETDOWN; + return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi); + } + + qmem->rq = nsim_queue_alloc(); + if (!qmem->rq) + return -ENOMEM; + + err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi); + if (err) + goto err_free; + + if (!ns->rq_reset_mode) + netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, + idx); + + return 0; + +err_free: + nsim_queue_free(dev, qmem->rq); + return err; +} + +static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + page_pool_destroy(qmem->pp); + if (qmem->rq) { + if (!ns->rq_reset_mode) + netif_napi_del_locked(&qmem->rq->napi); + page_pool_destroy(qmem->rq->page_pool); + nsim_queue_free(dev, qmem->rq); + } +} + +static int +nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + netdev_assert_locked(dev); + + if (ns->rq_reset_mode == 1) { + ns->rq[idx]->page_pool = qmem->pp; + napi_enable_locked(&ns->rq[idx]->napi); + return 0; + } + + /* netif_napi_add()/_del() should normally be called from alloc/free, + * here we want to test various call orders. 
+ */ + if (ns->rq_reset_mode == 2) { + netif_napi_del_locked(&ns->rq[idx]->napi); + netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, + idx); + } else if (ns->rq_reset_mode == 3) { + netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, + idx); + netif_napi_del_locked(&ns->rq[idx]->napi); + } + + ns->rq[idx] = qmem->rq; + napi_enable_locked(&ns->rq[idx]->napi); + + return 0; +} + +static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + netdev_assert_locked(dev); + + napi_disable_locked(&ns->rq[idx]->napi); + + if (ns->rq_reset_mode == 1) { + qmem->pp = ns->rq[idx]->page_pool; + page_pool_disable_direct_recycling(qmem->pp); + } else { + qmem->rq = ns->rq[idx]; + } + + return 0; +} + +static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct nsim_queue_mem), + .ndo_queue_mem_alloc = nsim_queue_mem_alloc, + .ndo_queue_mem_free = nsim_queue_mem_free, + .ndo_queue_start = nsim_queue_start, + .ndo_queue_stop = nsim_queue_stop, +}; + +static ssize_t +nsim_qreset_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct netdevsim *ns = file->private_data; + unsigned int queue, mode; + char buf[32]; + ssize_t ret; + + if (count >= sizeof(buf)) + return -EINVAL; + if (copy_from_user(buf, data, count)) + return -EFAULT; + buf[count] = '\0'; + + ret = sscanf(buf, "%u %u", &queue, &mode); + if (ret != 2) + return -EINVAL; + + netdev_lock(ns->netdev); + if (queue >= ns->netdev->real_num_rx_queues) { + ret = -EINVAL; + goto exit_unlock; + } + + ns->rq_reset_mode = mode; + ret = netdev_rx_queue_restart(ns->netdev, queue); + ns->rq_reset_mode = 0; + if (ret) + goto exit_unlock; + + ret = count; +exit_unlock: + netdev_unlock(ns->netdev); + return ret; +} + +static const struct file_operations nsim_qreset_fops = { + .open = simple_open, + .write = nsim_qreset_write, + .owner = THIS_MODULE, +}; + +static ssize_t +nsim_pp_hold_read(struct file *file, char __user *data, + size_t count, loff_t *ppos) +{ + struct netdevsim *ns = file->private_data; + char buf[3] = "n\n"; + + if (ns->page) + buf[0] = 'y'; + + return simple_read_from_buffer(data, count, ppos, buf, 2); +} + +static ssize_t +nsim_pp_hold_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct netdevsim *ns = file->private_data; + ssize_t ret; + bool val; + + ret = kstrtobool_from_user(data, count, &val); + if (ret) + return ret; + + rtnl_lock(); + ret = count; + if (val == !!ns->page) + goto exit; + + if (!netif_running(ns->netdev) && val) { + ret = -ENETDOWN; + } else if (val) { + ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool); + if (!ns->page) + ret = -ENOMEM; + } else { + page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp, + ns->page, false); + ns->page = NULL; + } + +exit: + rtnl_unlock(); + return ret; +} + +static const struct file_operations nsim_pp_hold_fops = { + .open = simple_open, + .read = nsim_pp_hold_read, + .write = nsim_pp_hold_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, }; static void nsim_setup(struct net_device *dev) @@ -500,124 +969,287 @@ static void nsim_setup(struct net_device *dev) ether_setup(dev); eth_hw_addr_random(dev); - dev->netdev_ops = &nsim_netdev_ops; - dev->priv_destructor = nsim_free; - - dev->tx_queue_len = 0; - dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; - dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | - 
IFF_NO_QUEUE; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; dev->features |= NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | + NETIF_F_LRO | NETIF_F_TSO; - dev->hw_features |= NETIF_F_HW_TC; + dev->hw_features |= NETIF_F_HW_TC | + NETIF_F_SG | + NETIF_F_FRAGLIST | + NETIF_F_HW_CSUM | + NETIF_F_LRO | + NETIF_F_TSO | + NETIF_F_LOOPBACK; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; dev->max_mtu = ETH_MAX_MTU; + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_HW_OFFLOAD; } -static int nsim_validate(struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) +static int nsim_queue_init(struct netdevsim *ns) { - if (tb[IFLA_ADDRESS]) { - if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) - return -EINVAL; - if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) - return -EADDRNOTAVAIL; + struct net_device *dev = ns->netdev; + int i; + + ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq), + GFP_KERNEL_ACCOUNT); + if (!ns->rq) + return -ENOMEM; + + for (i = 0; i < dev->num_rx_queues; i++) { + ns->rq[i] = nsim_queue_alloc(); + if (!ns->rq[i]) + goto err_free_prev; } + return 0; + +err_free_prev: + while (i--) + kfree(ns->rq[i]); + kfree(ns->rq); + return -ENOMEM; } -static int nsim_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) +static void nsim_queue_uninit(struct netdevsim *ns) { - struct netdevsim *ns = netdev_priv(dev); + struct net_device *dev = ns->netdev; + int i; + + for (i = 0; i < dev->num_rx_queues; i++) + nsim_queue_free(dev, ns->rq[i]); + + kfree(ns->rq); + ns->rq = NULL; +} + +static int nsim_init_netdevsim(struct netdevsim *ns) +{ + struct netdevsim *peer; + struct mock_phc *phc; + int err; + + phc = mock_phc_create(&ns->nsim_bus_dev->dev); + if (IS_ERR(phc)) + return PTR_ERR(phc); + + ns->phc = phc; + ns->netdev->netdev_ops = &nsim_netdev_ops; + ns->netdev->stat_ops = &nsim_stat_ops; + ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops; + netdev_lockdep_set_classes(ns->netdev); + + err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); + if (err) + goto err_phc_destroy; + + rtnl_lock(); + err = nsim_queue_init(ns); + if (err) + goto err_utn_destroy; + + err = nsim_bpf_init(ns); + if (err) + goto err_rq_destroy; + + nsim_macsec_init(ns); + nsim_ipsec_init(ns); + + err = register_netdevice(ns->netdev); + if (err) + goto err_ipsec_teardown; + rtnl_unlock(); + + err = nsim_psp_init(ns); + if (err) + goto err_unregister_netdev; + + if (IS_ENABLED(CONFIG_DEBUG_NET)) { + ns->nb.notifier_call = netdev_debug_event; + if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb, + &ns->nn)) + ns->nb.notifier_call = NULL; + } + + return 0; + +err_unregister_netdev: + rtnl_lock(); + peer = rtnl_dereference(ns->peer); + if (peer) + RCU_INIT_POINTER(peer->peer, NULL); + RCU_INIT_POINTER(ns->peer, NULL); + unregister_netdevice(ns->netdev); +err_ipsec_teardown: + nsim_ipsec_teardown(ns); + nsim_macsec_teardown(ns); + nsim_bpf_uninit(ns); +err_rq_destroy: + nsim_queue_uninit(ns); +err_utn_destroy: + rtnl_unlock(); + nsim_udp_tunnels_info_destroy(ns->netdev); +err_phc_destroy: + mock_phc_destroy(ns->phc); + return err; +} + +static int nsim_init_netdevsim_vf(struct netdevsim *ns) +{ + int err; + + ns->netdev->netdev_ops = &nsim_vf_netdev_ops; + rtnl_lock(); + err = register_netdevice(ns->netdev); + rtnl_unlock(); + return err; +} + +static void nsim_exit_netdevsim(struct netdevsim *ns) +{ + nsim_udp_tunnels_info_destroy(ns->netdev); + mock_phc_destroy(ns->phc); +} + 
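The netdev_queue_mgmt_ops callbacks and the "queue_reset" debugfs file registered in nsim_create() below give user space a way to drive netdev_rx_queue_restart() with one of the reset modes described above. A minimal sketch of that flow follows; the debugfs path, device ID and port number are assumptions about the usual netdevsim layout, not something this patch defines.

/* Hypothetical user-space helper: restart RX queue 0 of netdevsim device 0,
 * port 0, using reset mode 2 (old NAPI removed before the new one is added).
 * The path and IDs are assumptions; nsim_qreset_write() parses "<queue> <mode>".
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/netdevsim/netdevsim0/ports/0/queue_reset";
	const char *cmd = "0 2";
	int fd;

	fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write");
	close(fd);
	return 0;
}

Writing mode 0 instead exercises the default path, where the replacement NAPI instance is added from the ndo_queue_mem_alloc() callback rather than from ndo_queue_start().
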
+struct netdevsim *nsim_create(struct nsim_dev *nsim_dev, + struct nsim_dev_port *nsim_dev_port, + u8 perm_addr[ETH_ALEN]) +{ + struct net_device *dev; + struct netdevsim *ns; + int err; + + dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup, + nsim_dev->nsim_bus_dev->num_queues); + if (!dev) + return ERR_PTR(-ENOMEM); + + if (perm_addr) + memcpy(dev->perm_addr, perm_addr, ETH_ALEN); + + dev_net_set(dev, nsim_dev_net(nsim_dev)); + ns = netdev_priv(dev); + ns->netdev = dev; + ns->nsim_dev = nsim_dev; + ns->nsim_dev_port = nsim_dev_port; + ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; + SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); + SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); + nsim_ethtool_init(ns); + if (nsim_dev_port_is_pf(nsim_dev_port)) + err = nsim_init_netdevsim(ns); + else + err = nsim_init_netdevsim_vf(ns); + if (err) + goto err_free_netdev; + + ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir, + ns, &nsim_pp_hold_fops); + ns->qr_dfs = debugfs_create_file("queue_reset", 0200, + nsim_dev_port->ddir, ns, + &nsim_qreset_fops); + return ns; + +err_free_netdev: + free_netdev(dev); + return ERR_PTR(err); +} + +void nsim_destroy(struct netdevsim *ns) +{ + struct net_device *dev = ns->netdev; + struct netdevsim *peer; + + debugfs_remove(ns->qr_dfs); + debugfs_remove(ns->pp_dfs); - if (tb[IFLA_LINK]) { - struct net_device *joindev; - struct netdevsim *joinns; - - joindev = __dev_get_by_index(src_net, - nla_get_u32(tb[IFLA_LINK])); - if (!joindev) - return -ENODEV; - if (joindev->netdev_ops != &nsim_netdev_ops) - return -EINVAL; - - joinns = netdev_priv(joindev); - if (!joinns->sdev || !joinns->sdev->refcnt) - return -EINVAL; - ns->sdev = joinns->sdev; + if (ns->nb.notifier_call) + unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb, + &ns->nn); + + nsim_psp_uninit(ns); + + rtnl_lock(); + peer = rtnl_dereference(ns->peer); + if (peer) + RCU_INIT_POINTER(peer->peer, NULL); + RCU_INIT_POINTER(ns->peer, NULL); + unregister_netdevice(dev); + if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { + nsim_macsec_teardown(ns); + nsim_ipsec_teardown(ns); + nsim_bpf_uninit(ns); + nsim_queue_uninit(ns); } + rtnl_unlock(); + if (nsim_dev_port_is_pf(ns->nsim_dev_port)) + nsim_exit_netdevsim(ns); + + /* Put this intentionally late to exercise the orphaning path */ + if (ns->page) { + page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp, + ns->page, false); + ns->page = NULL; + } + + free_netdev(dev); +} - return register_netdevice(dev); +bool netdev_is_nsim(struct net_device *dev) +{ + return dev->netdev_ops == &nsim_netdev_ops; } -static void nsim_dellink(struct net_device *dev, struct list_head *head) +static int nsim_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { - unregister_netdevice_queue(dev, head); + NL_SET_ERR_MSG_MOD(extack, + "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device"); + return -EOPNOTSUPP; } static struct rtnl_link_ops nsim_link_ops __read_mostly = { .kind = DRV_NAME, - .priv_size = sizeof(struct netdevsim), - .setup = nsim_setup, .validate = nsim_validate, - .newlink = nsim_newlink, - .dellink = nsim_dellink, }; static int __init nsim_module_init(void) { int err; - nsim_ddir = debugfs_create_dir(DRV_NAME, NULL); - if (IS_ERR_OR_NULL(nsim_ddir)) - return -ENOMEM; - - nsim_sdev_ddir = debugfs_create_dir(DRV_NAME "_sdev", NULL); - if (IS_ERR_OR_NULL(nsim_sdev_ddir)) { - err = -ENOMEM; - goto err_debugfs_destroy; - } - - err = bus_register(&nsim_bus); + err 
= nsim_dev_init(); if (err) - goto err_sdir_destroy; + return err; - err = nsim_devlink_init(); + err = nsim_bus_init(); if (err) - goto err_unreg_bus; + goto err_dev_exit; err = rtnl_link_register(&nsim_link_ops); if (err) - goto err_dl_fini; + goto err_bus_exit; return 0; -err_dl_fini: - nsim_devlink_exit(); -err_unreg_bus: - bus_unregister(&nsim_bus); -err_sdir_destroy: - debugfs_remove_recursive(nsim_sdev_ddir); -err_debugfs_destroy: - debugfs_remove_recursive(nsim_ddir); +err_bus_exit: + nsim_bus_exit(); +err_dev_exit: + nsim_dev_exit(); return err; } static void __exit nsim_module_exit(void) { rtnl_link_unregister(&nsim_link_ops); - nsim_devlink_exit(); - bus_unregister(&nsim_bus); - debugfs_remove_recursive(nsim_sdev_ddir); - debugfs_remove_recursive(nsim_ddir); + nsim_bus_exit(); + nsim_dev_exit(); } module_init(nsim_module_init); module_exit(nsim_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Simulated networking device for testing"); MODULE_ALIAS_RTNL_LINK(DRV_NAME); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 384c254fafc5..d1a941e2b18f 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -13,12 +13,19 @@ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. */ +#include <linux/debugfs.h> #include <linux/device.h> +#include <linux/ethtool.h> +#include <linux/ethtool_netlink.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/netdevice.h> +#include <linux/ptp_mock.h> #include <linux/u64_stats_sync.h> +#include <net/devlink.h> +#include <net/udp_tunnel.h> #include <net/xdp.h> +#include <net/macsec.h> #define DRV_NAME "netdevsim" @@ -26,28 +33,11 @@ #define NSIM_EA(extack, msg) NL_SET_ERR_MSG_MOD((extack), msg) -struct bpf_prog; -struct bpf_offload_dev; -struct dentry; -struct nsim_vf_config; - -struct netdevsim_shared_dev { - unsigned int refcnt; - u32 switch_id; - - struct dentry *ddir; - - struct bpf_offload_dev *bpf_dev; - - struct dentry *ddir_bpf_bound_progs; - u32 prog_id_gen; - - struct list_head bpf_bound_progs; - struct list_head bpf_bound_maps; -}; - #define NSIM_IPSEC_MAX_SA_COUNT 33 #define NSIM_IPSEC_VALID BIT(31) +#define NSIM_UDP_TUNNEL_N_PORTS 4 + +#define NSIM_HDS_THRESHOLD_MAX 1024 struct nsim_sa { struct xfrm_state *xs; @@ -64,23 +54,72 @@ struct nsim_ipsec { struct dentry *pfile; u32 count; u32 tx; - u32 ok; }; -struct netdevsim { - struct net_device *netdev; +#define NSIM_MACSEC_MAX_SECY_COUNT 3 +#define NSIM_MACSEC_MAX_RXSC_COUNT 1 +struct nsim_rxsc { + sci_t sci; + bool used; +}; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; +struct nsim_secy { + sci_t sci; + struct nsim_rxsc nsim_rxsc[NSIM_MACSEC_MAX_RXSC_COUNT]; + u8 nsim_rxsc_count; + bool used; +}; - struct device dev; - struct netdevsim_shared_dev *sdev; +struct nsim_macsec { + struct nsim_secy nsim_secy[NSIM_MACSEC_MAX_SECY_COUNT]; + u8 nsim_secy_count; +}; - struct dentry *ddir; +struct nsim_ethtool_pauseparam { + bool rx; + bool tx; + bool report_stats_rx; + bool report_stats_tx; +}; - unsigned int num_vfs; - struct nsim_vf_config *vfconfigs; +struct nsim_ethtool { + u32 get_err; + u32 set_err; + u32 channels; + struct nsim_ethtool_pauseparam pauseparam; + struct ethtool_coalesce coalesce; + struct ethtool_ringparam ring; + struct ethtool_fecparam fec; +}; + +struct nsim_rq { + struct napi_struct napi; + struct sk_buff_head skb_queue; + struct page_pool *page_pool; + struct hrtimer napi_timer; +}; + +struct netdevsim { + struct net_device *netdev; + struct nsim_dev 
*nsim_dev; + struct nsim_dev_port *nsim_dev_port; + struct mock_phc *phc; + struct nsim_rq **rq; + + int rq_reset_mode; + + struct { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; + struct psp_dev *dev; + u32 spi; + u32 assoc_cnt; + } psp; + + struct nsim_bus_dev *nsim_bus_dev; struct bpf_prog *bpf_offloaded; u32 bpf_offloaded_id; @@ -88,22 +127,49 @@ struct netdevsim { struct xdp_attachment_info xdp; struct xdp_attachment_info xdp_hw; - bool bpf_bind_accept; - u32 bpf_bind_verifier_delay; - bool bpf_tc_accept; bool bpf_tc_non_bound_accept; bool bpf_xdpdrv_accept; bool bpf_xdpoffload_accept; bool bpf_map_accept; -#if IS_ENABLED(CONFIG_NET_DEVLINK) - struct devlink *devlink; -#endif struct nsim_ipsec ipsec; + struct nsim_macsec macsec; + struct { + u32 inject_error; + u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; + u32 (*ports)[NSIM_UDP_TUNNEL_N_PORTS]; + struct dentry *ddir; + struct debugfs_u32_array dfs_ports[2]; + } udp_ports; + + struct page *page; + struct dentry *pp_dfs; + struct dentry *qr_dfs; + + struct nsim_ethtool ethtool; + struct netdevsim __rcu *peer; + + struct notifier_block nb; + struct netdev_net_notifier nn; }; +struct netdevsim *nsim_create(struct nsim_dev *nsim_dev, + struct nsim_dev_port *nsim_dev_port, + u8 perm_addr[ETH_ALEN]); +void nsim_destroy(struct netdevsim *ns); +bool netdev_is_nsim(struct net_device *dev); + +void nsim_ethtool_init(struct netdevsim *ns); + +void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev); +int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, + struct net_device *dev); +void nsim_udp_tunnels_info_destroy(struct net_device *dev); + #ifdef CONFIG_BPF_SYSCALL +int nsim_bpf_dev_init(struct nsim_dev *nsim_dev); +void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev); int nsim_bpf_init(struct netdevsim *ns); void nsim_bpf_uninit(struct netdevsim *ns); int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf); @@ -111,6 +177,15 @@ int nsim_bpf_disable_tc(struct netdevsim *ns); int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv); #else + +static inline int nsim_bpf_dev_init(struct nsim_dev *nsim_dev) +{ + return 0; +} + +static inline void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev) +{ +} static inline int nsim_bpf_init(struct netdevsim *ns) { return 0; @@ -122,7 +197,7 @@ static inline void nsim_bpf_uninit(struct netdevsim *ns) static inline int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) { - return bpf->command == XDP_QUERY_PROG ? 
0 : -EOPNOTSUPP; + return -EOPNOTSUPP; } static inline int nsim_bpf_disable_tc(struct netdevsim *ns) @@ -138,7 +213,6 @@ nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } #endif -#if IS_ENABLED(CONFIG_NET_DEVLINK) enum nsim_resource_id { NSIM_RESOURCE_NONE, /* DEVLINK_RESOURCE_ID_PARENT_TOP */ NSIM_RESOURCE_IPV4, @@ -147,39 +221,186 @@ enum nsim_resource_id { NSIM_RESOURCE_IPV6, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, + NSIM_RESOURCE_NEXTHOPS, +}; + +struct nsim_dev_health { + struct devlink_health_reporter *empty_reporter; + struct devlink_health_reporter *dummy_reporter; + struct dentry *ddir; + char *recovered_break_msg; + u32 binary_len; + bool fail_recover; +}; + +int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink); +void nsim_dev_health_exit(struct nsim_dev *nsim_dev); + +struct nsim_dev_hwstats_netdev { + struct list_head list; + struct net_device *netdev; + struct rtnl_hw_stats64 stats; + bool enabled; + bool fail_enable; }; -int nsim_devlink_setup(struct netdevsim *ns); -void nsim_devlink_teardown(struct netdevsim *ns); +struct nsim_dev_hwstats { + struct dentry *ddir; + struct dentry *l3_ddir; + + struct mutex hwsdev_list_lock; /* protects hwsdev list(s) */ + struct list_head l3_list; -int nsim_devlink_init(void); -void nsim_devlink_exit(void); + struct notifier_block netdevice_nb; + struct delayed_work traffic_dw; +}; -int nsim_fib_init(void); -void nsim_fib_exit(void); -u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max); -int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, - struct netlink_ext_ack *extack); +int nsim_dev_hwstats_init(struct nsim_dev *nsim_dev); +void nsim_dev_hwstats_exit(struct nsim_dev *nsim_dev); + +#if IS_ENABLED(CONFIG_PSAMPLE) +int nsim_dev_psample_init(struct nsim_dev *nsim_dev); +void nsim_dev_psample_exit(struct nsim_dev *nsim_dev); #else -static inline int nsim_devlink_setup(struct netdevsim *ns) +static inline int nsim_dev_psample_init(struct nsim_dev *nsim_dev) { return 0; } -static inline void nsim_devlink_teardown(struct netdevsim *ns) +static inline void nsim_dev_psample_exit(struct nsim_dev *nsim_dev) +{ +} +#endif + +enum nsim_dev_port_type { + NSIM_DEV_PORT_TYPE_PF, + NSIM_DEV_PORT_TYPE_VF, +}; + +#define NSIM_DEV_VF_PORT_INDEX_BASE 128 +#define NSIM_DEV_VF_PORT_INDEX_MAX UINT_MAX + +struct nsim_dev_port { + struct list_head list; + struct devlink_port devlink_port; + unsigned int port_index; + enum nsim_dev_port_type port_type; + struct dentry *ddir; + struct dentry *rate_parent; + char *parent_name; + u32 tc_bw[DEVLINK_RATE_TCS_MAX]; + struct netdevsim *ns; +}; + +struct nsim_vf_config { + int link_state; + u16 min_tx_rate; + u16 max_tx_rate; + u16 vlan; + __be16 vlan_proto; + u16 qos; + u8 vf_mac[ETH_ALEN]; + bool spoofchk_enabled; + bool trusted; + bool rss_query_enabled; +}; + +struct nsim_dev { + struct nsim_bus_dev *nsim_bus_dev; + struct nsim_fib_data *fib_data; + struct nsim_trap_data *trap_data; + struct dentry *ddir; + struct dentry *ports_ddir; + struct dentry *take_snapshot; + struct dentry *nodes_ddir; + + struct nsim_vf_config *vfconfigs; + + struct bpf_offload_dev *bpf_dev; + bool bpf_bind_accept; + bool bpf_bind_verifier_accept; + u32 bpf_bind_verifier_delay; + struct dentry *ddir_bpf_bound_progs; + u32 prog_id_gen; + struct list_head bpf_bound_progs; + struct list_head bpf_bound_maps; + struct netdev_phys_item_id switch_id; + struct list_head port_list; + bool fw_update_status; + u32 fw_update_overwrite_mask; + u32 
fw_update_flash_chunk_time_ms; + u32 max_macs; + bool test1; + u32 test2; + bool dont_allow_reload; + bool fail_reload; + struct devlink_region *dummy_region; + struct nsim_dev_health health; + struct nsim_dev_hwstats hwstats; + struct flow_action_cookie *fa_cookie; + spinlock_t fa_cookie_lock; /* protects fa_cookie */ + bool fail_trap_group_set; + bool fail_trap_policer_set; + bool fail_trap_policer_counter_get; + bool fail_trap_drop_counter_get; + struct { + struct udp_tunnel_nic_shared utn_shared; + u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; + bool sync_all; + bool open_only; + bool ipv4_only; + bool shared; + bool static_iana_vxlan; + } udp_ports; + struct nsim_dev_psample *psample; + u16 esw_mode; +}; + +static inline bool nsim_esw_mode_is_legacy(struct nsim_dev *nsim_dev) +{ + return nsim_dev->esw_mode == DEVLINK_ESWITCH_MODE_LEGACY; +} + +static inline bool nsim_esw_mode_is_switchdev(struct nsim_dev *nsim_dev) { + return nsim_dev->esw_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV; } -static inline int nsim_devlink_init(void) +static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev) { - return 0; + return devlink_net(priv_to_devlink(nsim_dev)); } -static inline void nsim_devlink_exit(void) +int nsim_dev_init(void); +void nsim_dev_exit(void); +int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev); +void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev); +int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, + enum nsim_dev_port_type type, unsigned int port_index, + u8 perm_addr[ETH_ALEN]); +int nsim_drv_port_del(struct nsim_bus_dev *nsim_bus_dev, + enum nsim_dev_port_type type, + unsigned int port_index); +int nsim_drv_configure_vfs(struct nsim_bus_dev *nsim_bus_dev, + unsigned int num_vfs); + +unsigned int nsim_dev_get_vfs(struct nsim_dev *nsim_dev); + +struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, + struct netlink_ext_ack *extack); +void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *fib_data); +u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, + enum nsim_resource_id res_id, bool max); + +static inline bool nsim_dev_port_is_pf(struct nsim_dev_port *nsim_dev_port) { + return nsim_dev_port->port_type == NSIM_DEV_PORT_TYPE_PF; } -#endif +static inline bool nsim_dev_port_is_vf(struct nsim_dev_port *nsim_dev_port) +{ + return nsim_dev_port->port_type == NSIM_DEV_PORT_TYPE_VF; +} #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) void nsim_ipsec_init(struct netdevsim *ns); void nsim_ipsec_teardown(struct netdevsim *ns); @@ -199,7 +420,52 @@ static inline bool nsim_ipsec_tx(struct netdevsim *ns, struct sk_buff *skb) } #endif -static inline struct netdevsim *to_nsim(struct device *ptr) +#if IS_ENABLED(CONFIG_MACSEC) +void nsim_macsec_init(struct netdevsim *ns); +void nsim_macsec_teardown(struct netdevsim *ns); +#else +static inline void nsim_macsec_init(struct netdevsim *ns) +{ +} + +static inline void nsim_macsec_teardown(struct netdevsim *ns) { - return container_of(ptr, struct netdevsim, dev); } +#endif + +#if IS_ENABLED(CONFIG_INET_PSP) +int nsim_psp_init(struct netdevsim *ns); +void nsim_psp_uninit(struct netdevsim *ns); +void nsim_psp_handle_ext(struct sk_buff *skb, struct skb_ext *psp_ext); +enum skb_drop_reason +nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, + struct netdevsim *peer_ns, struct skb_ext **psp_ext); +#else +static inline int nsim_psp_init(struct netdevsim *ns) { return 0; } +static inline void nsim_psp_uninit(struct netdevsim *ns) {} +static inline enum skb_drop_reason +nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, + struct 
netdevsim *peer_ns, struct skb_ext **psp_ext) +{ + return 0; +} + +static inline void +nsim_psp_handle_ext(struct sk_buff *skb, struct skb_ext *psp_ext) {} +#endif + +struct nsim_bus_dev { + struct device dev; + struct list_head list; + unsigned int port_count; + unsigned int num_queues; /* Number of queues for each port on this bus */ + struct net *initial_net; /* Purpose of this is to carry net pointer + * during the probe time only. + */ + unsigned int max_vfs; + unsigned int num_vfs; + bool init; +}; + +int nsim_bus_init(void); +void nsim_bus_exit(void); diff --git a/drivers/net/netdevsim/psample.c b/drivers/net/netdevsim/psample.c new file mode 100644 index 000000000000..f0c6477dd0ae --- /dev/null +++ b/drivers/net/netdevsim/psample.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Mellanox Technologies. All rights reserved */ + +#include <linux/debugfs.h> +#include <linux/err.h> +#include <linux/etherdevice.h> +#include <linux/inet.h> +#include <linux/kernel.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <net/devlink.h> +#include <net/ip.h> +#include <net/psample.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/udp.h> + +#include "netdevsim.h" + +#define NSIM_PSAMPLE_REPORT_INTERVAL_MS 100 +#define NSIM_PSAMPLE_INVALID_TC 0xFFFF +#define NSIM_PSAMPLE_L4_DATA_LEN 100 + +struct nsim_dev_psample { + struct delayed_work psample_dw; + struct dentry *ddir; + struct psample_group *group; + u32 rate; + u32 group_num; + u32 trunc_size; + int in_ifindex; + int out_ifindex; + u16 out_tc; + u64 out_tc_occ_max; + u64 latency_max; + bool is_active; +}; + +static struct sk_buff *nsim_dev_psample_skb_build(void) +{ + int tot_len, data_len = NSIM_PSAMPLE_L4_DATA_LEN; + struct sk_buff *skb; + struct udphdr *udph; + struct ethhdr *eth; + struct iphdr *iph; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return NULL; + tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len; + + skb_reset_mac_header(skb); + eth = skb_put(skb, sizeof(struct ethhdr)); + eth_random_addr(eth->h_dest); + eth_random_addr(eth->h_source); + eth->h_proto = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); + + skb_set_network_header(skb, skb->len); + iph = skb_put(skb, sizeof(struct iphdr)); + iph->protocol = IPPROTO_UDP; + iph->saddr = in_aton("192.0.2.1"); + iph->daddr = in_aton("198.51.100.1"); + iph->version = 0x4; + iph->frag_off = 0; + iph->ihl = 0x5; + iph->tot_len = htons(tot_len); + iph->id = 0; + iph->ttl = 100; + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + skb_set_transport_header(skb, skb->len); + udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len); + get_random_bytes(&udph->source, sizeof(u16)); + get_random_bytes(&udph->dest, sizeof(u16)); + udph->len = htons(sizeof(struct udphdr) + data_len); + + return skb; +} + +static void nsim_dev_psample_md_prepare(const struct nsim_dev_psample *psample, + struct psample_metadata *md, + unsigned int len) +{ + md->trunc_size = psample->trunc_size ? 
psample->trunc_size : len; + md->in_ifindex = psample->in_ifindex; + md->out_ifindex = psample->out_ifindex; + + if (psample->out_tc != NSIM_PSAMPLE_INVALID_TC) { + md->out_tc = psample->out_tc; + md->out_tc_valid = 1; + } + + if (psample->out_tc_occ_max) { + u64 out_tc_occ; + + get_random_bytes(&out_tc_occ, sizeof(u64)); + md->out_tc_occ = out_tc_occ & (psample->out_tc_occ_max - 1); + md->out_tc_occ_valid = 1; + } + + if (psample->latency_max) { + u64 latency; + + get_random_bytes(&latency, sizeof(u64)); + md->latency = latency & (psample->latency_max - 1); + md->latency_valid = 1; + } +} + +static void nsim_dev_psample_report_work(struct work_struct *work) +{ + struct nsim_dev_psample *psample; + struct psample_metadata md = {}; + struct sk_buff *skb; + unsigned long delay; + + psample = container_of(work, struct nsim_dev_psample, psample_dw.work); + + skb = nsim_dev_psample_skb_build(); + if (!skb) + goto out; + + nsim_dev_psample_md_prepare(psample, &md, skb->len); + psample_sample_packet(psample->group, skb, psample->rate, &md); + consume_skb(skb); + +out: + delay = msecs_to_jiffies(NSIM_PSAMPLE_REPORT_INTERVAL_MS); + schedule_delayed_work(&psample->psample_dw, delay); +} + +static int nsim_dev_psample_enable(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_psample *psample = nsim_dev->psample; + struct devlink *devlink; + unsigned long delay; + + if (psample->is_active) + return -EBUSY; + + devlink = priv_to_devlink(nsim_dev); + psample->group = psample_group_get(devlink_net(devlink), + psample->group_num); + if (!psample->group) + return -EINVAL; + + delay = msecs_to_jiffies(NSIM_PSAMPLE_REPORT_INTERVAL_MS); + schedule_delayed_work(&psample->psample_dw, delay); + + psample->is_active = true; + + return 0; +} + +static int nsim_dev_psample_disable(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_psample *psample = nsim_dev->psample; + + if (!psample->is_active) + return -EINVAL; + + psample->is_active = false; + + cancel_delayed_work_sync(&psample->psample_dw); + psample_group_put(psample->group); + + return 0; +} + +static ssize_t nsim_dev_psample_enable_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev *nsim_dev = file->private_data; + bool enable; + int err; + + err = kstrtobool_from_user(data, count, &enable); + if (err) + return err; + + if (enable) + err = nsim_dev_psample_enable(nsim_dev); + else + err = nsim_dev_psample_disable(nsim_dev); + + return err ? err : count; +} + +static const struct file_operations nsim_psample_enable_fops = { + .open = simple_open, + .write = nsim_dev_psample_enable_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + +int nsim_dev_psample_init(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_psample *psample; + int err; + + psample = kzalloc(sizeof(*psample), GFP_KERNEL); + if (!psample) + return -ENOMEM; + nsim_dev->psample = psample; + + INIT_DELAYED_WORK(&psample->psample_dw, nsim_dev_psample_report_work); + + psample->ddir = debugfs_create_dir("psample", nsim_dev->ddir); + if (IS_ERR(psample->ddir)) { + err = PTR_ERR(psample->ddir); + goto err_psample_free; + } + + /* Populate sampling parameters with sane defaults. 
*/ + psample->rate = 100; + debugfs_create_u32("rate", 0600, psample->ddir, &psample->rate); + + psample->group_num = 10; + debugfs_create_u32("group_num", 0600, psample->ddir, + &psample->group_num); + + psample->trunc_size = 0; + debugfs_create_u32("trunc_size", 0600, psample->ddir, + &psample->trunc_size); + + psample->in_ifindex = 1; + debugfs_create_u32("in_ifindex", 0600, psample->ddir, + &psample->in_ifindex); + + psample->out_ifindex = 2; + debugfs_create_u32("out_ifindex", 0600, psample->ddir, + &psample->out_ifindex); + + psample->out_tc = 0; + debugfs_create_u16("out_tc", 0600, psample->ddir, &psample->out_tc); + + psample->out_tc_occ_max = 10000; + debugfs_create_u64("out_tc_occ_max", 0600, psample->ddir, + &psample->out_tc_occ_max); + + psample->latency_max = 50; + debugfs_create_u64("latency_max", 0600, psample->ddir, + &psample->latency_max); + + debugfs_create_file("enable", 0200, psample->ddir, nsim_dev, + &nsim_psample_enable_fops); + + return 0; + +err_psample_free: + kfree(nsim_dev->psample); + return err; +} + +void nsim_dev_psample_exit(struct nsim_dev *nsim_dev) +{ + debugfs_remove_recursive(nsim_dev->psample->ddir); + if (nsim_dev->psample->is_active) { + cancel_delayed_work_sync(&nsim_dev->psample->psample_dw); + psample_group_put(nsim_dev->psample->group); + } + kfree(nsim_dev->psample); +} diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c new file mode 100644 index 000000000000..727da06101ca --- /dev/null +++ b/drivers/net/netdevsim/psp.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/ip.h> +#include <linux/skbuff.h> +#include <net/ip6_checksum.h> +#include <net/psp.h> +#include <net/sock.h> + +#include "netdevsim.h" + +void nsim_psp_handle_ext(struct sk_buff *skb, struct skb_ext *psp_ext) +{ + if (psp_ext) + __skb_ext_set(skb, SKB_EXT_PSP, psp_ext); +} + +enum skb_drop_reason +nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, + struct netdevsim *peer_ns, struct skb_ext **psp_ext) +{ + enum skb_drop_reason rc = 0; + struct psp_assoc *pas; + struct net *net; + void **ptr; + + rcu_read_lock(); + pas = psp_skb_get_assoc_rcu(skb); + if (!pas) { + rc = SKB_NOT_DROPPED_YET; + goto out_unlock; + } + + if (!skb_transport_header_was_set(skb)) { + rc = SKB_DROP_REASON_PSP_OUTPUT; + goto out_unlock; + } + + ptr = psp_assoc_drv_data(pas); + if (*ptr != ns) { + rc = SKB_DROP_REASON_PSP_OUTPUT; + goto out_unlock; + } + + net = sock_net(skb->sk); + if (!psp_dev_encapsulate(net, skb, pas->tx.spi, pas->version, 0)) { + rc = SKB_DROP_REASON_PSP_OUTPUT; + goto out_unlock; + } + + /* Now pretend we just received this frame */ + if (peer_ns->psp.dev->config.versions & (1 << pas->version)) { + bool strip_icv = false; + u8 generation; + + /* We cheat a bit and put the generation in the key. + * In real life if generation was too old, then decryption would + * fail. Here, we just make it so a bad key causes a bad + * generation too, and psp_sk_rx_policy_check() will fail. 
+ */ + generation = pas->tx.key[0]; + + skb_ext_reset(skb); + skb->mac_len = ETH_HLEN; + if (psp_dev_rcv(skb, peer_ns->psp.dev->id, generation, + strip_icv)) { + rc = SKB_DROP_REASON_PSP_OUTPUT; + goto out_unlock; + } + + *psp_ext = skb->extensions; + refcount_inc(&(*psp_ext)->refcnt); + skb->decrypted = 1; + + u64_stats_update_begin(&ns->psp.syncp); + ns->psp.tx_packets++; + ns->psp.rx_packets++; + ns->psp.tx_bytes += skb->len - skb_inner_transport_offset(skb); + ns->psp.rx_bytes += skb->len - skb_inner_transport_offset(skb); + u64_stats_update_end(&ns->psp.syncp); + } else { + struct ipv6hdr *ip6h __maybe_unused; + struct iphdr *iph; + struct udphdr *uh; + __wsum csum; + + /* Do not decapsulate. Receive the skb with the udp and psp + * headers still there as if this is a normal udp packet. + * psp_dev_encapsulate() sets udp checksum to 0, so we need to + * provide a valid checksum here, so the skb isn't dropped. + */ + uh = udp_hdr(skb); + csum = skb_checksum(skb, skb_transport_offset(skb), + ntohs(uh->len), 0); + + switch (skb->protocol) { + case htons(ETH_P_IP): + iph = ip_hdr(skb); + uh->check = udp_v4_check(ntohs(uh->len), iph->saddr, + iph->daddr, csum); + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + ip6h = ipv6_hdr(skb); + uh->check = udp_v6_check(ntohs(uh->len), &ip6h->saddr, + &ip6h->daddr, csum); + break; +#endif + } + + uh->check = uh->check ?: CSUM_MANGLED_0; + skb->ip_summed = CHECKSUM_NONE; + } + +out_unlock: + rcu_read_unlock(); + return rc; +} + +static int +nsim_psp_set_config(struct psp_dev *psd, struct psp_dev_config *conf, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int +nsim_rx_spi_alloc(struct psp_dev *psd, u32 version, + struct psp_key_parsed *assoc, + struct netlink_ext_ack *extack) +{ + struct netdevsim *ns = psd->drv_priv; + unsigned int new; + int i; + + new = ++ns->psp.spi & PSP_SPI_KEY_ID; + if (psd->generation & 1) + new |= PSP_SPI_KEY_PHASE; + + assoc->spi = cpu_to_be32(new); + assoc->key[0] = psd->generation; + for (i = 1; i < PSP_MAX_KEY; i++) + assoc->key[i] = ns->psp.spi + i; + + return 0; +} + +static int nsim_assoc_add(struct psp_dev *psd, struct psp_assoc *pas, + struct netlink_ext_ack *extack) +{ + struct netdevsim *ns = psd->drv_priv; + void **ptr = psp_assoc_drv_data(pas); + + /* Copy drv_priv from psd to assoc */ + *ptr = psd->drv_priv; + ns->psp.assoc_cnt++; + + return 0; +} + +static int nsim_key_rotate(struct psp_dev *psd, struct netlink_ext_ack *extack) +{ + return 0; +} + +static void nsim_assoc_del(struct psp_dev *psd, struct psp_assoc *pas) +{ + struct netdevsim *ns = psd->drv_priv; + void **ptr = psp_assoc_drv_data(pas); + + *ptr = NULL; + ns->psp.assoc_cnt--; +} + +static void nsim_get_stats(struct psp_dev *psd, struct psp_dev_stats *stats) +{ + struct netdevsim *ns = psd->drv_priv; + unsigned int start; + + /* WARNING: do *not* blindly zero stats in real drivers! + * All required stats must be reported by the device! 
+ */ + memset(stats, 0, sizeof(struct psp_dev_stats)); + + do { + start = u64_stats_fetch_begin(&ns->psp.syncp); + stats->rx_bytes = ns->psp.rx_bytes; + stats->rx_packets = ns->psp.rx_packets; + stats->tx_bytes = ns->psp.tx_bytes; + stats->tx_packets = ns->psp.tx_packets; + } while (u64_stats_fetch_retry(&ns->psp.syncp, start)); +} + +static struct psp_dev_ops nsim_psp_ops = { + .set_config = nsim_psp_set_config, + .rx_spi_alloc = nsim_rx_spi_alloc, + .tx_key_add = nsim_assoc_add, + .tx_key_del = nsim_assoc_del, + .key_rotate = nsim_key_rotate, + .get_stats = nsim_get_stats, +}; + +static struct psp_dev_caps nsim_psp_caps = { + .versions = 1 << PSP_VERSION_HDR0_AES_GCM_128 | + 1 << PSP_VERSION_HDR0_AES_GMAC_128 | + 1 << PSP_VERSION_HDR0_AES_GCM_256 | + 1 << PSP_VERSION_HDR0_AES_GMAC_256, + .assoc_drv_spc = sizeof(void *), +}; + +void nsim_psp_uninit(struct netdevsim *ns) +{ + if (!IS_ERR(ns->psp.dev)) + psp_dev_unregister(ns->psp.dev); + WARN_ON(ns->psp.assoc_cnt); +} + +static ssize_t +nsim_psp_rereg_write(struct file *file, const char __user *data, size_t count, + loff_t *ppos) +{ + struct netdevsim *ns = file->private_data; + int err; + + nsim_psp_uninit(ns); + + ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, + &nsim_psp_caps, ns); + err = PTR_ERR_OR_ZERO(ns->psp.dev); + return err ?: count; +} + +static const struct file_operations nsim_psp_rereg_fops = { + .open = simple_open, + .write = nsim_psp_rereg_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + +int nsim_psp_init(struct netdevsim *ns) +{ + struct dentry *ddir = ns->nsim_dev_port->ddir; + int err; + + ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, + &nsim_psp_caps, ns); + err = PTR_ERR_OR_ZERO(ns->psp.dev); + if (err) + return err; + + debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops); + return 0; +} diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c new file mode 100644 index 000000000000..89fff76e51cf --- /dev/null +++ b/drivers/net/netdevsim/udp_tunnels.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2020 Facebook Inc. 
+ +#include <linux/debugfs.h> +#include <linux/netdevice.h> +#include <linux/slab.h> +#include <net/udp_tunnel.h> + +#include "netdevsim.h" + +static int +nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table, + unsigned int entry, struct udp_tunnel_info *ti) +{ + struct netdevsim *ns = netdev_priv(dev); + int ret; + + ret = -ns->udp_ports.inject_error; + ns->udp_ports.inject_error = 0; + + if (!ret) { + if (ns->udp_ports.ports[table][entry]) { + WARN(1, "entry already in use\n"); + ret = -EBUSY; + } else { + ns->udp_ports.ports[table][entry] = + be16_to_cpu(ti->port) << 16 | ti->type; + } + } + + netdev_info(dev, "set [%d, %d] type %d family %d port %d - %d\n", + table, entry, ti->type, ti->sa_family, ntohs(ti->port), + ret); + return ret; +} + +static int +nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table, + unsigned int entry, struct udp_tunnel_info *ti) +{ + struct netdevsim *ns = netdev_priv(dev); + int ret; + + ret = -ns->udp_ports.inject_error; + ns->udp_ports.inject_error = 0; + + if (!ret) { + u32 val = be16_to_cpu(ti->port) << 16 | ti->type; + + if (val == ns->udp_ports.ports[table][entry]) { + ns->udp_ports.ports[table][entry] = 0; + } else { + WARN(1, "entry not installed %x vs %x\n", + val, ns->udp_ports.ports[table][entry]); + ret = -ENOENT; + } + } + + netdev_info(dev, "unset [%d, %d] type %d family %d port %d - %d\n", + table, entry, ti->type, ti->sa_family, ntohs(ti->port), + ret); + return ret; +} + +static int +nsim_udp_tunnel_sync_table(struct net_device *dev, unsigned int table) +{ + struct netdevsim *ns = netdev_priv(dev); + struct udp_tunnel_info ti; + unsigned int i; + int ret; + + ret = -ns->udp_ports.inject_error; + ns->udp_ports.inject_error = 0; + + for (i = 0; i < NSIM_UDP_TUNNEL_N_PORTS; i++) { + udp_tunnel_nic_get_port(dev, table, i, &ti); + ns->udp_ports.ports[table][i] = + be16_to_cpu(ti.port) << 16 | ti.type; + } + + return ret; +} + +static const struct udp_tunnel_nic_info nsim_udp_tunnel_info = { + .set_port = nsim_udp_tunnel_set_port, + .unset_port = nsim_udp_tunnel_unset_port, + .sync_table = nsim_udp_tunnel_sync_table, + + .tables = { + { + .n_entries = NSIM_UDP_TUNNEL_N_PORTS, + .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, + }, + { + .n_entries = NSIM_UDP_TUNNEL_N_PORTS, + .tunnel_types = UDP_TUNNEL_TYPE_GENEVE | + UDP_TUNNEL_TYPE_VXLAN_GPE, + }, + }, +}; + +static ssize_t +nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct net_device *dev = file->private_data; + struct netdevsim *ns = netdev_priv(dev); + + if (dev->reg_state == NETREG_REGISTERED) { + memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); + udp_tunnel_nic_reset_ntf(dev); + } + + return count; +} + +static const struct file_operations nsim_udp_tunnels_info_reset_fops = { + .open = simple_open, + .write = nsim_udp_tunnels_info_reset_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + +int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, + struct net_device *dev) +{ + struct netdevsim *ns = netdev_priv(dev); + struct udp_tunnel_nic_info *info; + + if (nsim_dev->udp_ports.shared && nsim_dev->udp_ports.open_only) { + dev_err(&nsim_dev->nsim_bus_dev->dev, + "shared can't be used in conjunction with open_only\n"); + return -EINVAL; + } + + if (!nsim_dev->udp_ports.shared) + ns->udp_ports.ports = ns->udp_ports.__ports; + else + ns->udp_ports.ports = nsim_dev->udp_ports.__ports; + + ns->udp_ports.ddir = debugfs_create_dir("udp_ports", + ns->nsim_dev_port->ddir); + 
+ debugfs_create_u32("inject_error", 0600, ns->udp_ports.ddir, + &ns->udp_ports.inject_error); + + ns->udp_ports.dfs_ports[0].array = ns->udp_ports.ports[0]; + ns->udp_ports.dfs_ports[0].n_elements = NSIM_UDP_TUNNEL_N_PORTS; + debugfs_create_u32_array("table0", 0400, ns->udp_ports.ddir, + &ns->udp_ports.dfs_ports[0]); + + ns->udp_ports.dfs_ports[1].array = ns->udp_ports.ports[1]; + ns->udp_ports.dfs_ports[1].n_elements = NSIM_UDP_TUNNEL_N_PORTS; + debugfs_create_u32_array("table1", 0400, ns->udp_ports.ddir, + &ns->udp_ports.dfs_ports[1]); + + debugfs_create_file("reset", 0200, ns->udp_ports.ddir, + dev, &nsim_udp_tunnels_info_reset_fops); + + /* Note: it's not normal to allocate the info struct like this! + * Drivers are expected to use a static const one, here we're testing. + */ + info = kmemdup(&nsim_udp_tunnel_info, sizeof(nsim_udp_tunnel_info), + GFP_KERNEL); + if (!info) + return -ENOMEM; + + if (nsim_dev->udp_ports.sync_all) { + info->set_port = NULL; + info->unset_port = NULL; + } else { + info->sync_table = NULL; + } + + if (nsim_dev->udp_ports.open_only) + info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY; + if (nsim_dev->udp_ports.ipv4_only) + info->flags |= UDP_TUNNEL_NIC_INFO_IPV4_ONLY; + if (nsim_dev->udp_ports.shared) + info->shared = &nsim_dev->udp_ports.utn_shared; + if (nsim_dev->udp_ports.static_iana_vxlan) + info->flags |= UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; + + dev->udp_tunnel_nic_info = info; + return 0; +} + +void nsim_udp_tunnels_info_destroy(struct net_device *dev) +{ + struct netdevsim *ns = netdev_priv(dev); + + debugfs_remove_recursive(ns->udp_ports.ddir); + kfree(dev->udp_tunnel_nic_info); + dev->udp_tunnel_nic_info = NULL; +} + +void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev) +{ + debugfs_create_bool("udp_ports_sync_all", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.sync_all); + debugfs_create_bool("udp_ports_open_only", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.open_only); + debugfs_create_bool("udp_ports_ipv4_only", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.ipv4_only); + debugfs_create_bool("udp_ports_shared", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.shared); + debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.static_iana_vxlan); +} |
