summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/devlink.c1124
-rw-r--r--net/core/drop_monitor.c730
-rw-r--r--net/core/filter.c8
-rw-r--r--net/core/flow_dissector.c2
-rw-r--r--net/core/flow_offload.c13
-rw-r--r--net/core/netpoll.c6
-rw-r--r--net/core/page_pool.c41
-rw-r--r--net/core/skbuff.c6
-rw-r--r--net/core/skmsg.c3
-rw-r--r--net/core/sock.c50
-rw-r--r--net/core/sock_diag.c3
-rw-r--r--net/core/sock_map.c2
-rw-r--r--net/core/stream.c16
13 files changed, 1889 insertions, 115 deletions
diff --git a/net/core/devlink.c b/net/core/devlink.c
index d3dbb904bf3b..6e52d639dac6 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -18,6 +18,8 @@
#include <linux/spinlock.h>
#include <linux/refcount.h>
#include <linux/workqueue.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/timekeeping.h>
#include <rdma/ib_verbs.h>
#include <net/netlink.h>
#include <net/genetlink.h>
@@ -25,6 +27,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/devlink.h>
+#include <net/drop_monitor.h>
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
@@ -133,7 +136,7 @@ static struct devlink *devlink_get_from_info(struct genl_info *info)
}
static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
- int port_index)
+ unsigned int port_index)
{
struct devlink_port *devlink_port;
@@ -144,7 +147,8 @@ static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
return NULL;
}
-static bool devlink_port_index_exists(struct devlink *devlink, int port_index)
+static bool devlink_port_index_exists(struct devlink *devlink,
+ unsigned int port_index)
{
return devlink_port_get_by_index(devlink, port_index);
}
@@ -506,32 +510,37 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
return 0;
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
return -EMSGSIZE;
- if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) {
+ switch (devlink_port->attrs.flavour) {
+ case DEVLINK_PORT_FLAVOUR_PCI_PF:
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
attrs->pci_pf.pf))
return -EMSGSIZE;
- } else if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ break;
+ case DEVLINK_PORT_FLAVOUR_PCI_VF:
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
attrs->pci_vf.pf) ||
nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER,
attrs->pci_vf.vf))
return -EMSGSIZE;
+ break;
+ case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+ case DEVLINK_PORT_FLAVOUR_CPU:
+ case DEVLINK_PORT_FLAVOUR_DSA:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
+ attrs->phys.port_number))
+ return -EMSGSIZE;
+ if (!attrs->split)
+ return 0;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
+ attrs->phys.port_number))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
+ attrs->phys.split_subport_number))
+ return -EMSGSIZE;
+ break;
+ default:
+ break;
}
- if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PHYSICAL &&
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA)
- return 0;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
- attrs->phys.port_number))
- return -EMSGSIZE;
- if (!attrs->split)
- return 0;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
- attrs->phys.port_number))
- return -EMSGSIZE;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
- attrs->phys.split_subport_number))
- return -EMSGSIZE;
return 0;
}
@@ -551,7 +560,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
goto nla_put_failure;
- spin_lock(&devlink_port->type_lock);
+ spin_lock_bh(&devlink_port->type_lock);
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
goto nla_put_failure_type_locked;
if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
@@ -576,7 +585,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
ibdev->name))
goto nla_put_failure_type_locked;
}
- spin_unlock(&devlink_port->type_lock);
+ spin_unlock_bh(&devlink_port->type_lock);
if (devlink_nl_port_attrs_put(msg, devlink_port))
goto nla_put_failure;
@@ -584,7 +593,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
return 0;
nla_put_failure_type_locked:
- spin_unlock(&devlink_port->type_lock);
+ spin_unlock_bh(&devlink_port->type_lock);
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -5154,6 +5163,571 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
return 0;
}
+struct devlink_stats {
+ u64 rx_bytes;
+ u64 rx_packets;
+ struct u64_stats_sync syncp;
+};
+
+/**
+ * struct devlink_trap_group_item - Packet trap group attributes.
+ * @group: Immutable packet trap group attributes.
+ * @refcount: Number of trap items using the group.
+ * @list: trap_group_list member.
+ * @stats: Trap group statistics.
+ *
+ * Describes packet trap group attributes. Created by devlink during trap
+ * registration.
+ */
+struct devlink_trap_group_item {
+ const struct devlink_trap_group *group;
+ refcount_t refcount;
+ struct list_head list;
+ struct devlink_stats __percpu *stats;
+};
+
+/**
+ * struct devlink_trap_item - Packet trap attributes.
+ * @trap: Immutable packet trap attributes.
+ * @group_item: Associated group item.
+ * @list: trap_list member.
+ * @action: Trap action.
+ * @stats: Trap statistics.
+ * @priv: Driver private information.
+ *
+ * Describes both mutable and immutable packet trap attributes. Created by
+ * devlink during trap registration and used for all trap related operations.
+ */
+struct devlink_trap_item {
+ const struct devlink_trap *trap;
+ struct devlink_trap_group_item *group_item;
+ struct list_head list;
+ enum devlink_trap_action action;
+ struct devlink_stats __percpu *stats;
+ void *priv;
+};
+
+static struct devlink_trap_item *
+devlink_trap_item_lookup(struct devlink *devlink, const char *name)
+{
+ struct devlink_trap_item *trap_item;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (!strcmp(trap_item->trap->name, name))
+ return trap_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ struct nlattr *attr;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
+ return NULL;
+ attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
+
+ return devlink_trap_item_lookup(devlink, nla_data(attr));
+}
+
+static int
+devlink_trap_action_get_from_info(struct genl_info *info,
+ enum devlink_trap_action *p_trap_action)
+{
+ u8 val;
+
+ val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
+ switch (val) {
+ case DEVLINK_TRAP_ACTION_DROP: /* fall-through */
+ case DEVLINK_TRAP_ACTION_TRAP:
+ *p_trap_action = val;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_metadata_put(struct sk_buff *msg,
+ const struct devlink_trap *trap)
+{
+ struct nlattr *attr;
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
+ struct devlink_stats *stats)
+{
+ int i;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(i) {
+ struct devlink_stats *cpu_stats;
+ u64 rx_packets, rx_bytes;
+ unsigned int start;
+
+ cpu_stats = per_cpu_ptr(trap_stats, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ rx_packets = cpu_stats->rx_packets;
+ rx_bytes = cpu_stats->rx_bytes;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+ stats->rx_packets += rx_packets;
+ stats->rx_bytes += rx_bytes;
+ }
+}
+
+static int devlink_trap_stats_put(struct sk_buff *msg,
+ struct devlink_stats __percpu *trap_stats)
+{
+ struct devlink_stats stats;
+ struct nlattr *attr;
+
+ devlink_trap_stats_read(trap_stats, &stats);
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ stats.rx_packets, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ stats.rx_bytes, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ struct devlink_trap_group_item *group_item = trap_item->group_item;
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+ group_item->group->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
+ goto nla_put_failure;
+
+ if (trap_item->trap->generic &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
+ goto nla_put_failure;
+
+ err = devlink_trap_metadata_put(msg, trap_item->trap);
+ if (err)
+ goto nla_put_failure;
+
+ err = devlink_trap_stats_put(msg, trap_item->stats);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_item *trap_item;
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_list))
+ return -EOPNOTSUPP;
+
+ trap_item = devlink_trap_item_get_from_info(devlink, info);
+ if (!trap_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW, info->snd_portid,
+ info->snd_seq, 0);
+ if (err)
+ goto err_trap_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink_trap_item *trap_item;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int __devlink_trap_action_set(struct devlink *devlink,
+ struct devlink_trap_item *trap_item,
+ enum devlink_trap_action trap_action,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (trap_item->action != trap_action &&
+ trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot change action of non-drop traps. Skipping");
+ return 0;
+ }
+
+ err = devlink->ops->trap_action_set(devlink, trap_item->trap,
+ trap_action);
+ if (err)
+ return err;
+
+ trap_item->action = trap_action;
+
+ return 0;
+}
+
+static int devlink_trap_action_set(struct devlink *devlink,
+ struct devlink_trap_item *trap_item,
+ struct genl_info *info)
+{
+ enum devlink_trap_action trap_action;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+ return 0;
+
+ err = devlink_trap_action_get_from_info(info, &trap_action);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+ return -EINVAL;
+ }
+
+ return __devlink_trap_action_set(devlink, trap_item, trap_action,
+ info->extack);
+}
+
+static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ if (list_empty(&devlink->trap_list))
+ return -EOPNOTSUPP;
+
+ trap_item = devlink_trap_item_get_from_info(devlink, info);
+ if (!trap_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+ return -ENOENT;
+ }
+
+ err = devlink_trap_action_set(devlink, trap_item, info);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (!strcmp(group_item->group->name, name))
+ return group_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ char *name;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
+ return NULL;
+ name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
+
+ return devlink_trap_group_item_lookup(devlink, name);
+}
+
+static int
+devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+ group_item->group->name))
+ goto nla_put_failure;
+
+ if (group_item->group->generic &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+ goto nla_put_failure;
+
+ err = devlink_trap_stats_put(msg, group_item->stats);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_group_item *group_item;
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_group_list))
+ return -EOPNOTSUPP;
+
+ group_item = devlink_trap_group_item_get_from_info(devlink, info);
+ if (!group_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err)
+ goto err_trap_group_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_group_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ enum devlink_command cmd = DEVLINK_CMD_TRAP_GROUP_NEW;
+ struct devlink_trap_group_item *group_item;
+ u32 portid = NETLINK_CB(cb->skb).portid;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(group_item, &devlink->trap_group_list,
+ list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_group_fill(msg, devlink,
+ group_item, cmd,
+ portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int
+__devlink_trap_group_action_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ enum devlink_trap_action trap_action,
+ struct netlink_ext_ack *extack)
+{
+ const char *group_name = group_item->group->name;
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (strcmp(trap_item->trap->group.name, group_name))
+ continue;
+ err = __devlink_trap_action_set(devlink, trap_item,
+ trap_action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+devlink_trap_group_action_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ struct genl_info *info)
+{
+ enum devlink_trap_action trap_action;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+ return 0;
+
+ err = devlink_trap_action_get_from_info(info, &trap_action);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+ return -EINVAL;
+ }
+
+ err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
+ info->extack);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_group_item *group_item;
+ int err;
+
+ if (list_empty(&devlink->trap_group_list))
+ return -EOPNOTSUPP;
+
+ group_item = devlink_trap_group_item_get_from_info(devlink, info);
+ if (!group_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+ return -ENOENT;
+ }
+
+ err = devlink_trap_group_action_set(devlink, group_item, info);
+ if (err)
+ return err;
+
+ return 0;
+}
+
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -5184,6 +5758,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -5483,6 +6060,32 @@ static const struct genl_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
+ {
+ .cmd = DEVLINK_CMD_TRAP_GET,
+ .doit = devlink_nl_cmd_trap_get_doit,
+ .dumpit = devlink_nl_cmd_trap_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_SET,
+ .doit = devlink_nl_cmd_trap_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+ .doit = devlink_nl_cmd_trap_group_get_doit,
+ .dumpit = devlink_nl_cmd_trap_group_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_SET,
+ .doit = devlink_nl_cmd_trap_group_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -5528,6 +6131,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->param_list);
INIT_LIST_HEAD(&devlink->region_list);
INIT_LIST_HEAD(&devlink->reporter_list);
+ INIT_LIST_HEAD(&devlink->trap_list);
+ INIT_LIST_HEAD(&devlink->trap_group_list);
mutex_init(&devlink->lock);
mutex_init(&devlink->reporters_lock);
return devlink;
@@ -5574,6 +6179,8 @@ void devlink_free(struct devlink *devlink)
{
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
+ WARN_ON(!list_empty(&devlink->trap_group_list));
+ WARN_ON(!list_empty(&devlink->trap_list));
WARN_ON(!list_empty(&devlink->reporter_list));
WARN_ON(!list_empty(&devlink->region_list));
WARN_ON(!list_empty(&devlink->param_list));
@@ -5678,10 +6285,10 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
if (WARN_ON(!devlink_port->registered))
return;
devlink_port_type_warn_cancel(devlink_port);
- spin_lock(&devlink_port->type_lock);
+ spin_lock_bh(&devlink_port->type_lock);
devlink_port->type = type;
devlink_port->type_dev = type_dev;
- spin_unlock(&devlink_port->type_lock);
+ spin_unlock_bh(&devlink_port->type_lock);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
}
@@ -6834,6 +7441,475 @@ unlock:
}
EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
+#define DEVLINK_TRAP(_id, _type) \
+ { \
+ .type = DEVLINK_TRAP_TYPE_##_type, \
+ .id = DEVLINK_TRAP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \
+ }
+
+static const struct devlink_trap devlink_trap_generic[] = {
+ DEVLINK_TRAP(SMAC_MC, DROP),
+ DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
+ DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
+ DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
+ DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
+ DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
+ DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
+ DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
+ DEVLINK_TRAP(TAIL_DROP, DROP),
+};
+
+#define DEVLINK_TRAP_GROUP(_id) \
+ { \
+ .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \
+ }
+
+static const struct devlink_trap_group devlink_trap_group_generic[] = {
+ DEVLINK_TRAP_GROUP(L2_DROPS),
+ DEVLINK_TRAP_GROUP(L3_DROPS),
+ DEVLINK_TRAP_GROUP(BUFFER_DROPS),
+};
+
+static int devlink_trap_generic_verify(const struct devlink_trap *trap)
+{
+ if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
+ return -EINVAL;
+
+ if (trap->type != devlink_trap_generic[trap->id].type)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int devlink_trap_driver_verify(const struct devlink_trap *trap)
+{
+ int i;
+
+ if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
+ if (!strcmp(trap->name, devlink_trap_generic[i].name))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_verify(const struct devlink_trap *trap)
+{
+ if (!trap || !trap->name || !trap->group.name)
+ return -EINVAL;
+
+ if (trap->generic)
+ return devlink_trap_generic_verify(trap);
+ else
+ return devlink_trap_driver_verify(trap);
+}
+
+static int
+devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
+{
+ if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
+{
+ int i;
+
+ if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
+ if (!strcmp(group->name, devlink_trap_group_generic[i].name))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_group_verify(const struct devlink_trap_group *group)
+{
+ if (group->generic)
+ return devlink_trap_group_generic_verify(group);
+ else
+ return devlink_trap_group_driver_verify(group);
+}
+
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
+ cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
+ 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_create(struct devlink *devlink,
+ const struct devlink_trap_group *group)
+{
+ struct devlink_trap_group_item *group_item;
+ int err;
+
+ err = devlink_trap_group_verify(group);
+ if (err)
+ return ERR_PTR(err);
+
+ group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
+ if (!group_item)
+ return ERR_PTR(-ENOMEM);
+
+ group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+ if (!group_item->stats) {
+ err = -ENOMEM;
+ goto err_stats_alloc;
+ }
+
+ group_item->group = group;
+ refcount_set(&group_item->refcount, 1);
+
+ if (devlink->ops->trap_group_init) {
+ err = devlink->ops->trap_group_init(devlink, group);
+ if (err)
+ goto err_group_init;
+ }
+
+ list_add_tail(&group_item->list, &devlink->trap_group_list);
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW);
+
+ return group_item;
+
+err_group_init:
+ free_percpu(group_item->stats);
+err_stats_alloc:
+ kfree(group_item);
+ return ERR_PTR(err);
+}
+
+static void
+devlink_trap_group_item_destroy(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item)
+{
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_DEL);
+ list_del(&group_item->list);
+ free_percpu(group_item->stats);
+ kfree(group_item);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get(struct devlink *devlink,
+ const struct devlink_trap_group *group)
+{
+ struct devlink_trap_group_item *group_item;
+
+ group_item = devlink_trap_group_item_lookup(devlink, group->name);
+ if (group_item) {
+ refcount_inc(&group_item->refcount);
+ return group_item;
+ }
+
+ return devlink_trap_group_item_create(devlink, group);
+}
+
+static void
+devlink_trap_group_item_put(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item)
+{
+ if (!refcount_dec_and_test(&group_item->refcount))
+ return;
+
+ devlink_trap_group_item_destroy(devlink, group_item);
+}
+
+static int
+devlink_trap_item_group_link(struct devlink *devlink,
+ struct devlink_trap_item *trap_item)
+{
+ struct devlink_trap_group_item *group_item;
+
+ group_item = devlink_trap_group_item_get(devlink,
+ &trap_item->trap->group);
+ if (IS_ERR(group_item))
+ return PTR_ERR(group_item);
+
+ trap_item->group_item = group_item;
+
+ return 0;
+}
+
+static void
+devlink_trap_item_group_unlink(struct devlink *devlink,
+ struct devlink_trap_item *trap_item)
+{
+ devlink_trap_group_item_put(devlink, trap_item->group_item);
+}
+
+static void devlink_trap_notify(struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
+ cmd != DEVLINK_CMD_TRAP_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int
+devlink_trap_register(struct devlink *devlink,
+ const struct devlink_trap *trap, void *priv)
+{
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ if (devlink_trap_item_lookup(devlink, trap->name))
+ return -EEXIST;
+
+ trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
+ if (!trap_item)
+ return -ENOMEM;
+
+ trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+ if (!trap_item->stats) {
+ err = -ENOMEM;
+ goto err_stats_alloc;
+ }
+
+ trap_item->trap = trap;
+ trap_item->action = trap->init_action;
+ trap_item->priv = priv;
+
+ err = devlink_trap_item_group_link(devlink, trap_item);
+ if (err)
+ goto err_group_link;
+
+ err = devlink->ops->trap_init(devlink, trap, trap_item);
+ if (err)
+ goto err_trap_init;
+
+ list_add_tail(&trap_item->list, &devlink->trap_list);
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+ return 0;
+
+err_trap_init:
+ devlink_trap_item_group_unlink(devlink, trap_item);
+err_group_link:
+ free_percpu(trap_item->stats);
+err_stats_alloc:
+ kfree(trap_item);
+ return err;
+}
+
+static void devlink_trap_unregister(struct devlink *devlink,
+ const struct devlink_trap *trap)
+{
+ struct devlink_trap_item *trap_item;
+
+ trap_item = devlink_trap_item_lookup(devlink, trap->name);
+ if (WARN_ON_ONCE(!trap_item))
+ return;
+
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+ list_del(&trap_item->list);
+ if (devlink->ops->trap_fini)
+ devlink->ops->trap_fini(devlink, trap, trap_item);
+ devlink_trap_item_group_unlink(devlink, trap_item);
+ free_percpu(trap_item->stats);
+ kfree(trap_item);
+}
+
+static void devlink_trap_disable(struct devlink *devlink,
+ const struct devlink_trap *trap)
+{
+ struct devlink_trap_item *trap_item;
+
+ trap_item = devlink_trap_item_lookup(devlink, trap->name);
+ if (WARN_ON_ONCE(!trap_item))
+ return;
+
+ devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP);
+ trap_item->action = DEVLINK_TRAP_ACTION_DROP;
+}
+
+/**
+ * devlink_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
+{
+ int i, err;
+
+ if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
+ return -EINVAL;
+
+ mutex_lock(&devlink->lock);
+ for (i = 0; i < traps_count; i++) {
+ const struct devlink_trap *trap = &traps[i];
+
+ err = devlink_trap_verify(trap);
+ if (err)
+ goto err_trap_verify;
+
+ err = devlink_trap_register(devlink, trap, priv);
+ if (err)
+ goto err_trap_register;
+ }
+ mutex_unlock(&devlink->lock);
+
+ return 0;
+
+err_trap_register:
+err_trap_verify:
+ for (i--; i >= 0; i--)
+ devlink_trap_unregister(devlink, &traps[i]);
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_traps_register);
+
+/**
+ * devlink_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ */
+void devlink_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
+{
+ int i;
+
+ mutex_lock(&devlink->lock);
+ /* Make sure we do not have any packets in-flight while unregistering
+ * traps by disabling all of them and waiting for a grace period.
+ */
+ for (i = traps_count - 1; i >= 0; i--)
+ devlink_trap_disable(devlink, &traps[i]);
+ synchronize_rcu();
+ for (i = traps_count - 1; i >= 0; i--)
+ devlink_trap_unregister(devlink, &traps[i]);
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_traps_unregister);
+
+static void
+devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
+ size_t skb_len)
+{
+ struct devlink_stats *stats;
+
+ stats = this_cpu_ptr(trap_stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->rx_bytes += skb_len;
+ stats->rx_packets++;
+ u64_stats_update_end(&stats->syncp);
+}
+
+static void
+devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata,
+ const struct devlink_trap_item *trap_item,
+ struct devlink_port *in_devlink_port)
+{
+ struct devlink_trap_group_item *group_item = trap_item->group_item;
+
+ hw_metadata->trap_group_name = group_item->group->name;
+ hw_metadata->trap_name = trap_item->trap->name;
+
+ spin_lock(&in_devlink_port->type_lock);
+ if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+ hw_metadata->input_dev = in_devlink_port->type_dev;
+ spin_unlock(&in_devlink_port->type_lock);
+}
+
+/**
+ * devlink_trap_report - Report trapped packet to drop monitor.
+ * @devlink: devlink.
+ * @skb: Trapped packet.
+ * @trap_ctx: Trap context.
+ * @in_devlink_port: Input devlink port.
+ */
+void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
+ void *trap_ctx, struct devlink_port *in_devlink_port)
+{
+ struct devlink_trap_item *trap_item = trap_ctx;
+ struct net_dm_hw_metadata hw_metadata = {};
+
+ devlink_trap_stats_update(trap_item->stats, skb->len);
+ devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
+
+ devlink_trap_report_metadata_fill(&hw_metadata, trap_item,
+ in_devlink_port);
+ net_dm_hw_report(skb, &hw_metadata);
+}
+EXPORT_SYMBOL_GPL(devlink_trap_report);
+
+/**
+ * devlink_trap_ctx_priv - Trap context to driver private information.
+ * @trap_ctx: Trap context.
+ *
+ * Return: Driver private information passed during registration.
+ */
+void *devlink_trap_ctx_priv(void *trap_ctx)
+{
+ struct devlink_trap_item *trap_item = trap_ctx;
+
+ return trap_item->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
+
static void __devlink_compat_running_version(struct devlink *devlink,
char *buf, size_t len)
{
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 39e094907391..cc60cc22e2db 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -26,6 +26,7 @@
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <net/drop_monitor.h>
#include <net/genetlink.h>
#include <net/netevent.h>
@@ -43,6 +44,7 @@
* netlink alerts
*/
static int trace_state = TRACE_OFF;
+static bool monitor_hw;
/* net_dm_mutex
*
@@ -56,9 +58,26 @@ struct net_dm_stats {
struct u64_stats_sync syncp;
};
+#define NET_DM_MAX_HW_TRAP_NAME_LEN 40
+
+struct net_dm_hw_entry {
+ char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN];
+ u32 count;
+};
+
+struct net_dm_hw_entries {
+ u32 num_entries;
+ struct net_dm_hw_entry entries[0];
+};
+
struct per_cpu_dm_data {
- spinlock_t lock; /* Protects 'skb' and 'send_timer' */
- struct sk_buff *skb;
+ spinlock_t lock; /* Protects 'skb', 'hw_entries' and
+ * 'send_timer'
+ */
+ union {
+ struct sk_buff *skb;
+ struct net_dm_hw_entries *hw_entries;
+ };
struct sk_buff_head drop_queue;
struct work_struct dm_alert_work;
struct timer_list send_timer;
@@ -76,6 +95,7 @@ struct dm_hw_stat_delta {
static struct genl_family net_drop_monitor_family;
static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
+static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
static int dm_hit_limit = 64;
static int dm_delay = 1;
@@ -92,10 +112,16 @@ struct net_dm_alert_ops {
void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
int work, int budget);
void (*work_item_func)(struct work_struct *work);
+ void (*hw_work_item_func)(struct work_struct *work);
+ void (*hw_probe)(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata);
};
struct net_dm_skb_cb {
- void *pc;
+ union {
+ struct net_dm_hw_metadata *hw_metadata;
+ void *pc;
+ };
};
#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
@@ -266,10 +292,190 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
rcu_read_unlock();
}
+static struct net_dm_hw_entries *
+net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
+{
+ struct net_dm_hw_entries *hw_entries;
+ unsigned long flags;
+
+ hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit),
+ GFP_KERNEL);
+ if (!hw_entries) {
+ /* If the memory allocation failed, we try to perform another
+ * allocation in 1/10 second. Otherwise, the probe function
+ * will constantly bail out.
+ */
+ mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
+ }
+
+ spin_lock_irqsave(&hw_data->lock, flags);
+ swap(hw_data->hw_entries, hw_entries);
+ spin_unlock_irqrestore(&hw_data->lock, flags);
+
+ return hw_entries;
+}
+
+static int net_dm_hw_entry_put(struct sk_buff *msg,
+ const struct net_dm_hw_entry *hw_entry)
+{
+ struct nlattr *attr;
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_hw_entries_put(struct sk_buff *msg,
+ const struct net_dm_hw_entries *hw_entries)
+{
+ struct nlattr *attr;
+ int i;
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES);
+ if (!attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i < hw_entries->num_entries; i++) {
+ int rc;
+
+ rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]);
+ if (rc)
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int
+net_dm_hw_summary_report_fill(struct sk_buff *msg,
+ const struct net_dm_hw_entries *hw_entries)
+{
+ struct net_dm_alert_msg anc_hdr = { 0 };
+ void *hdr;
+ int rc;
+
+ hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+ NET_DM_CMD_ALERT);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ /* We need to put the ancillary header in order not to break user
+ * space.
+ */
+ if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr))
+ goto nla_put_failure;
+
+ rc = net_dm_hw_entries_put(msg, hw_entries);
+ if (rc)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void net_dm_hw_summary_work(struct work_struct *work)
+{
+ struct net_dm_hw_entries *hw_entries;
+ struct per_cpu_dm_data *hw_data;
+ struct sk_buff *msg;
+ int rc;
+
+ hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+ hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+ if (!hw_entries)
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ rc = net_dm_hw_summary_report_fill(msg, hw_entries);
+ if (rc) {
+ nlmsg_free(msg);
+ goto out;
+ }
+
+ genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+ kfree(hw_entries);
+}
+
+static void
+net_dm_hw_summary_probe(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ struct net_dm_hw_entries *hw_entries;
+ struct net_dm_hw_entry *hw_entry;
+ struct per_cpu_dm_data *hw_data;
+ unsigned long flags;
+ int i;
+
+ hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+ spin_lock_irqsave(&hw_data->lock, flags);
+ hw_entries = hw_data->hw_entries;
+
+ if (!hw_entries)
+ goto out;
+
+ for (i = 0; i < hw_entries->num_entries; i++) {
+ hw_entry = &hw_entries->entries[i];
+ if (!strncmp(hw_entry->trap_name, hw_metadata->trap_name,
+ NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) {
+ hw_entry->count++;
+ goto out;
+ }
+ }
+ if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit))
+ goto out;
+
+ hw_entry = &hw_entries->entries[hw_entries->num_entries];
+ strlcpy(hw_entry->trap_name, hw_metadata->trap_name,
+ NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
+ hw_entry->count = 1;
+ hw_entries->num_entries++;
+
+ if (!timer_pending(&hw_data->send_timer)) {
+ hw_data->send_timer.expires = jiffies + dm_delay * HZ;
+ add_timer(&hw_data->send_timer);
+ }
+
+out:
+ spin_unlock_irqrestore(&hw_data->lock, flags);
+}
+
static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
.kfree_skb_probe = trace_kfree_skb_hit,
.napi_poll_probe = trace_napi_poll_hit,
.work_item_func = send_dm_alert,
+ .hw_work_item_func = net_dm_hw_summary_work,
+ .hw_probe = net_dm_hw_summary_probe,
};
static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
@@ -323,7 +529,9 @@ static size_t net_dm_in_port_size(void)
/* NET_DM_ATTR_IN_PORT nest */
return nla_total_size(0) +
/* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
- nla_total_size(sizeof(u32));
+ nla_total_size(sizeof(u32)) +
+ /* NET_DM_ATTR_PORT_NETDEV_NAME */
+ nla_total_size(IFNAMSIZ + 1);
}
#define NET_DM_MAX_SYMBOL_LEN 40
@@ -335,6 +543,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
return NLMSG_ALIGN(size) +
+ /* NET_DM_ATTR_ORIGIN */
+ nla_total_size(sizeof(u16)) +
/* NET_DM_ATTR_PC */
nla_total_size(sizeof(u64)) +
/* NET_DM_ATTR_SYMBOL */
@@ -342,7 +552,7 @@ static size_t net_dm_packet_report_size(size_t payload_len)
/* NET_DM_ATTR_IN_PORT */
net_dm_in_port_size() +
/* NET_DM_ATTR_TIMESTAMP */
- nla_total_size(sizeof(struct timespec)) +
+ nla_total_size(sizeof(u64)) +
/* NET_DM_ATTR_ORIG_LEN */
nla_total_size(sizeof(u32)) +
/* NET_DM_ATTR_PROTO */
@@ -351,7 +561,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
nla_total_size(payload_len);
}
-static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
+static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex,
+ const char *name)
{
struct nlattr *attr;
@@ -363,6 +574,9 @@ static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
goto nla_put_failure;
+ if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name))
+ goto nla_put_failure;
+
nla_nest_end(msg, attr);
return 0;
@@ -378,7 +592,6 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
char buf[NET_DM_MAX_SYMBOL_LEN];
struct nlattr *attr;
- struct timespec ts;
void *hdr;
int rc;
@@ -387,6 +600,9 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
if (!hdr)
return -EMSGSIZE;
+ if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
+ goto nla_put_failure;
+
if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
goto nla_put_failure;
@@ -394,12 +610,12 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
goto nla_put_failure;
- rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif);
+ rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL);
if (rc)
goto nla_put_failure;
- if (ktime_to_timespec_cond(skb->tstamp, &ts) &&
- nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
+ ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
goto nla_put_failure;
if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
@@ -481,10 +697,249 @@ static void net_dm_packet_work(struct work_struct *work)
net_dm_packet_report(skb);
}
+static size_t
+net_dm_hw_packet_report_size(size_t payload_len,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ size_t size;
+
+ size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+ return NLMSG_ALIGN(size) +
+ /* NET_DM_ATTR_ORIGIN */
+ nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */
+ nla_total_size(strlen(hw_metadata->trap_group_name) + 1) +
+ /* NET_DM_ATTR_HW_TRAP_NAME */
+ nla_total_size(strlen(hw_metadata->trap_name) + 1) +
+ /* NET_DM_ATTR_IN_PORT */
+ net_dm_in_port_size() +
+ /* NET_DM_ATTR_TIMESTAMP */
+ nla_total_size(sizeof(u64)) +
+ /* NET_DM_ATTR_ORIG_LEN */
+ nla_total_size(sizeof(u32)) +
+ /* NET_DM_ATTR_PROTO */
+ nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_PAYLOAD */
+ nla_total_size(payload_len);
+}
+
+static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
+ struct sk_buff *skb, size_t payload_len)
+{
+ struct net_dm_hw_metadata *hw_metadata;
+ struct nlattr *attr;
+ void *hdr;
+
+ hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+
+ hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+ NET_DM_CMD_PACKET_ALERT);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME,
+ hw_metadata->trap_group_name))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME,
+ hw_metadata->trap_name))
+ goto nla_put_failure;
+
+ if (hw_metadata->input_dev) {
+ struct net_device *dev = hw_metadata->input_dev;
+ int rc;
+
+ rc = net_dm_packet_report_in_port_put(msg, dev->ifindex,
+ dev->name);
+ if (rc)
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
+ ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+ goto nla_put_failure;
+
+ if (!payload_len)
+ goto out;
+
+ if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
+ goto nla_put_failure;
+
+ attr = skb_put(msg, nla_total_size(payload_len));
+ attr->nla_type = NET_DM_ATTR_PAYLOAD;
+ attr->nla_len = nla_attr_size(payload_len);
+ if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+ goto nla_put_failure;
+
+out:
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static struct net_dm_hw_metadata *
+net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata)
+{
+ struct net_dm_hw_metadata *n_hw_metadata;
+ const char *trap_group_name;
+ const char *trap_name;
+
+ n_hw_metadata = kmalloc(sizeof(*hw_metadata), GFP_ATOMIC);
+ if (!n_hw_metadata)
+ return NULL;
+
+ trap_group_name = kmemdup(hw_metadata->trap_group_name,
+ strlen(hw_metadata->trap_group_name) + 1,
+ GFP_ATOMIC | __GFP_ZERO);
+ if (!trap_group_name)
+ goto free_hw_metadata;
+ n_hw_metadata->trap_group_name = trap_group_name;
+
+ trap_name = kmemdup(hw_metadata->trap_name,
+ strlen(hw_metadata->trap_name) + 1,
+ GFP_ATOMIC | __GFP_ZERO);
+ if (!trap_name)
+ goto free_trap_group;
+ n_hw_metadata->trap_name = trap_name;
+
+ n_hw_metadata->input_dev = hw_metadata->input_dev;
+ if (n_hw_metadata->input_dev)
+ dev_hold(n_hw_metadata->input_dev);
+
+ return n_hw_metadata;
+
+free_trap_group:
+ kfree(trap_group_name);
+free_hw_metadata:
+ kfree(n_hw_metadata);
+ return NULL;
+}
+
+static void
+net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata)
+{
+ if (hw_metadata->input_dev)
+ dev_put(hw_metadata->input_dev);
+ kfree(hw_metadata->trap_name);
+ kfree(hw_metadata->trap_group_name);
+ kfree(hw_metadata);
+}
+
+static void net_dm_hw_packet_report(struct sk_buff *skb)
+{
+ struct net_dm_hw_metadata *hw_metadata;
+ struct sk_buff *msg;
+ size_t payload_len;
+ int rc;
+
+ if (skb->data > skb_mac_header(skb))
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ else
+ skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+ payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+ if (net_dm_trunc_len)
+ payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
+
+ hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+ msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata),
+ GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ rc = net_dm_hw_packet_report_fill(msg, skb, payload_len);
+ if (rc) {
+ nlmsg_free(msg);
+ goto out;
+ }
+
+ genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+ net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata);
+ consume_skb(skb);
+}
+
+static void net_dm_hw_packet_work(struct work_struct *work)
+{
+ struct per_cpu_dm_data *hw_data;
+ struct sk_buff_head list;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+ __skb_queue_head_init(&list);
+
+ spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+ skb_queue_splice_tail_init(&hw_data->drop_queue, &list);
+ spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+ while ((skb = __skb_dequeue(&list)))
+ net_dm_hw_packet_report(skb);
+}
+
+static void
+net_dm_hw_packet_probe(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ struct net_dm_hw_metadata *n_hw_metadata;
+ ktime_t tstamp = ktime_get_real();
+ struct per_cpu_dm_data *hw_data;
+ struct sk_buff *nskb;
+ unsigned long flags;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return;
+
+ n_hw_metadata = net_dm_hw_metadata_clone(hw_metadata);
+ if (!n_hw_metadata)
+ goto free;
+
+ NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata;
+ nskb->tstamp = tstamp;
+
+ hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+
+ spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+ if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len)
+ __skb_queue_tail(&hw_data->drop_queue, nskb);
+ else
+ goto unlock_free;
+ spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+ schedule_work(&hw_data->dm_alert_work);
+
+ return;
+
+unlock_free:
+ spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+ u64_stats_update_begin(&hw_data->stats.syncp);
+ hw_data->stats.dropped++;
+ u64_stats_update_end(&hw_data->stats.syncp);
+ net_dm_hw_metadata_free(n_hw_metadata);
+free:
+ consume_skb(nskb);
+}
+
static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
.kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit,
.napi_poll_probe = net_dm_packet_trace_napi_poll_hit,
.work_item_func = net_dm_packet_work,
+ .hw_work_item_func = net_dm_hw_packet_work,
+ .hw_probe = net_dm_hw_packet_probe,
};
static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
@@ -492,6 +947,85 @@ static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
[NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops,
};
+void net_dm_hw_report(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ rcu_read_lock();
+
+ if (!monitor_hw)
+ goto out;
+
+ net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata);
+
+out:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(net_dm_hw_report);
+
+static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
+{
+ const struct net_dm_alert_ops *ops;
+ int cpu;
+
+ if (monitor_hw) {
+ NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled");
+ return -EAGAIN;
+ }
+
+ ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+ if (!try_module_get(THIS_MODULE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+ return -ENODEV;
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ struct net_dm_hw_entries *hw_entries;
+
+ INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func);
+ timer_setup(&hw_data->send_timer, sched_send_work, 0);
+ hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+ kfree(hw_entries);
+ }
+
+ monitor_hw = true;
+
+ return 0;
+}
+
+static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
+{
+ int cpu;
+
+ if (!monitor_hw)
+ NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
+
+ monitor_hw = false;
+
+ /* After this call returns we are guaranteed that no CPU is processing
+ * any hardware drops.
+ */
+ synchronize_rcu();
+
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ struct sk_buff *skb;
+
+ del_timer_sync(&hw_data->send_timer);
+ cancel_work_sync(&hw_data->dm_alert_work);
+ while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
+ struct net_dm_hw_metadata *hw_metadata;
+
+ hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+ net_dm_hw_metadata_free(hw_metadata);
+ consume_skb(skb);
+ }
+ }
+
+ module_put(THIS_MODULE);
+}
+
static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
{
const struct net_dm_alert_ops *ops;
@@ -604,6 +1138,11 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
return rc;
}
+static bool net_dm_is_monitoring(void)
+{
+ return trace_state == TRACE_ON || monitor_hw;
+}
+
static int net_dm_alert_mode_get_from_info(struct genl_info *info,
enum net_dm_alert_mode *p_alert_mode)
{
@@ -665,8 +1204,8 @@ static int net_dm_cmd_config(struct sk_buff *skb,
struct netlink_ext_ack *extack = info->extack;
int rc;
- if (trace_state == TRACE_ON) {
- NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor while tracing is on");
+ if (net_dm_is_monitoring()) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring");
return -EBUSY;
}
@@ -681,14 +1220,61 @@ static int net_dm_cmd_config(struct sk_buff *skb,
return 0;
}
+static int net_dm_monitor_start(bool set_sw, bool set_hw,
+ struct netlink_ext_ack *extack)
+{
+ bool sw_set = false;
+ int rc;
+
+ if (set_sw) {
+ rc = set_all_monitor_traces(TRACE_ON, extack);
+ if (rc)
+ return rc;
+ sw_set = true;
+ }
+
+ if (set_hw) {
+ rc = net_dm_hw_monitor_start(extack);
+ if (rc)
+ goto err_monitor_hw;
+ }
+
+ return 0;
+
+err_monitor_hw:
+ if (sw_set)
+ set_all_monitor_traces(TRACE_OFF, extack);
+ return rc;
+}
+
+static void net_dm_monitor_stop(bool set_sw, bool set_hw,
+ struct netlink_ext_ack *extack)
+{
+ if (set_hw)
+ net_dm_hw_monitor_stop(extack);
+ if (set_sw)
+ set_all_monitor_traces(TRACE_OFF, extack);
+}
+
static int net_dm_cmd_trace(struct sk_buff *skb,
struct genl_info *info)
{
+ bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS];
+ bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS];
+ struct netlink_ext_ack *extack = info->extack;
+
+ /* To maintain backward compatibility, we start / stop monitoring of
+ * software drops if no flag is specified.
+ */
+ if (!set_sw && !set_hw)
+ set_sw = true;
+
switch (info->genlhdr->cmd) {
case NET_DM_CMD_START:
- return set_all_monitor_traces(TRACE_ON, info->extack);
+ return net_dm_monitor_start(set_sw, set_hw, extack);
case NET_DM_CMD_STOP:
- return set_all_monitor_traces(TRACE_OFF, info->extack);
+ net_dm_monitor_stop(set_sw, set_hw, extack);
+ return 0;
}
return -EOPNOTSUPP;
@@ -785,6 +1371,50 @@ nla_put_failure:
return -EMSGSIZE;
}
+static void net_dm_hw_stats_read(struct net_dm_stats *stats)
+{
+ int cpu;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ struct net_dm_stats *cpu_stats = &hw_data->stats;
+ unsigned int start;
+ u64 dropped;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ dropped = cpu_stats->dropped;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+ stats->dropped += dropped;
+ }
+}
+
+static int net_dm_hw_stats_put(struct sk_buff *msg)
+{
+ struct net_dm_stats stats;
+ struct nlattr *attr;
+
+ net_dm_hw_stats_read(&stats);
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+ stats.dropped, NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
{
void *hdr;
@@ -799,6 +1429,10 @@ static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
if (rc)
goto nla_put_failure;
+ rc = net_dm_hw_stats_put(msg);
+ if (rc)
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
@@ -872,6 +1506,8 @@ static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
[NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
+ [NET_DM_ATTR_SW_DROPS] = {. type = NLA_FLAG },
+ [NET_DM_ATTR_HW_DROPS] = {. type = NLA_FLAG },
};
static const struct genl_ops dropmon_ops[] = {
@@ -934,9 +1570,57 @@ static struct notifier_block dropmon_net_notifier = {
.notifier_call = dropmon_net_event
};
-static int __init init_net_drop_monitor(void)
+static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data)
+{
+ spin_lock_init(&data->lock);
+ skb_queue_head_init(&data->drop_queue);
+ u64_stats_init(&data->stats.syncp);
+}
+
+static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data)
+{
+ WARN_ON(!skb_queue_empty(&data->drop_queue));
+}
+
+static void net_dm_cpu_data_init(int cpu)
+{
+ struct per_cpu_dm_data *data;
+
+ data = &per_cpu(dm_cpu_data, cpu);
+ __net_dm_cpu_data_init(data);
+}
+
+static void net_dm_cpu_data_fini(int cpu)
{
struct per_cpu_dm_data *data;
+
+ data = &per_cpu(dm_cpu_data, cpu);
+ /* At this point, we should have exclusive access
+ * to this struct and can free the skb inside it.
+ */
+ consume_skb(data->skb);
+ __net_dm_cpu_data_fini(data);
+}
+
+static void net_dm_hw_cpu_data_init(int cpu)
+{
+ struct per_cpu_dm_data *hw_data;
+
+ hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ __net_dm_cpu_data_init(hw_data);
+}
+
+static void net_dm_hw_cpu_data_fini(int cpu)
+{
+ struct per_cpu_dm_data *hw_data;
+
+ hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ kfree(hw_data->hw_entries);
+ __net_dm_cpu_data_fini(hw_data);
+}
+
+static int __init init_net_drop_monitor(void)
+{
int cpu, rc;
pr_info("Initializing network drop monitor service\n");
@@ -962,10 +1646,8 @@ static int __init init_net_drop_monitor(void)
rc = 0;
for_each_possible_cpu(cpu) {
- data = &per_cpu(dm_cpu_data, cpu);
- spin_lock_init(&data->lock);
- skb_queue_head_init(&data->drop_queue);
- u64_stats_init(&data->stats.syncp);
+ net_dm_cpu_data_init(cpu);
+ net_dm_hw_cpu_data_init(cpu);
}
goto out;
@@ -978,7 +1660,6 @@ out:
static void exit_net_drop_monitor(void)
{
- struct per_cpu_dm_data *data;
int cpu;
BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
@@ -989,13 +1670,8 @@ static void exit_net_drop_monitor(void)
*/
for_each_possible_cpu(cpu) {
- data = &per_cpu(dm_cpu_data, cpu);
- /*
- * At this point, we should have exclusive access
- * to this struct and can free the skb inside it
- */
- kfree_skb(data->skb);
- WARN_ON(!skb_queue_empty(&data->drop_queue));
+ net_dm_hw_cpu_data_fini(cpu);
+ net_dm_cpu_data_fini(cpu);
}
BUG_ON(genl_unregister_family(&net_drop_monitor_family));
diff --git a/net/core/filter.c b/net/core/filter.c
index 17bc9af8f156..ed6563622ce3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8841,13 +8841,13 @@ sk_reuseport_is_valid_access(int off, int size,
return size == size_default;
/* Fields that allow narrowing */
- case offsetof(struct sk_reuseport_md, eth_protocol):
+ case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
if (size < FIELD_SIZEOF(struct sk_buff, protocol))
return false;
/* fall through */
- case offsetof(struct sk_reuseport_md, ip_protocol):
- case offsetof(struct sk_reuseport_md, bind_inany):
- case offsetof(struct sk_reuseport_md, len):
+ case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
+ case bpf_ctx_range(struct sk_reuseport_md, bind_inany):
+ case bpf_ctx_range(struct sk_reuseport_md, len):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 9741b593ea53..7c09d87d3269 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -142,8 +142,8 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
mutex_unlock(&flow_dissector_mutex);
return -ENOENT;
}
- bpf_prog_put(attached);
RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
+ bpf_prog_put(attached);
mutex_unlock(&flow_dissector_mutex);
return 0;
}
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 64c3d4d72b9c..cf52d9c422fa 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -391,6 +391,8 @@ static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
kfree(indr_block_cb);
}
+static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
+
static void flow_block_ing_cmd(struct net_device *dev,
flow_indr_block_bind_cb_t *cb,
void *cb_priv,
@@ -398,11 +400,11 @@ static void flow_block_ing_cmd(struct net_device *dev,
{
struct flow_indr_block_ing_entry *entry;
- rcu_read_lock();
- list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
+ mutex_lock(&flow_indr_block_ing_cb_lock);
+ list_for_each_entry(entry, &block_ing_cb_list, list) {
entry->cb(dev, cb, cb_priv, command);
}
- rcu_read_unlock();
+ mutex_unlock(&flow_indr_block_ing_cb_lock);
}
int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
@@ -497,11 +499,10 @@ void flow_indr_block_call(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(flow_indr_block_call);
-static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry)
{
mutex_lock(&flow_indr_block_ing_cb_lock);
- list_add_tail_rcu(&entry->list, &block_ing_cb_list);
+ list_add_tail(&entry->list, &block_ing_cb_list);
mutex_unlock(&flow_indr_block_ing_cb_lock);
}
EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
@@ -509,7 +510,7 @@ EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry)
{
mutex_lock(&flow_indr_block_ing_cb_lock);
- list_del_rcu(&entry->list);
+ list_del(&entry->list);
mutex_unlock(&flow_indr_block_ing_cb_lock);
}
EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2cf27da1baeb..849380a622ef 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -122,7 +122,7 @@ static void queue_process(struct work_struct *work)
txq = netdev_get_tx_queue(dev, q_index);
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (netif_xmit_frozen_or_stopped(txq) ||
- netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
+ !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
skb_queue_head(&npinfo->txq, skb);
HARD_TX_UNLOCK(dev, txq);
local_irq_restore(flags);
@@ -335,7 +335,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
HARD_TX_UNLOCK(dev, txq);
- if (status == NETDEV_TX_OK)
+ if (dev_xmit_complete(status))
break;
}
@@ -352,7 +352,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
}
- if (status != NETDEV_TX_OK) {
+ if (!dev_xmit_complete(status)) {
skb_queue_tail(&npinfo->txq, skb);
schedule_delayed_work(&npinfo->tx_work,0);
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 3272dc7a8c81..5bc65587f1c4 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -61,7 +61,7 @@ static int page_pool_init(struct page_pool *pool,
struct page_pool *page_pool_create(const struct page_pool_params *params)
{
struct page_pool *pool;
- int err = 0;
+ int err;
pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
if (!pool)
@@ -82,12 +82,9 @@ EXPORT_SYMBOL(page_pool_create);
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
struct ptr_ring *r = &pool->ring;
+ bool refill = false;
struct page *page;
- /* Quicker fallback, avoid locks when ring is empty */
- if (__ptr_ring_empty(r))
- return NULL;
-
/* Test for safe-context, caller should provide this guarantee */
if (likely(in_serving_softirq())) {
if (likely(pool->alloc.count)) {
@@ -95,27 +92,23 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
page = pool->alloc.cache[--pool->alloc.count];
return page;
}
- /* Slower-path: Alloc array empty, time to refill
- *
- * Open-coded bulk ptr_ring consumer.
- *
- * Discussion: the ring consumer lock is not really
- * needed due to the softirq/NAPI protection, but
- * later need the ability to reclaim pages on the
- * ring. Thus, keeping the locks.
- */
- spin_lock(&r->consumer_lock);
- while ((page = __ptr_ring_consume(r))) {
- if (pool->alloc.count == PP_ALLOC_CACHE_REFILL)
- break;
- pool->alloc.cache[pool->alloc.count++] = page;
- }
- spin_unlock(&r->consumer_lock);
- return page;
+ refill = true;
}
- /* Slow-path: Get page from locked ring queue */
- page = ptr_ring_consume(&pool->ring);
+ /* Quicker fallback, avoid locks when ring is empty */
+ if (__ptr_ring_empty(r))
+ return NULL;
+
+ /* Slow-path: Get page from locked ring queue,
+ * refill alloc array if requested.
+ */
+ spin_lock(&r->consumer_lock);
+ page = __ptr_ring_consume(r);
+ if (refill)
+ pool->alloc.count = __ptr_ring_consume_batched(r,
+ pool->alloc.cache,
+ PP_ALLOC_CACHE_REFILL);
+ spin_unlock(&r->consumer_lock);
return page;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ea8e8d332d85..2b40b5a9425b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4087,6 +4087,9 @@ static const u8 skb_ext_type_len[] = {
#ifdef CONFIG_XFRM
[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
@@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
#ifdef CONFIG_XFRM
skb_ext_type_len[SKB_EXT_SEC_PATH] +
#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ skb_ext_type_len[TC_SKB_EXT] +
+#endif
0;
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 6832eeb4b785..cf390e0aa73d 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -190,8 +190,7 @@ static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i,
sk_msg_check_to_free(msg, i, msg->sg.size);
sge = sk_msg_elem(msg, i);
}
- if (msg->skb)
- consume_skb(msg->skb);
+ consume_skb(msg->skb);
sk_msg_init(msg);
return freed;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index f5e801a9cea4..07863edbe6fc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1995,6 +1995,19 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
}
EXPORT_SYMBOL(skb_set_owner_w);
+static bool can_skb_orphan_partial(const struct sk_buff *skb)
+{
+#ifdef CONFIG_TLS_DEVICE
+ /* Drivers depend on in-order delivery for crypto offload,
+ * partial orphan breaks out-of-order-OK logic.
+ */
+ if (skb->decrypted)
+ return false;
+#endif
+ return (skb->destructor == sock_wfree ||
+ (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
+}
+
/* This helper is used by netem, as it can hold packets in its
* delay queue. We want to allow the owner socket to send more
* packets, as if they were already TX completed by a typical driver.
@@ -2006,11 +2019,7 @@ void skb_orphan_partial(struct sk_buff *skb)
if (skb_is_tcp_pure_ack(skb))
return;
- if (skb->destructor == sock_wfree
-#ifdef CONFIG_INET
- || skb->destructor == tcp_wfree
-#endif
- ) {
+ if (can_skb_orphan_partial(skb)) {
struct sock *sk = skb->sk;
if (refcount_inc_not_zero(&sk->sk_refcnt)) {
@@ -3281,16 +3290,17 @@ static __init int net_inuse_init(void)
core_initcall(net_inuse_init);
-static void assign_proto_idx(struct proto *prot)
+static int assign_proto_idx(struct proto *prot)
{
prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
pr_err("PROTO_INUSE_NR exhausted\n");
- return;
+ return -ENOSPC;
}
set_bit(prot->inuse_idx, proto_inuse_idx);
+ return 0;
}
static void release_proto_idx(struct proto *prot)
@@ -3299,8 +3309,9 @@ static void release_proto_idx(struct proto *prot)
clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
-static inline void assign_proto_idx(struct proto *prot)
+static inline int assign_proto_idx(struct proto *prot)
{
+ return 0;
}
static inline void release_proto_idx(struct proto *prot)
@@ -3349,6 +3360,8 @@ static int req_prot_init(const struct proto *prot)
int proto_register(struct proto *prot, int alloc_slab)
{
+ int ret = -ENOBUFS;
+
if (alloc_slab) {
prot->slab = kmem_cache_create_usercopy(prot->name,
prot->obj_size, 0,
@@ -3385,20 +3398,27 @@ int proto_register(struct proto *prot, int alloc_slab)
}
mutex_lock(&proto_list_mutex);
+ ret = assign_proto_idx(prot);
+ if (ret) {
+ mutex_unlock(&proto_list_mutex);
+ goto out_free_timewait_sock_slab_name;
+ }
list_add(&prot->node, &proto_list);
- assign_proto_idx(prot);
mutex_unlock(&proto_list_mutex);
- return 0;
+ return ret;
out_free_timewait_sock_slab_name:
- kfree(prot->twsk_prot->twsk_slab_name);
+ if (alloc_slab && prot->twsk_prot)
+ kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
- req_prot_cleanup(prot->rsk_prot);
+ if (alloc_slab) {
+ req_prot_cleanup(prot->rsk_prot);
- kmem_cache_destroy(prot->slab);
- prot->slab = NULL;
+ kmem_cache_destroy(prot->slab);
+ prot->slab = NULL;
+ }
out:
- return -ENOBUFS;
+ return ret;
}
EXPORT_SYMBOL(proto_register);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 3312a5849a97..c13ffbd33d8d 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -19,6 +19,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;
+static atomic64_t cookie_gen;
u64 sock_gen_cookie(struct sock *sk)
{
@@ -27,7 +28,7 @@ u64 sock_gen_cookie(struct sock *sk)
if (res)
return res;
- res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
+ res = atomic64_inc_return(&cookie_gen);
atomic64_cmpxchg(&sk->sk_cookie, 0, res);
}
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 1330a7442e5b..01998860afaa 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -345,7 +345,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
- if (unlikely(icsk->icsk_ulp_data))
+ if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
return -EINVAL;
link = sk_psock_init_link();
diff --git a/net/core/stream.c b/net/core/stream.c
index e94bb02a5629..4f1d4aa5fb38 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -120,7 +120,6 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
int err = 0;
long vm_wait = 0;
long current_timeo = *timeo_p;
- bool noblock = (*timeo_p ? false : true);
DEFINE_WAIT_FUNC(wait, woken_wake_function);
if (sk_stream_memory_free(sk))
@@ -133,11 +132,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- if (!*timeo_p) {
- if (noblock)
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- goto do_nonblock;
- }
+ if (!*timeo_p)
+ goto do_eagain;
if (signal_pending(current))
goto do_interrupted;
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -169,7 +165,13 @@ out:
do_error:
err = -EPIPE;
goto out;
-do_nonblock:
+do_eagain:
+ /* Make sure that whenever EAGAIN is returned, EPOLLOUT event can
+ * be generated later.
+ * When TCP receives ACK packets that make room, tcp_check_space()
+ * only calls tcp_new_space() if SOCK_NOSPACE is set.
+ */
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
err = -EAGAIN;
goto out;
do_interrupted: