summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/dev.c46
-rw-r--r--net/core/devlink.c563
-rw-r--r--net/core/filter.c21
-rw-r--r--net/core/flow_dissector.c3
-rw-r--r--net/core/neighbour.c196
-rw-r--r--net/core/net-sysfs.c57
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/of_net.c170
-rw-r--r--net/core/rtnetlink.c4
-rw-r--r--net/core/skbuff.c27
-rw-r--r--net/core/sock.c104
-rw-r--r--net/core/stream.c2
13 files changed, 799 insertions, 399 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 35ced6201814..4268846f2f47 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -36,3 +36,4 @@ obj-$(CONFIG_FAILOVER) += failover.o
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
+obj-$(CONFIG_OF) += of_net.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ee9fecd3aff..eb61a8821b3a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -303,6 +303,12 @@ static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
return NULL;
}
+bool netdev_name_in_use(struct net *net, const char *name)
+{
+ return netdev_name_node_lookup(net, name);
+}
+EXPORT_SYMBOL(netdev_name_in_use);
+
int netdev_name_node_alt_create(struct net_device *dev, const char *name)
{
struct netdev_name_node *name_node;
@@ -1133,7 +1139,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
}
snprintf(buf, IFNAMSIZ, name, i);
- if (!__dev_get_by_name(net, buf))
+ if (!netdev_name_in_use(net, buf))
return i;
/* It is possible to run out of possible slots
@@ -1187,7 +1193,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev,
if (strchr(name, '%'))
return dev_alloc_name_ns(net, dev, name);
- else if (__dev_get_by_name(net, name))
+ else if (netdev_name_in_use(net, name))
return -EEXIST;
else if (dev->name != name)
strlcpy(dev->name, name, IFNAMSIZ);
@@ -2921,6 +2927,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->num_tc)
netif_setup_tc(dev, txq);
+ dev_qdisc_change_real_num_tx(dev, txq);
+
dev->real_num_tx_queues = txq;
if (disabling) {
@@ -5837,7 +5845,7 @@ static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int se
gro_normal_list(napi);
}
-static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
+static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
struct packet_offload *ptype;
__be16 type = skb->protocol;
@@ -5866,12 +5874,11 @@ static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
if (err) {
WARN_ON(&ptype->list == head);
kfree_skb(skb);
- return NET_RX_SUCCESS;
+ return;
}
out:
gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
- return NET_RX_SUCCESS;
}
static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
@@ -6898,19 +6905,25 @@ EXPORT_SYMBOL(netif_napi_add);
void napi_disable(struct napi_struct *n)
{
+ unsigned long val, new;
+
might_sleep();
set_bit(NAPI_STATE_DISABLE, &n->state);
- while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
- msleep(1);
- while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
- msleep(1);
+ do {
+ val = READ_ONCE(n->state);
+ if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) {
+ usleep_range(20, 200);
+ continue;
+ }
+
+ new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC;
+ new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL);
+ } while (cmpxchg(&n->state, val, new) != val);
hrtimer_cancel(&n->timer);
- clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
clear_bit(NAPI_STATE_DISABLE, &n->state);
- clear_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_disable);
@@ -9152,14 +9165,11 @@ int dev_get_port_parent_id(struct net_device *dev,
}
err = devlink_compat_switch_id_get(dev, ppid);
- if (!err || err != -EOPNOTSUPP)
+ if (!recurse || err != -EOPNOTSUPP)
return err;
- if (!recurse)
- return -EOPNOTSUPP;
-
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = dev_get_port_parent_id(lower_dev, ppid, recurse);
+ err = dev_get_port_parent_id(lower_dev, ppid, true);
if (err)
break;
if (!first.id_len)
@@ -11146,7 +11156,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
* we can use it in the destination network namespace.
*/
err = -EEXIST;
- if (__dev_get_by_name(net, dev->name)) {
+ if (netdev_name_in_use(net, dev->name)) {
/* We get here if we can't use the current device name */
if (!pat)
goto out;
@@ -11499,7 +11509,7 @@ static void __net_exit default_device_exit(struct net *net)
/* Push remaining network devices to init_net */
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
- if (__dev_get_by_name(&init_net, fb_name))
+ if (netdev_name_in_use(&init_net, fb_name))
snprintf(fb_name, IFNAMSIZ, "dev%%d");
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a856ae401ea5..3ce6147a2fe8 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -30,6 +30,63 @@
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
+#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
+ (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
+
+struct devlink_dev_stats {
+ u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+};
+
+struct devlink {
+ u32 index;
+ struct list_head port_list;
+ struct list_head rate_list;
+ struct list_head sb_list;
+ struct list_head dpipe_table_list;
+ struct list_head resource_list;
+ struct list_head param_list;
+ struct list_head region_list;
+ struct list_head reporter_list;
+ struct mutex reporters_lock; /* protects reporter_list */
+ struct devlink_dpipe_headers *dpipe_headers;
+ struct list_head trap_list;
+ struct list_head trap_group_list;
+ struct list_head trap_policer_list;
+ const struct devlink_ops *ops;
+ u64 features;
+ struct xarray snapshot_ids;
+ struct devlink_dev_stats stats;
+ struct device *dev;
+ possible_net_t _net;
+ /* Serializes access to devlink instance specific objects such as
+ * port, sb, dpipe, resource, params, region, traps and more.
+ */
+ struct mutex lock;
+ u8 reload_failed:1;
+ refcount_t refcount;
+ struct completion comp;
+ char priv[0] __aligned(NETDEV_ALIGN);
+};
+
+void *devlink_priv(struct devlink *devlink)
+{
+ return &devlink->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_priv);
+
+struct devlink *priv_to_devlink(void *priv)
+{
+ return container_of(priv, struct devlink, priv);
+}
+EXPORT_SYMBOL_GPL(priv_to_devlink);
+
+struct device *devlink_to_dev(const struct devlink *devlink)
+{
+ return devlink->dev;
+}
+EXPORT_SYMBOL_GPL(devlink_to_dev);
+
static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
{
.name = "destination mac",
@@ -95,6 +152,22 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
#define DEVLINK_REGISTERED XA_MARK_1
+/* devlink instances are open to the access from the user space after
+ * devlink_register() call. Such logical barrier allows us to have certain
+ * expectations related to locking.
+ *
+ * Before *_register() - we are in initialization stage and no parallel
+ * access possible to the devlink instance. All drivers perform that phase
+ * by implicitly holding device_lock.
+ *
+ * After *_register() - users and driver can access devlink instance at
+ * the same time.
+ */
+#define ASSERT_DEVLINK_REGISTERED(d) \
+ WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
+#define ASSERT_DEVLINK_NOT_REGISTERED(d) \
+ WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
+
/* devlink_mutex
*
* An overall lock guarding every operation coming from userspace.
@@ -742,6 +815,7 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
int err;
WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -1040,11 +1114,15 @@ nla_put_failure:
static void devlink_port_notify(struct devlink_port *devlink_port,
enum devlink_command cmd)
{
+ struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
@@ -1055,19 +1133,22 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(devlink_port->devlink), msg, 0,
- DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
static void devlink_rate_notify(struct devlink_rate *devlink_rate,
enum devlink_command cmd)
{
+ struct devlink *devlink = devlink_rate->devlink;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
@@ -1078,9 +1159,8 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(devlink_rate->devlink), msg, 0,
- DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
@@ -3952,9 +4032,6 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
struct net *curr_net;
int err;
- if (!devlink->reload_enabled)
- return -EOPNOTSUPP;
-
memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats));
@@ -4022,7 +4099,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
u32 actions_performed;
int err;
- if (!devlink_reload_supported(devlink->ops))
+ if (!(devlink->features & DEVLINK_F_RELOAD))
return -EOPNOTSUPP;
err = devlink_resources_validate(devlink, NULL, info);
@@ -4150,6 +4227,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -5070,6 +5148,11 @@ static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
if (err)
goto nla_put_failure;
+ err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,
+ region->max_snapshots);
+ if (err)
+ goto nla_put_failure;
+
err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
if (err)
goto nla_put_failure;
@@ -5145,17 +5228,19 @@ static void devlink_nl_region_notify(struct devlink_region *region,
struct devlink_snapshot *snapshot,
enum devlink_command cmd)
{
+ struct devlink *devlink = region->devlink;
struct sk_buff *msg;
WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
if (IS_ERR(msg))
return;
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(region->devlink), msg, 0,
- DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
/**
@@ -6269,23 +6354,21 @@ static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
return 0;
}
-int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
{
if (fmsg->putting_binary)
return -EINVAL;
return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
}
-EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put);
-int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
{
if (fmsg->putting_binary)
return -EINVAL;
return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put);
int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
{
@@ -6296,14 +6379,13 @@ int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
}
EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
-int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
{
if (fmsg->putting_binary)
return -EINVAL;
return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put);
int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
{
@@ -6923,10 +7005,12 @@ genlmsg_cancel:
static void devlink_recover_notify(struct devlink_health_reporter *reporter,
enum devlink_command cmd)
{
+ struct devlink *devlink = reporter->devlink;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -6938,9 +7022,8 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(reporter->devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
void
@@ -8900,6 +8983,25 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
}
/**
+ * devlink_set_features - Set devlink supported features
+ *
+ * @devlink: devlink
+ * @features: devlink support features
+ *
+ * This interface allows us to set reload ops separatelly from
+ * the devlink_alloc.
+ */
+void devlink_set_features(struct devlink *devlink, u64 features)
+{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ WARN_ON(features & DEVLINK_F_RELOAD &&
+ !devlink_reload_supported(devlink->ops));
+ devlink->features = features;
+}
+EXPORT_SYMBOL_GPL(devlink_set_features);
+
+/**
* devlink_alloc_ns - Allocate new devlink instance resources
* in specific namespace
*
@@ -8958,18 +9060,99 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
}
EXPORT_SYMBOL_GPL(devlink_alloc_ns);
+static void
+devlink_trap_policer_notify(struct devlink *devlink,
+ const struct devlink_trap_policer_item *policer_item,
+ enum devlink_command cmd);
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd);
+static void devlink_trap_notify(struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd);
+
+static void devlink_notify_register(struct devlink *devlink)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct devlink_trap_group_item *group_item;
+ struct devlink_trap_item *trap_item;
+ struct devlink_port *devlink_port;
+ struct devlink_rate *rate_node;
+ struct devlink_region *region;
+
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+ list_for_each_entry(devlink_port, &devlink->port_list, list)
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW);
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list)
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW);
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list)
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+ list_for_each_entry(rate_node, &devlink->rate_list, list)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+
+ list_for_each_entry(region, &devlink->region_list, list)
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+ devlink_params_publish(devlink);
+}
+
+static void devlink_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct devlink_trap_group_item *group_item;
+ struct devlink_trap_item *trap_item;
+ struct devlink_port *devlink_port;
+ struct devlink_rate *rate_node;
+ struct devlink_region *region;
+
+ devlink_params_unpublish(devlink);
+
+ list_for_each_entry_reverse(region, &devlink->region_list, list)
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+
+ list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+
+ list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+
+ list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_DEL);
+ list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
+ list)
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_DEL);
+
+ list_for_each_entry_reverse(devlink_port, &devlink->port_list, list)
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
+}
+
/**
* devlink_register - Register devlink instance
*
* @devlink: devlink
*/
-int devlink_register(struct devlink *devlink)
+void devlink_register(struct devlink *devlink)
{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ /* Make sure that we are in .probe() routine */
+ device_lock_assert(devlink->dev);
+
mutex_lock(&devlink_mutex);
xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
- devlink_notify(devlink, DEVLINK_CMD_NEW);
+ devlink_notify_register(devlink);
mutex_unlock(&devlink_mutex);
- return 0;
}
EXPORT_SYMBOL_GPL(devlink_register);
@@ -8980,60 +9163,29 @@ EXPORT_SYMBOL_GPL(devlink_register);
*/
void devlink_unregister(struct devlink *devlink)
{
+ ASSERT_DEVLINK_REGISTERED(devlink);
+ /* Make sure that we are in .remove() routine */
+ device_lock_assert(devlink->dev);
+
devlink_put(devlink);
wait_for_completion(&devlink->comp);
mutex_lock(&devlink_mutex);
- WARN_ON(devlink_reload_supported(devlink->ops) &&
- devlink->reload_enabled);
- devlink_notify(devlink, DEVLINK_CMD_DEL);
+ devlink_notify_unregister(devlink);
xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
mutex_unlock(&devlink_mutex);
}
EXPORT_SYMBOL_GPL(devlink_unregister);
/**
- * devlink_reload_enable - Enable reload of devlink instance
- *
- * @devlink: devlink
- *
- * Should be called at end of device initialization
- * process when reload operation is supported.
- */
-void devlink_reload_enable(struct devlink *devlink)
-{
- mutex_lock(&devlink_mutex);
- devlink->reload_enabled = true;
- mutex_unlock(&devlink_mutex);
-}
-EXPORT_SYMBOL_GPL(devlink_reload_enable);
-
-/**
- * devlink_reload_disable - Disable reload of devlink instance
- *
- * @devlink: devlink
- *
- * Should be called at the beginning of device cleanup
- * process when reload operation is supported.
- */
-void devlink_reload_disable(struct devlink *devlink)
-{
- mutex_lock(&devlink_mutex);
- /* Mutex is taken which ensures that no reload operation is in
- * progress while setting up forbidded flag.
- */
- devlink->reload_enabled = false;
- mutex_unlock(&devlink_mutex);
-}
-EXPORT_SYMBOL_GPL(devlink_reload_disable);
-
-/**
* devlink_free - Free devlink instance resources
*
* @devlink: devlink
*/
void devlink_free(struct devlink *devlink)
{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
WARN_ON(!list_empty(&devlink->trap_policer_list));
@@ -10090,6 +10242,9 @@ void devlink_params_publish(struct devlink *devlink)
{
struct devlink_param_item *param_item;
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
list_for_each_entry(param_item, &devlink->param_list, list) {
if (param_item->published)
continue;
@@ -10122,102 +10277,25 @@ void devlink_params_unpublish(struct devlink *devlink)
EXPORT_SYMBOL_GPL(devlink_params_unpublish);
/**
- * devlink_param_publish - publish one configuration parameter
- *
- * @devlink: devlink
- * @param: one configuration parameter
- *
- * Publish previously registered configuration parameter.
- */
-void devlink_param_publish(struct devlink *devlink,
- const struct devlink_param *param)
-{
- struct devlink_param_item *param_item;
-
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (param_item->param != param || param_item->published)
- continue;
- param_item->published = true;
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_NEW);
- break;
- }
-}
-EXPORT_SYMBOL_GPL(devlink_param_publish);
-
-/**
- * devlink_param_unpublish - unpublish one configuration parameter
+ * devlink_param_driverinit_value_get - get configuration parameter
+ * value for driver initializing
*
- * @devlink: devlink
- * @param: one configuration parameter
+ * @devlink: devlink
+ * @param_id: parameter ID
+ * @init_val: value of parameter in driverinit configuration mode
*
- * Unpublish previously registered configuration parameter.
+ * This function should be used by the driver to get driverinit
+ * configuration for initialization after reload command.
*/
-void devlink_param_unpublish(struct devlink *devlink,
- const struct devlink_param *param)
+int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+ union devlink_param_value *init_val)
{
struct devlink_param_item *param_item;
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (param_item->param != param || !param_item->published)
- continue;
- param_item->published = false;
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_DEL);
- break;
- }
-}
-EXPORT_SYMBOL_GPL(devlink_param_unpublish);
-
-/**
- * devlink_port_params_register - register port configuration parameters
- *
- * @devlink_port: devlink port
- * @params: configuration parameters array
- * @params_count: number of parameters provided
- *
- * Register the configuration parameters supported by the port.
- */
-int devlink_port_params_register(struct devlink_port *devlink_port,
- const struct devlink_param *params,
- size_t params_count)
-{
- return __devlink_params_register(devlink_port->devlink,
- devlink_port->index,
- &devlink_port->param_list, params,
- params_count,
- DEVLINK_CMD_PORT_PARAM_NEW,
- DEVLINK_CMD_PORT_PARAM_DEL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_params_register);
-
-/**
- * devlink_port_params_unregister - unregister port configuration
- * parameters
- *
- * @devlink_port: devlink port
- * @params: configuration parameters array
- * @params_count: number of parameters provided
- */
-void devlink_port_params_unregister(struct devlink_port *devlink_port,
- const struct devlink_param *params,
- size_t params_count)
-{
- return __devlink_params_unregister(devlink_port->devlink,
- devlink_port->index,
- &devlink_port->param_list,
- params, params_count,
- DEVLINK_CMD_PORT_PARAM_DEL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_params_unregister);
-
-static int
-__devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id,
- union devlink_param_value *init_val)
-{
- struct devlink_param_item *param_item;
+ if (!devlink_reload_supported(devlink->ops))
+ return -EOPNOTSUPP;
- param_item = devlink_param_find_by_id(param_list, param_id);
+ param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
if (!param_item)
return -EINVAL;
@@ -10233,54 +10311,6 @@ __devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id,
return 0;
}
-
-static int
-__devlink_param_driverinit_value_set(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list, u32 param_id,
- union devlink_param_value init_val,
- enum devlink_command cmd)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_id(param_list, param_id);
- if (!param_item)
- return -EINVAL;
-
- if (!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT))
- return -EOPNOTSUPP;
-
- if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
- strcpy(param_item->driverinit_value.vstr, init_val.vstr);
- else
- param_item->driverinit_value = init_val;
- param_item->driverinit_value_valid = true;
-
- devlink_param_notify(devlink, port_index, param_item, cmd);
- return 0;
-}
-
-/**
- * devlink_param_driverinit_value_get - get configuration parameter
- * value for driver initializing
- *
- * @devlink: devlink
- * @param_id: parameter ID
- * @init_val: value of parameter in driverinit configuration mode
- *
- * This function should be used by the driver to get driverinit
- * configuration for initialization after reload command.
- */
-int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
- union devlink_param_value *init_val)
-{
- if (!devlink_reload_supported(devlink->ops))
- return -EOPNOTSUPP;
-
- return __devlink_param_driverinit_value_get(&devlink->param_list,
- param_id, init_val);
-}
EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
/**
@@ -10298,61 +10328,26 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
union devlink_param_value init_val)
{
- return __devlink_param_driverinit_value_set(devlink, 0,
- &devlink->param_list,
- param_id, init_val,
- DEVLINK_CMD_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
+ struct devlink_param_item *param_item;
-/**
- * devlink_port_param_driverinit_value_get - get configuration parameter
- * value for driver initializing
- *
- * @devlink_port: devlink_port
- * @param_id: parameter ID
- * @init_val: value of parameter in driverinit configuration mode
- *
- * This function should be used by the driver to get driverinit
- * configuration for initialization after reload command.
- */
-int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
- u32 param_id,
- union devlink_param_value *init_val)
-{
- struct devlink *devlink = devlink_port->devlink;
+ param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+ if (!param_item)
+ return -EINVAL;
- if (!devlink_reload_supported(devlink->ops))
+ if (!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT))
return -EOPNOTSUPP;
- return __devlink_param_driverinit_value_get(&devlink_port->param_list,
- param_id, init_val);
-}
-EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_get);
+ if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
+ strcpy(param_item->driverinit_value.vstr, init_val.vstr);
+ else
+ param_item->driverinit_value = init_val;
+ param_item->driverinit_value_valid = true;
-/**
- * devlink_port_param_driverinit_value_set - set value of configuration
- * parameter for driverinit
- * configuration mode
- *
- * @devlink_port: devlink_port
- * @param_id: parameter ID
- * @init_val: value of parameter to set for driverinit configuration mode
- *
- * This function should be used by the driver to set driverinit
- * configuration mode default value.
- */
-int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
- u32 param_id,
- union devlink_param_value init_val)
-{
- return __devlink_param_driverinit_value_set(devlink_port->devlink,
- devlink_port->index,
- &devlink_port->param_list,
- param_id, init_val,
- DEVLINK_CMD_PORT_PARAM_NEW);
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+ return 0;
}
-EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set);
+EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
/**
* devlink_param_value_changed - notify devlink on a parameter's value
@@ -10378,50 +10373,6 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
EXPORT_SYMBOL_GPL(devlink_param_value_changed);
/**
- * devlink_port_param_value_changed - notify devlink on a parameter's value
- * change. Should be called by the driver
- * right after the change.
- *
- * @devlink_port: devlink_port
- * @param_id: parameter ID
- *
- * This function should be used by the driver to notify devlink on value
- * change, excluding driverinit configuration mode.
- * For driverinit configuration mode driver should use the function
- * devlink_port_param_driverinit_value_set() instead.
- */
-void devlink_port_param_value_changed(struct devlink_port *devlink_port,
- u32 param_id)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_id(&devlink_port->param_list,
- param_id);
- WARN_ON(!param_item);
-
- devlink_param_notify(devlink_port->devlink, devlink_port->index,
- param_item, DEVLINK_CMD_PORT_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devlink_port_param_value_changed);
-
-/**
- * devlink_param_value_str_fill - Safely fill-up the string preventing
- * from overflow of the preallocated buffer
- *
- * @dst_val: destination devlink_param_value
- * @src: source buffer
- */
-void devlink_param_value_str_fill(union devlink_param_value *dst_val,
- const char *src)
-{
- size_t len;
-
- len = strlcpy(dst_val->vstr, src, __DEVLINK_PARAM_MAX_STRING_VALUE);
- WARN_ON(len >= __DEVLINK_PARAM_MAX_STRING_VALUE);
-}
-EXPORT_SYMBOL_GPL(devlink_param_value_str_fill);
-
-/**
* devlink_region_create - create a new address region
*
* @devlink: devlink
@@ -10839,6 +10790,8 @@ devlink_trap_group_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -10880,6 +10833,8 @@ static void devlink_trap_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
cmd != DEVLINK_CMD_TRAP_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -11261,6 +11216,8 @@ devlink_trap_policer_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -11429,6 +11386,24 @@ free_msg:
nlmsg_free(msg);
}
+static struct devlink_port *netdev_to_devlink_port(struct net_device *dev)
+{
+ if (!dev->netdev_ops->ndo_get_devlink_port)
+ return NULL;
+
+ return dev->netdev_ops->ndo_get_devlink_port(dev);
+}
+
+static struct devlink *netdev_to_devlink(struct net_device *dev)
+{
+ struct devlink_port *devlink_port = netdev_to_devlink_port(dev);
+
+ if (!devlink_port)
+ return NULL;
+
+ return devlink_port->devlink;
+}
+
void devlink_compat_running_version(struct net_device *dev,
char *buf, size_t len)
{
@@ -11538,7 +11513,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
if (!net_eq(devlink_net(devlink), net))
goto retry;
- WARN_ON(!devlink_reload_supported(devlink->ops));
+ WARN_ON(!(devlink->features & DEVLINK_F_RELOAD));
err = devlink_reload(devlink, &init_net,
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_LIMIT_UNSPEC,
diff --git a/net/core/filter.c b/net/core/filter.c
index 2e32cee2c469..4bace37a6a44 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7765,6 +7765,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
break;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
return false;
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
+ if (type == BPF_WRITE || size != sizeof(__u64))
+ return false;
+ break;
case bpf_ctx_range(struct __sk_buff, tstamp):
if (size != sizeof(__u64))
return false;
@@ -7774,6 +7778,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false;
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
break;
+ case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1:
+ /* Explicitly prohibit access to padding in __sk_buff. */
+ return false;
default:
/* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
@@ -7802,6 +7809,7 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
return false;
}
@@ -7872,6 +7880,7 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
return false;
}
@@ -8373,6 +8382,7 @@ static bool sk_skb_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
return false;
}
@@ -8884,6 +8894,17 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, sk));
break;
+ case offsetof(struct __sk_buff, hwtstamp):
+ BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8);
+ BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0);
+
+ insn = bpf_convert_shinfo_access(si, insn);
+ *insn++ = BPF_LDX_MEM(BPF_DW,
+ si->dst_reg, si->dst_reg,
+ bpf_target_off(struct skb_shared_info,
+ hwtstamps, 8,
+ target_size));
+ break;
}
return insn - insn_buf;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index bac0184cf3de..7d0a9f84aaf7 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1196,9 +1196,8 @@ proto_again:
break;
}
- proto = hdr->proto;
nhoff += PPPOE_SES_HLEN;
- switch (proto) {
+ switch (hdr->proto) {
case htons(PPP_IP):
proto = htons(ETH_P_IP);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2d5bc3a75fae..eae73efa9245 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
list_del_init(&n->gc_list);
atomic_dec(&n->tbl->gc_entries);
}
+ if (!list_empty(&n->managed_list))
+ list_del_init(&n->managed_list);
}
static void neigh_update_gc_list(struct neighbour *n)
@@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
write_lock_bh(&n->tbl->lock);
write_lock(&n->lock);
-
if (n->dead)
goto out;
@@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
list_add_tail(&n->gc_list, &n->tbl->gc_list);
atomic_inc(&n->tbl->gc_entries);
}
+out:
+ write_unlock(&n->lock);
+ write_unlock_bh(&n->tbl->lock);
+}
+static void neigh_update_managed_list(struct neighbour *n)
+{
+ bool on_managed_list, add_to_managed;
+
+ write_lock_bh(&n->tbl->lock);
+ write_lock(&n->lock);
+ if (n->dead)
+ goto out;
+
+ add_to_managed = n->flags & NTF_MANAGED;
+ on_managed_list = !list_empty(&n->managed_list);
+
+ if (!add_to_managed && on_managed_list)
+ list_del_init(&n->managed_list);
+ else if (add_to_managed && !on_managed_list)
+ list_add_tail(&n->managed_list, &n->tbl->managed_list);
out:
write_unlock(&n->lock);
write_unlock_bh(&n->tbl->lock);
}
-static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
- int *notify)
+static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
+ bool *gc_update, bool *managed_update)
{
- bool rc = false;
- u8 ndm_flags;
+ u32 ndm_flags, old_flags = neigh->flags;
if (!(flags & NEIGH_UPDATE_F_ADMIN))
- return rc;
+ return;
+
+ ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+ ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
- ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
- if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+ if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
if (ndm_flags & NTF_EXT_LEARNED)
neigh->flags |= NTF_EXT_LEARNED;
else
neigh->flags &= ~NTF_EXT_LEARNED;
- rc = true;
*notify = 1;
+ *gc_update = true;
+ }
+ if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
+ if (ndm_flags & NTF_MANAGED)
+ neigh->flags |= NTF_MANAGED;
+ else
+ neigh->flags &= ~NTF_MANAGED;
+ *notify = 1;
+ *managed_update = true;
}
-
- return rc;
}
static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
@@ -379,7 +407,7 @@ EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
struct net_device *dev,
- bool exempt_from_gc)
+ u32 flags, bool exempt_from_gc)
{
struct neighbour *n = NULL;
unsigned long now = jiffies;
@@ -412,6 +440,7 @@ do_alloc:
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
+ n->flags = flags;
seqlock_init(&n->hh.hh_lock);
n->parms = neigh_parms_clone(&tbl->parms);
timer_setup(&n->timer, neigh_timer_handler, 0);
@@ -421,6 +450,7 @@ do_alloc:
refcount_set(&n->refcnt, 1);
n->dead = 1;
INIT_LIST_HEAD(&n->gc_list);
+ INIT_LIST_HEAD(&n->managed_list);
atomic_inc(&tbl->entries);
out:
@@ -575,19 +605,18 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
}
EXPORT_SYMBOL(neigh_lookup_nodev);
-static struct neighbour *___neigh_create(struct neigh_table *tbl,
- const void *pkey,
- struct net_device *dev,
- bool exempt_from_gc, bool want_ref)
+static struct neighbour *
+___neigh_create(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev, u32 flags,
+ bool exempt_from_gc, bool want_ref)
{
- struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
- u32 hash_val;
- unsigned int key_len = tbl->key_len;
- int error;
+ u32 hash_val, key_len = tbl->key_len;
+ struct neighbour *n1, *rc, *n;
struct neigh_hash_table *nht;
+ int error;
+ n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
-
if (!n) {
rc = ERR_PTR(-ENOBUFS);
goto out;
@@ -650,7 +679,8 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
n->dead = 0;
if (!exempt_from_gc)
list_add_tail(&n->gc_list, &n->tbl->gc_list);
-
+ if (n->flags & NTF_MANAGED)
+ list_add_tail(&n->managed_list, &n->tbl->managed_list);
if (want_ref)
neigh_hold(n);
rcu_assign_pointer(n->next,
@@ -674,7 +704,7 @@ out_neigh_release:
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
- return ___neigh_create(tbl, pkey, dev, false, want_ref);
+ return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
@@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
}
}
-
-
/* Generic update routine.
-- lladdr is new lladdr or NULL, if it is not supplied.
-- new is new state.
@@ -1217,7 +1245,8 @@ static void neigh_update_hhs(struct neighbour *neigh)
lladdr instead of overriding it
if it is different.
NEIGH_UPDATE_F_ADMIN means that the change is administrative.
-
+ NEIGH_UPDATE_F_USE means that the entry is user triggered.
+ NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
NTF_ROUTER flag.
NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
@@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh)
Caller MUST hold reference count on the entry.
*/
-
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
u8 new, u32 flags, u32 nlmsg_pid,
struct netlink_ext_ack *extack)
{
- bool ext_learn_change = false;
- u8 old;
- int err;
- int notify = 0;
- struct net_device *dev;
+ bool gc_update = false, managed_update = false;
int update_isrouter = 0;
+ struct net_device *dev;
+ int err, notify = 0;
+ u8 old;
trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
@@ -1254,7 +1281,13 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
(old & (NUD_NOARP | NUD_PERMANENT)))
goto out;
- ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
+ neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
+ if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
+ new = old & ~NUD_PERMANENT;
+ neigh->nud_state = new;
+ err = 0;
+ goto out;
+ }
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
@@ -1399,15 +1432,13 @@ out:
if (update_isrouter)
neigh_update_is_router(neigh, flags, &notify);
write_unlock_bh(&neigh->lock);
-
- if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
+ if (((new ^ old) & NUD_PERMANENT) || gc_update)
neigh_update_gc_list(neigh);
-
+ if (managed_update)
+ neigh_update_managed_list(neigh);
if (notify)
neigh_update_notify(neigh, nlmsg_pid);
-
trace_neigh_update_done(neigh, err);
-
return err;
}
@@ -1533,6 +1564,20 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
}
EXPORT_SYMBOL(neigh_direct_output);
+static void neigh_managed_work(struct work_struct *work)
+{
+ struct neigh_table *tbl = container_of(work, struct neigh_table,
+ managed_work.work);
+ struct neighbour *neigh;
+
+ write_lock_bh(&tbl->lock);
+ list_for_each_entry(neigh, &tbl->managed_list, managed_list)
+ neigh_event_send(neigh, NULL);
+ queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
+ NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
+ write_unlock_bh(&tbl->lock);
+}
+
static void neigh_proxy_process(struct timer_list *t)
{
struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
@@ -1679,6 +1724,8 @@ void neigh_table_init(int index, struct neigh_table *tbl)
INIT_LIST_HEAD(&tbl->parms_list);
INIT_LIST_HEAD(&tbl->gc_list);
+ INIT_LIST_HEAD(&tbl->managed_list);
+
list_add(&tbl->parms.list, &tbl->parms_list);
write_pnet(&tbl->parms.net, &init_net);
refcount_set(&tbl->parms.refcnt, 1);
@@ -1710,9 +1757,13 @@ void neigh_table_init(int index, struct neigh_table *tbl)
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
rwlock_init(&tbl->lock);
+
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
tbl->parms.reachable_time);
+ INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
+ queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
+
timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
skb_queue_head_init_class(&tbl->proxy_queue,
&neigh_table_proxy_queue_class);
@@ -1783,6 +1834,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
[NDA_MASTER] = { .type = NLA_U32 },
[NDA_PROTOCOL] = { .type = NLA_U8 },
[NDA_NH_ID] = { .type = NLA_U32 },
+ [NDA_FLAGS_EXT] = { .type = NLA_U32 },
[NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
};
@@ -1855,7 +1907,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
@@ -1864,6 +1916,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct neighbour *neigh;
void *dst, *lladdr;
u8 protocol = 0;
+ u32 ndm_flags;
int err;
ASSERT_RTNL();
@@ -1879,6 +1932,16 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
}
ndm = nlmsg_data(nlh);
+ ndm_flags = ndm->ndm_flags;
+ if (tb[NDA_FLAGS_EXT]) {
+ u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
+
+ if (ext & ~NTF_EXT_MASK) {
+ NL_SET_ERR_MSG(extack, "Invalid extended flags");
+ goto out;
+ }
+ ndm_flags |= (ext << NTF_EXT_SHIFT);
+ }
if (ndm->ndm_ifindex) {
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
@@ -1906,14 +1969,18 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[NDA_PROTOCOL])
protocol = nla_get_u8(tb[NDA_PROTOCOL]);
-
- if (ndm->ndm_flags & NTF_PROXY) {
+ if (ndm_flags & NTF_PROXY) {
struct pneigh_entry *pn;
+ if (ndm_flags & NTF_MANAGED) {
+ NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
+ goto out;
+ }
+
err = -ENOBUFS;
pn = pneigh_lookup(tbl, net, dst, dev, 1);
if (pn) {
- pn->flags = ndm->ndm_flags;
+ pn->flags = ndm_flags;
if (protocol)
pn->protocol = protocol;
err = 0;
@@ -1941,8 +2008,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
}
exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
- ndm->ndm_flags & NTF_EXT_LEARNED;
- neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
+ ndm_flags & NTF_EXT_LEARNED;
+ neigh = ___neigh_create(tbl, dst, dev,
+ ndm_flags &
+ (NTF_EXT_LEARNED | NTF_MANAGED),
+ exempt_from_gc, true);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
goto out;
@@ -1961,22 +2031,22 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (protocol)
neigh->protocol = protocol;
-
- if (ndm->ndm_flags & NTF_EXT_LEARNED)
+ if (ndm_flags & NTF_EXT_LEARNED)
flags |= NEIGH_UPDATE_F_EXT_LEARNED;
-
- if (ndm->ndm_flags & NTF_ROUTER)
+ if (ndm_flags & NTF_ROUTER)
flags |= NEIGH_UPDATE_F_ISROUTER;
+ if (ndm_flags & NTF_MANAGED)
+ flags |= NEIGH_UPDATE_F_MANAGED;
+ if (ndm_flags & NTF_USE)
+ flags |= NEIGH_UPDATE_F_USE;
- if (ndm->ndm_flags & NTF_USE) {
+ err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid, extack);
+ if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
neigh_event_send(neigh, NULL);
err = 0;
- } else
- err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
- NETLINK_CB(skb).portid, extack);
-
+ }
neigh_release(neigh);
-
out:
return err;
}
@@ -2427,6 +2497,7 @@ out:
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
u32 pid, u32 seq, int type, unsigned int flags)
{
+ u32 neigh_flags, neigh_flags_ext;
unsigned long now = jiffies;
struct nda_cacheinfo ci;
struct nlmsghdr *nlh;
@@ -2436,11 +2507,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
if (nlh == NULL)
return -EMSGSIZE;
+ neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
+ neigh_flags = neigh->flags & NTF_OLD_MASK;
+
ndm = nlmsg_data(nlh);
ndm->ndm_family = neigh->ops->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = neigh->flags;
+ ndm->ndm_flags = neigh_flags;
ndm->ndm_type = neigh->type;
ndm->ndm_ifindex = neigh->dev->ifindex;
@@ -2471,6 +2545,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
goto nla_put_failure;
+ if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -2484,6 +2560,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
u32 pid, u32 seq, int type, unsigned int flags,
struct neigh_table *tbl)
{
+ u32 neigh_flags, neigh_flags_ext;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
@@ -2491,11 +2568,14 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nlh == NULL)
return -EMSGSIZE;
+ neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
+ neigh_flags = pn->flags & NTF_OLD_MASK;
+
ndm = nlmsg_data(nlh);
ndm->ndm_family = tbl->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = pn->flags | NTF_PROXY;
+ ndm->ndm_flags = neigh_flags | NTF_PROXY;
ndm->ndm_type = RTN_UNICAST;
ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
ndm->ndm_state = NUD_NONE;
@@ -2505,6 +2585,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
goto nla_put_failure;
+ if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -2820,6 +2902,7 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+ nla_total_size(sizeof(struct nda_cacheinfo))
+ nla_total_size(4) /* NDA_PROBES */
+ + nla_total_size(4) /* NDA_FLAGS_EXT */
+ nla_total_size(1); /* NDA_PROTOCOL */
}
@@ -2848,6 +2931,7 @@ static inline size_t pneigh_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
+ nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ + nla_total_size(4) /* NDA_FLAGS_EXT */
+ nla_total_size(1); /* NDA_PROTOCOL */
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f6197774048b..d6e4e0b43beb 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -175,6 +175,14 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier)
static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
+ struct net_device *netdev = to_net_dev(dev);
+
+ /* The check is also done in change_carrier; this helps returning early
+ * without hitting the trylock/restart in netdev_store.
+ */
+ if (!netdev->netdev_ops->ndo_change_carrier)
+ return -EOPNOTSUPP;
+
return netdev_store(dev, attr, buf, len, change_carrier);
}
@@ -196,6 +204,12 @@ static ssize_t speed_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
+ /* The check is also done in __ethtool_get_link_ksettings; this helps
+ * returning early without hitting the trylock/restart below.
+ */
+ if (!netdev->ethtool_ops->get_link_ksettings)
+ return ret;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -216,6 +230,12 @@ static ssize_t duplex_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
+ /* The check is also done in __ethtool_get_link_ksettings; this helps
+ * returning early without hitting the trylock/restart below.
+ */
+ if (!netdev->ethtool_ops->get_link_ksettings)
+ return ret;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -468,6 +488,14 @@ static ssize_t proto_down_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
+ struct net_device *netdev = to_net_dev(dev);
+
+ /* The check is also done in change_proto_down; this helps returning
+ * early without hitting the trylock/restart in netdev_store.
+ */
+ if (!netdev->netdev_ops->ndo_change_proto_down)
+ return -EOPNOTSUPP;
+
return netdev_store(dev, attr, buf, len, change_proto_down);
}
NETDEVICE_SHOW_RW(proto_down, fmt_dec);
@@ -478,6 +506,12 @@ static ssize_t phys_port_id_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ /* The check is also done in dev_get_phys_port_id; this helps returning
+ * early without hitting the trylock/restart below.
+ */
+ if (!netdev->netdev_ops->ndo_get_phys_port_id)
+ return -EOPNOTSUPP;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -500,6 +534,13 @@ static ssize_t phys_port_name_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ /* The checks are also done in dev_get_phys_port_name; this helps
+ * returning early without hitting the trylock/restart below.
+ */
+ if (!netdev->netdev_ops->ndo_get_phys_port_name &&
+ !netdev->netdev_ops->ndo_get_devlink_port)
+ return -EOPNOTSUPP;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -522,6 +563,14 @@ static ssize_t phys_switch_id_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ /* The checks are also done in dev_get_phys_port_name; this helps
+ * returning early without hitting the trylock/restart below. This works
+ * because recurse is false when calling dev_get_port_parent_id.
+ */
+ if (!netdev->netdev_ops->ndo_get_port_parent_id &&
+ !netdev->netdev_ops->ndo_get_devlink_port)
+ return -EOPNOTSUPP;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -1226,6 +1275,12 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
if (!capable(CAP_NET_ADMIN))
return -EPERM;
+ /* The check is also done later; this helps returning early without
+ * hitting the trylock/restart below.
+ */
+ if (!dev->netdev_ops->ndo_set_tx_maxrate)
+ return -EOPNOTSUPP;
+
err = kstrtou32(buf, 10, &rate);
if (err < 0)
return err;
@@ -1869,7 +1924,7 @@ static struct class net_class __ro_after_init = {
.get_ownership = net_get_ownership,
};
-#ifdef CONFIG_OF_NET
+#ifdef CONFIG_OF
static int of_dev_node_match(struct device *dev, const void *data)
{
for (; dev; dev = dev->parent) {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a448a9b5bb2d..202fa5eacd0f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -473,7 +473,9 @@ struct net *copy_net_ns(unsigned long flags,
if (rv < 0) {
put_userns:
+#ifdef CONFIG_KEYS
key_remove_domain(net->key_domain);
+#endif
put_user_ns(user_ns);
net_free(net);
dec_ucounts:
@@ -605,7 +607,9 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
dec_net_namespaces(net->ucounts);
+#ifdef CONFIG_KEYS
key_remove_domain(net->key_domain);
+#endif
put_user_ns(net->user_ns);
net_free(net);
}
diff --git a/net/core/of_net.c b/net/core/of_net.c
new file mode 100644
index 000000000000..f1a9bf7578e7
--- /dev/null
+++ b/net/core/of_net.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OF helpers for network devices.
+ *
+ * Initially copied out of arch/powerpc/kernel/prom_parse.c
+ */
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/phy.h>
+#include <linux/export.h>
+#include <linux/device.h>
+#include <linux/nvmem-consumer.h>
+
+/**
+ * of_get_phy_mode - Get phy mode for given device_node
+ * @np: Pointer to the given device_node
+ * @interface: Pointer to the result
+ *
+ * The function gets phy interface string from property 'phy-mode' or
+ * 'phy-connection-type'. The index in phy_modes table is set in
+ * interface and 0 returned. In case of error interface is set to
+ * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV.
+ */
+int of_get_phy_mode(struct device_node *np, phy_interface_t *interface)
+{
+ const char *pm;
+ int err, i;
+
+ *interface = PHY_INTERFACE_MODE_NA;
+
+ err = of_property_read_string(np, "phy-mode", &pm);
+ if (err < 0)
+ err = of_property_read_string(np, "phy-connection-type", &pm);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++)
+ if (!strcasecmp(pm, phy_modes(i))) {
+ *interface = i;
+ return 0;
+ }
+
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(of_get_phy_mode);
+
+static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr)
+{
+ struct property *pp = of_find_property(np, name, NULL);
+
+ if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) {
+ memcpy(addr, pp->value, ETH_ALEN);
+ return 0;
+ }
+ return -ENODEV;
+}
+
+static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
+{
+ struct platform_device *pdev = of_find_device_by_node(np);
+ struct nvmem_cell *cell;
+ const void *mac;
+ size_t len;
+ int ret;
+
+ /* Try lookup by device first, there might be a nvmem_cell_lookup
+ * associated with a given device.
+ */
+ if (pdev) {
+ ret = nvmem_get_mac_address(&pdev->dev, addr);
+ put_device(&pdev->dev);
+ return ret;
+ }
+
+ cell = of_nvmem_cell_get(np, "mac-address");
+ if (IS_ERR(cell))
+ return PTR_ERR(cell);
+
+ mac = nvmem_cell_read(cell, &len);
+ nvmem_cell_put(cell);
+
+ if (IS_ERR(mac))
+ return PTR_ERR(mac);
+
+ if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+ kfree(mac);
+ return -EINVAL;
+ }
+
+ memcpy(addr, mac, ETH_ALEN);
+ kfree(mac);
+
+ return 0;
+}
+
+/**
+ * of_get_mac_address()
+ * @np: Caller's Device Node
+ * @addr: Pointer to a six-byte array for the result
+ *
+ * Search the device tree for the best MAC address to use. 'mac-address' is
+ * checked first, because that is supposed to contain to "most recent" MAC
+ * address. If that isn't set, then 'local-mac-address' is checked next,
+ * because that is the default address. If that isn't set, then the obsolete
+ * 'address' is checked, just in case we're using an old device tree. If any
+ * of the above isn't set, then try to get MAC address from nvmem cell named
+ * 'mac-address'.
+ *
+ * Note that the 'address' property is supposed to contain a virtual address of
+ * the register set, but some DTS files have redefined that property to be the
+ * MAC address.
+ *
+ * All-zero MAC addresses are rejected, because those could be properties that
+ * exist in the device tree, but were not set by U-Boot. For example, the
+ * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
+ * addresses. Some older U-Boots only initialized 'local-mac-address'. In
+ * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
+ * but is all zeros.
+ *
+ * Return: 0 on success and errno in case of error.
+*/
+int of_get_mac_address(struct device_node *np, u8 *addr)
+{
+ int ret;
+
+ if (!np)
+ return -ENODEV;
+
+ ret = of_get_mac_addr(np, "mac-address", addr);
+ if (!ret)
+ return 0;
+
+ ret = of_get_mac_addr(np, "local-mac-address", addr);
+ if (!ret)
+ return 0;
+
+ ret = of_get_mac_addr(np, "address", addr);
+ if (!ret)
+ return 0;
+
+ return of_get_mac_addr_nvmem(np, addr);
+}
+EXPORT_SYMBOL(of_get_mac_address);
+
+/**
+ * of_get_ethdev_address()
+ * @np: Caller's Device Node
+ * @dev: Pointer to netdevice which address will be updated
+ *
+ * Search the device tree for the best MAC address to use.
+ * If found set @dev->dev_addr to that address.
+ *
+ * See documentation of of_get_mac_address() for more information on how
+ * the best address is determined.
+ *
+ * Return: 0 on success and errno in case of error.
+ */
+int of_get_ethdev_address(struct device_node *np, struct net_device *dev)
+{
+ u8 addr[ETH_ALEN];
+ int ret;
+
+ ret = of_get_mac_address(np, addr);
+ if (!ret)
+ eth_hw_addr_set(dev, addr);
+ return ret;
+}
+EXPORT_SYMBOL(of_get_ethdev_address);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8ccce85562a1..2dc1b209ba91 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -301,7 +301,7 @@ int rtnl_unregister(int protocol, int msgtype)
}
link = rtnl_dereference(tab[msgindex]);
- rcu_assign_pointer(tab[msgindex], NULL);
+ RCU_INIT_POINTER(tab[msgindex], NULL);
rtnl_unlock();
kfree_rcu(link, rcu);
@@ -337,7 +337,7 @@ void rtnl_unregister_all(int protocol)
if (!link)
continue;
- rcu_assign_pointer(tab[msgindex], NULL);
+ RCU_INIT_POINTER(tab[msgindex], NULL);
kfree_rcu(link, rcu);
}
rtnl_unlock();
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2170bea2c7de..74601bbc56ac 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -134,34 +134,31 @@ struct napi_alloc_cache {
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
-static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask)
+void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask);
-}
-
-void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
-{
fragsz = SKB_DATA_ALIGN(fragsz);
- return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
+ return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
}
EXPORT_SYMBOL(__napi_alloc_frag_align);
void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
- struct page_frag_cache *nc;
void *data;
fragsz = SKB_DATA_ALIGN(fragsz);
if (in_hardirq() || irqs_disabled()) {
- nc = this_cpu_ptr(&netdev_alloc_cache);
+ struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
+
data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
} else {
+ struct napi_alloc_cache *nc;
+
local_bh_disable();
- data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
+ nc = this_cpu_ptr(&napi_alloc_cache);
+ data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
local_bh_enable();
}
return data;
@@ -397,8 +394,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
{
struct kmem_cache *cache;
struct sk_buff *skb;
- u8 *data;
+ unsigned int osize;
bool pfmemalloc;
+ u8 *data;
cache = (flags & SKB_ALLOC_FCLONE)
? skbuff_fclone_cache : skbuff_head_cache;
@@ -430,7 +428,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* Put skb_shared_info exactly at the end of allocated zone,
* to allow max possible filling before reallocation.
*/
- size = SKB_WITH_OVERHEAD(ksize(data));
+ osize = ksize(data);
+ size = SKB_WITH_OVERHEAD(osize);
prefetchw(data + size);
/*
@@ -439,7 +438,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
- __build_skb_around(skb, data, 0);
+ __build_skb_around(skb, data, osize);
skb->pfmemalloc = pfmemalloc;
if (flags & SKB_ALLOC_FCLONE) {
diff --git a/net/core/sock.c b/net/core/sock.c
index c1601f75ec4b..9862eefce21e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -350,7 +350,7 @@ void sk_error_report(struct sock *sk)
}
EXPORT_SYMBOL(sk_error_report);
-static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
+int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
struct __kernel_sock_timeval tv;
@@ -379,12 +379,11 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
*(struct __kernel_sock_timeval *)optval = tv;
return sizeof(tv);
}
+EXPORT_SYMBOL(sock_get_timeout);
-static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
- bool old_timeval)
+int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
+ sockptr_t optval, int optlen, bool old_timeval)
{
- struct __kernel_sock_timeval tv;
-
if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
struct old_timeval32 tv32;
@@ -393,8 +392,8 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
return -EFAULT;
- tv.tv_sec = tv32.tv_sec;
- tv.tv_usec = tv32.tv_usec;
+ tv->tv_sec = tv32.tv_sec;
+ tv->tv_usec = tv32.tv_usec;
} else if (old_timeval) {
struct __kernel_old_timeval old_tv;
@@ -402,14 +401,28 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
return -EINVAL;
if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
return -EFAULT;
- tv.tv_sec = old_tv.tv_sec;
- tv.tv_usec = old_tv.tv_usec;
+ tv->tv_sec = old_tv.tv_sec;
+ tv->tv_usec = old_tv.tv_usec;
} else {
- if (optlen < sizeof(tv))
+ if (optlen < sizeof(*tv))
return -EINVAL;
- if (copy_from_sockptr(&tv, optval, sizeof(tv)))
+ if (copy_from_sockptr(tv, optval, sizeof(*tv)))
return -EFAULT;
}
+
+ return 0;
+}
+EXPORT_SYMBOL(sock_copy_user_timeval);
+
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+ bool old_timeval)
+{
+ struct __kernel_sock_timeval tv;
+ int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
+
+ if (err)
+ return err;
+
if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
return -EDOM;
@@ -947,6 +960,53 @@ void sock_set_mark(struct sock *sk, u32 val)
}
EXPORT_SYMBOL(sock_set_mark);
+static void sock_release_reserved_memory(struct sock *sk, int bytes)
+{
+ /* Round down bytes to multiple of pages */
+ bytes &= ~(SK_MEM_QUANTUM - 1);
+
+ WARN_ON(bytes > sk->sk_reserved_mem);
+ sk->sk_reserved_mem -= bytes;
+ sk_mem_reclaim(sk);
+}
+
+static int sock_reserve_memory(struct sock *sk, int bytes)
+{
+ long allocated;
+ bool charged;
+ int pages;
+
+ if (!mem_cgroup_sockets_enabled || !sk->sk_memcg)
+ return -EOPNOTSUPP;
+
+ if (!bytes)
+ return 0;
+
+ pages = sk_mem_pages(bytes);
+
+ /* pre-charge to memcg */
+ charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ if (!charged)
+ return -ENOMEM;
+
+ /* pre-charge to forward_alloc */
+ allocated = sk_memory_allocated_add(sk, pages);
+ /* If the system goes into memory pressure with this
+ * precharge, give up and return error.
+ */
+ if (allocated > sk_prot_mem_limits(sk, 1)) {
+ sk_memory_allocated_sub(sk, pages);
+ mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
+ return -ENOMEM;
+ }
+ sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT;
+
+ sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT;
+
+ return 0;
+}
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -1367,6 +1427,23 @@ set_sndbuf:
~SOCK_BUF_LOCK_MASK);
break;
+ case SO_RESERVE_MEM:
+ {
+ int delta;
+
+ if (val < 0) {
+ ret = -EINVAL;
+ break;
+ }
+
+ delta = val - sk->sk_reserved_mem;
+ if (delta < 0)
+ sock_release_reserved_memory(sk, -delta);
+ else
+ ret = sock_reserve_memory(sk, delta);
+ break;
+ }
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1750,6 +1827,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
break;
+ case SO_RESERVE_MEM:
+ v.val = sk->sk_reserved_mem;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -2063,6 +2144,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
+ newsk->sk_reserved_mem = 0;
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
diff --git a/net/core/stream.c b/net/core/stream.c
index 4f1d4aa5fb38..e09ffd410685 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -202,7 +202,7 @@ void sk_stream_kill_queues(struct sock *sk)
WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
- sk_mem_reclaim(sk);
+ sk_mem_reclaim_final(sk);
WARN_ON(sk->sk_wmem_queued);
WARN_ON(sk->sk_forward_alloc);