From d8ef4b38cb69d907f9b0e889c44d05fc0f890977 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Apr 2020 14:55:35 -0400 Subject: Revert "cgroup: Add memory barriers to plug cgroup_rstat_updated() race window" This reverts commit 9a9e97b2f1f2 ("cgroup: Add memory barriers to plug cgroup_rstat_updated() race window"). The commit was added in anticipation of memcg rstat conversion which needed synchronous accounting for the event counters (e.g. oom kill count). However, the conversion didn't get merged due to percpu memory overhead concern which couldn't be addressed at the time. Unfortunately, the patch's addition of smp_mb() to cgroup_rstat_updated() meant that every scheduling event now had to go through an additional full barrier and Mel Gorman noticed it as 1% regression in netperf UDP_STREAM test. There's no need to have this barrier in tree now and even if we need synchronous accounting in the future, the right thing to do is separating that out to a separate function so that hot paths which don't care about synchronous behavior don't have to pay the overhead of the full barrier. Let's revert. Signed-off-by: Tejun Heo Reported-by: Mel Gorman Link: http://lkml.kernel.org/r/20200409154413.GK3818@techsingularity.net Cc: v4.18+ --- kernel/cgroup/rstat.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 6f87352f8219..41ca996568df 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -33,12 +33,9 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) return; /* - * Paired with the one in cgroup_rstat_cpu_pop_updated(). Either we - * see NULL updated_next or they see our updated stat. - */ - smp_mb(); - - /* + * Speculative already-on-list test. This may race leading to + * temporary inaccuracies, which is fine. + * * Because @parent's updated_children is terminated with @parent * instead of NULL, we can tell whether @cgrp is on the list by * testing the next pointer for NULL. @@ -134,13 +131,6 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, *nextp = rstatc->updated_next; rstatc->updated_next = NULL; - /* - * Paired with the one in cgroup_rstat_cpu_updated(). - * Either they see NULL updated_next or we see their - * updated stat. - */ - smp_mb(); - return pos; } -- cgit From 772b3140669246e1ab32392c490d338e2eb7b803 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 8 Apr 2020 23:27:29 -0700 Subject: xattr: fix uninitialized out-param `removed_sized` isn't correctly initialized (as the doc comment suggests) on memory allocation failures. Fix by moving initialization up a bit. Fixes: 0c47383ba3bd ("kernfs: Add option to enable user xattrs") Reported-by: Dan Carpenter Signed-off-by: Daniel Xu Signed-off-by: Tejun Heo --- fs/xattr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index e13265e65871..91608d9bfc6a 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -876,6 +876,9 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, struct simple_xattr *new_xattr = NULL; int err = 0; + if (removed_size) + *removed_size = -1; + /* value == NULL means remove */ if (value) { new_xattr = simple_xattr_alloc(value, size); @@ -914,9 +917,6 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, list_add(&new_xattr->list, &xattrs->head); xattr = NULL; } - - if (removed_size) - *removed_size = -1; out: spin_unlock(&xattrs->lock); if (xattr) { -- cgit From eec8fd0277e37cf447b88c6be181e81df867bcf1 Mon Sep 17 00:00:00 2001 From: Odin Ugedal Date: Fri, 3 Apr 2020 19:55:28 +0200 Subject: device_cgroup: Cleanup cgroup eBPF device filter code Original cgroup v2 eBPF code for filtering device access made it possible to compile with CONFIG_CGROUP_DEVICE=n and still use the eBPF filtering. Change commit 4b7d4d453fc4 ("device_cgroup: Export devcgroup_check_permission") reverted this, making it required to set it to y. Since the device filtering (and all the docs) for cgroup v2 is no longer a "device controller" like it was in v1, someone might compile their kernel with CONFIG_CGROUP_DEVICE=n. Then (for linux 5.5+) the eBPF filter will not be invoked, and all processes will be allowed access to all devices, no matter what the eBPF filter says. Signed-off-by: Odin Ugedal Acked-by: Roman Gushchin Signed-off-by: Tejun Heo --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- include/linux/device_cgroup.h | 14 +++++--------- security/Makefile | 2 +- security/device_cgroup.c | 19 ++++++++++++++++--- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4a3049841086..c24cad3c64ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1050,7 +1050,7 @@ void kfd_dec_compute_active(struct kfd_dev *dev); /* Check with device cgroup if @kfd device is accessible */ static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd) { -#if defined(CONFIG_CGROUP_DEVICE) +#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) struct drm_device *ddev = kfd->ddev; return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major, diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index fa35b52e0002..9a72214496e5 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include -#include #define DEVCG_ACC_MKNOD 1 #define DEVCG_ACC_READ 2 @@ -11,16 +10,10 @@ #define DEVCG_DEV_CHAR 2 #define DEVCG_DEV_ALL 4 /* this represents all devices */ -#ifdef CONFIG_CGROUP_DEVICE -int devcgroup_check_permission(short type, u32 major, u32 minor, - short access); -#else -static inline int devcgroup_check_permission(short type, u32 major, u32 minor, - short access) -{ return 0; } -#endif #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) +int devcgroup_check_permission(short type, u32 major, u32 minor, + short access); static inline int devcgroup_inode_permission(struct inode *inode, int mask) { short type, access = 0; @@ -61,6 +54,9 @@ static inline int devcgroup_inode_mknod(int mode, dev_t dev) } #else +static inline int devcgroup_check_permission(short type, u32 major, u32 minor, + short access) +{ return 0; } static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } static inline int devcgroup_inode_mknod(int mode, dev_t dev) diff --git a/security/Makefile b/security/Makefile index 22e73a3482bd..3baf435de541 100644 --- a/security/Makefile +++ b/security/Makefile @@ -30,7 +30,7 @@ obj-$(CONFIG_SECURITY_YAMA) += yama/ obj-$(CONFIG_SECURITY_LOADPIN) += loadpin/ obj-$(CONFIG_SECURITY_SAFESETID) += safesetid/ obj-$(CONFIG_SECURITY_LOCKDOWN_LSM) += lockdown/ -obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o +obj-$(CONFIG_CGROUPS) += device_cgroup.o obj-$(CONFIG_BPF_LSM) += bpf/ # Object integrity file lists diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 7d0f8f7431ff..43ab0ad45c1b 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -15,6 +15,8 @@ #include #include +#ifdef CONFIG_CGROUP_DEVICE + static DEFINE_MUTEX(devcgroup_mutex); enum devcg_behavior { @@ -792,7 +794,7 @@ struct cgroup_subsys devices_cgrp_subsys = { }; /** - * __devcgroup_check_permission - checks if an inode operation is permitted + * devcgroup_legacy_check_permission - checks if an inode operation is permitted * @dev_cgroup: the dev cgroup to be tested against * @type: device type * @major: device major number @@ -801,7 +803,7 @@ struct cgroup_subsys devices_cgrp_subsys = { * * returns 0 on success, -EPERM case the operation is not permitted */ -static int __devcgroup_check_permission(short type, u32 major, u32 minor, +static int devcgroup_legacy_check_permission(short type, u32 major, u32 minor, short access) { struct dev_cgroup *dev_cgroup; @@ -825,6 +827,10 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor, return 0; } +#endif /* CONFIG_CGROUP_DEVICE */ + +#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) + int devcgroup_check_permission(short type, u32 major, u32 minor, short access) { int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); @@ -832,6 +838,13 @@ int devcgroup_check_permission(short type, u32 major, u32 minor, short access) if (rc) return -EPERM; - return __devcgroup_check_permission(type, major, minor, access); + #ifdef CONFIG_CGROUP_DEVICE + return devcgroup_legacy_check_permission(type, major, minor, access); + + #else /* CONFIG_CGROUP_DEVICE */ + return 0; + + #endif /* CONFIG_CGROUP_DEVICE */ } EXPORT_SYMBOL(devcgroup_check_permission); +#endif /* defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) */ -- cgit