summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-28 11:39:09 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-03-01 10:01:22 -0800
commitf122a08b197d076ccf136c73fae0146875812a88 (patch)
tree56d3d72377837c8316edbcca1d118726d7a109b7 /kernel
parent1d2aea1bcf68992c90218f47405bee29efd722cd (diff)
capability: just use a 'u64' instead of a 'u32[2]' array
Back in 2008 we extended the capability bits from 32 to 64, and we did it by extending the single 32-bit capability word from one word to an array of two words. It was then obfuscated by hiding the "2" behind two macro expansions, with the reasoning being that maybe it gets extended further some day. That reasoning may have been valid at the time, but the last thing we want to do is to extend the capability set any more. And the array of values not only causes source code oddities (with loops to deal with it), but also results in worse code generation. It's a lose-lose situation. So just change the 'u32[2]' into a 'u64' and be done with it. We still have to deal with the fact that the user space interface is designed around an array of these 32-bit values, but that was the case before too, since the array layouts were different (ie user space doesn't use an array of 32-bit values for individual capability masks, but an array of 32-bit slices of multiple masks). So that marshalling of data is actually simplified too, even if it does remain somewhat obscure and odd. This was all triggered by my reaction to the new "cap_isidentical()" introduced recently. By just using a saner data structure, it went from unsigned __capi; CAP_FOR_EACH_U32(__capi) { if (a.cap[__capi] != b.cap[__capi]) return false; } return true; to just being return a.val == b.val; instead. Which is rather more obvious both to humans and to compilers. Cc: Mateusz Guzik <mjguzik@gmail.com> Cc: Casey Schaufler <casey@schaufler-ca.com> Cc: Serge Hallyn <serge@hallyn.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Paul Moore <paul@paul-moore.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/auditsc.c6
-rw-r--r--kernel/capability.c104
-rw-r--r--kernel/umh.c41
3 files changed, 64 insertions, 87 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 93d0b87f3283..addeed3df15d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1295,15 +1295,11 @@ out:
static void audit_log_cap(struct audit_buffer *ab, char *prefix,
kernel_cap_t *cap)
{
- int i;
-
if (cap_isclear(*cap)) {
audit_log_format(ab, " %s=0", prefix);
return;
}
- audit_log_format(ab, " %s=", prefix);
- CAP_FOR_EACH_U32(i)
- audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]);
+ audit_log_format(ab, " %s=%016llx", prefix, cap->val);
}
static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
diff --git a/kernel/capability.c b/kernel/capability.c
index 339a44dfe2f4..3e058f41df32 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -20,13 +20,6 @@
#include <linux/user_namespace.h>
#include <linux/uaccess.h>
-/*
- * Leveraged for setting/resetting capabilities
- */
-
-const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
-EXPORT_SYMBOL(__cap_empty_set);
-
int file_caps_enabled = 1;
static int __init file_caps_disable(char *str)
@@ -151,6 +144,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
pid_t pid;
unsigned tocopy;
kernel_cap_t pE, pI, pP;
+ struct __user_cap_data_struct kdata[2];
ret = cap_validate_magic(header, &tocopy);
if ((dataptr == NULL) || (ret != 0))
@@ -163,42 +157,46 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
return -EINVAL;
ret = cap_get_target_pid(pid, &pE, &pI, &pP);
- if (!ret) {
- struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
- unsigned i;
-
- for (i = 0; i < tocopy; i++) {
- kdata[i].effective = pE.cap[i];
- kdata[i].permitted = pP.cap[i];
- kdata[i].inheritable = pI.cap[i];
- }
-
- /*
- * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
- * we silently drop the upper capabilities here. This
- * has the effect of making older libcap
- * implementations implicitly drop upper capability
- * bits when they perform a: capget/modify/capset
- * sequence.
- *
- * This behavior is considered fail-safe
- * behavior. Upgrading the application to a newer
- * version of libcap will enable access to the newer
- * capabilities.
- *
- * An alternative would be to return an error here
- * (-ERANGE), but that causes legacy applications to
- * unexpectedly fail; the capget/modify/capset aborts
- * before modification is attempted and the application
- * fails.
- */
- if (copy_to_user(dataptr, kdata, tocopy
- * sizeof(struct __user_cap_data_struct))) {
- return -EFAULT;
- }
- }
+ if (ret)
+ return ret;
- return ret;
+ /*
+ * Annoying legacy format with 64-bit capabilities exposed
+ * as two sets of 32-bit fields, so we need to split the
+ * capability values up.
+ */
+ kdata[0].effective = pE.val; kdata[1].effective = pE.val >> 32;
+ kdata[0].permitted = pP.val; kdata[1].permitted = pP.val >> 32;
+ kdata[0].inheritable = pI.val; kdata[1].inheritable = pI.val >> 32;
+
+ /*
+ * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
+ * we silently drop the upper capabilities here. This
+ * has the effect of making older libcap
+ * implementations implicitly drop upper capability
+ * bits when they perform a: capget/modify/capset
+ * sequence.
+ *
+ * This behavior is considered fail-safe
+ * behavior. Upgrading the application to a newer
+ * version of libcap will enable access to the newer
+ * capabilities.
+ *
+ * An alternative would be to return an error here
+ * (-ERANGE), but that causes legacy applications to
+ * unexpectedly fail; the capget/modify/capset aborts
+ * before modification is attempted and the application
+ * fails.
+ */
+ if (copy_to_user(dataptr, kdata, tocopy * sizeof(kdata[0])))
+ return -EFAULT;
+
+ return 0;
+}
+
+static kernel_cap_t mk_kernel_cap(u32 low, u32 high)
+{
+ return (kernel_cap_t) { (low | ((u64)high << 32)) & CAP_VALID_MASK };
}
/**
@@ -221,8 +219,8 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
*/
SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
{
- struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
- unsigned i, tocopy, copybytes;
+ struct __user_cap_data_struct kdata[2] = { { 0, }, };
+ unsigned tocopy, copybytes;
kernel_cap_t inheritable, permitted, effective;
struct cred *new;
int ret;
@@ -246,21 +244,9 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
if (copy_from_user(&kdata, data, copybytes))
return -EFAULT;
- for (i = 0; i < tocopy; i++) {
- effective.cap[i] = kdata[i].effective;
- permitted.cap[i] = kdata[i].permitted;
- inheritable.cap[i] = kdata[i].inheritable;
- }
- while (i < _KERNEL_CAPABILITY_U32S) {
- effective.cap[i] = 0;
- permitted.cap[i] = 0;
- inheritable.cap[i] = 0;
- i++;
- }
-
- effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
- permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
- inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+ effective = mk_kernel_cap(kdata[0].effective, kdata[1].effective);
+ permitted = mk_kernel_cap(kdata[0].permitted, kdata[1].permitted);
+ inheritable = mk_kernel_cap(kdata[0].inheritable, kdata[1].inheritable);
new = prepare_creds();
if (!new)
diff --git a/kernel/umh.c b/kernel/umh.c
index fbf872c624cb..2a4708277335 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -501,9 +501,9 @@ static int proc_cap_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
- unsigned long cap_array[_KERNEL_CAPABILITY_U32S];
- kernel_cap_t new_cap;
- int err, i;
+ unsigned long cap_array[2];
+ kernel_cap_t new_cap, *cap;
+ int err;
if (write && (!capable(CAP_SETPCAP) ||
!capable(CAP_SYS_MODULE)))
@@ -514,14 +514,16 @@ static int proc_cap_handler(struct ctl_table *table, int write,
* userspace if this is a read.
*/
spin_lock(&umh_sysctl_lock);
- for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) {
- if (table->data == CAP_BSET)
- cap_array[i] = usermodehelper_bset.cap[i];
- else if (table->data == CAP_PI)
- cap_array[i] = usermodehelper_inheritable.cap[i];
- else
- BUG();
- }
+ if (table->data == CAP_BSET)
+ cap = &usermodehelper_bset;
+ else if (table->data == CAP_PI)
+ cap = &usermodehelper_inheritable;
+ else
+ BUG();
+
+ /* Legacy format: capabilities are exposed as two 32-bit values */
+ cap_array[0] = (u32) cap->val;
+ cap_array[1] = cap->val >> 32;
spin_unlock(&umh_sysctl_lock);
t = *table;
@@ -535,22 +537,15 @@ static int proc_cap_handler(struct ctl_table *table, int write,
if (err < 0)
return err;
- /*
- * convert from the sysctl array of ulongs to the kernel_cap_t
- * internal representation
- */
- for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)
- new_cap.cap[i] = cap_array[i];
+ new_cap.val = (u32)cap_array[0];
+ new_cap.val += (u64)cap_array[1] << 32;
/*
* Drop everything not in the new_cap (but don't add things)
*/
if (write) {
spin_lock(&umh_sysctl_lock);
- if (table->data == CAP_BSET)
- usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap);
- if (table->data == CAP_PI)
- usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap);
+ *cap = cap_intersect(*cap, new_cap);
spin_unlock(&umh_sysctl_lock);
}
@@ -561,14 +556,14 @@ struct ctl_table usermodehelper_table[] = {
{
.procname = "bset",
.data = CAP_BSET,
- .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
+ .maxlen = 2 * sizeof(unsigned long),
.mode = 0600,
.proc_handler = proc_cap_handler,
},
{
.procname = "inheritable",
.data = CAP_PI,
- .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
+ .maxlen = 2 * sizeof(unsigned long),
.mode = 0600,
.proc_handler = proc_cap_handler,
},