summaryrefslogtreecommitdiff
path: root/kernel/seccomp.c
diff options
context:
space:
mode:
authorYiFei Zhu <yifeifz2@illinois.edu>2020-11-11 07:33:54 -0600
committerKees Cook <keescook@chromium.org>2020-11-20 11:16:35 -0800
commit0d8315dddd2899f519fe1ca3d4d5cdaf44ea421e (patch)
tree5e761960d24bde7cb22cbb01ec797751f1823a15 /kernel/seccomp.c
parent445247b02342a05b7d528bba6d85d2d418875b69 (diff)
seccomp/cache: Report cache data through /proc/pid/seccomp_cache
Currently the kernel does not provide an infrastructure to translate architecture numbers to a human-readable name. Translating syscall numbers to syscall names is possible through FTRACE_SYSCALL infrastructure but it does not provide support for compat syscalls. This will create a file for each PID as /proc/pid/seccomp_cache. The file will be empty when no seccomp filters are loaded, or be in the format of: <arch name> <decimal syscall number> <ALLOW | FILTER> where ALLOW means the cache is guaranteed to allow the syscall, and filter means the cache will pass the syscall to the BPF filter. For the docker default profile on x86_64 it looks like: x86_64 0 ALLOW x86_64 1 ALLOW x86_64 2 ALLOW x86_64 3 ALLOW [...] x86_64 132 ALLOW x86_64 133 ALLOW x86_64 134 FILTER x86_64 135 FILTER x86_64 136 FILTER x86_64 137 ALLOW x86_64 138 ALLOW x86_64 139 FILTER x86_64 140 ALLOW x86_64 141 ALLOW [...] This file is guarded by CONFIG_SECCOMP_CACHE_DEBUG with a default of N because I think certain users of seccomp might not want the application to know which syscalls are definitely usable. For the same reason, it is also guarded by CAP_SYS_ADMIN. Suggested-by: Jann Horn <jannh@google.com> Link: https://lore.kernel.org/lkml/CAG48ez3Ofqp4crXGksLmZY6=fGrF_tWyUCg7PBkAetvbbOPeOA@mail.gmail.com/ Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu> Signed-off-by: Kees Cook <keescook@chromium.org> Link: https://lore.kernel.org/r/94e663fa53136f5a11f432c661794d1ee7060779.1605101222.git.yifeifz2@illinois.edu
Diffstat (limited to 'kernel/seccomp.c')
-rw-r--r--kernel/seccomp.c59
1 files changed, 59 insertions, 0 deletions
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d8cf468dbe1e..76f524e320b1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -553,6 +553,9 @@ void seccomp_filter_release(struct task_struct *tsk)
{
struct seccomp_filter *orig = tsk->seccomp.filter;
+ /* We are effectively holding the siglock by not having any sighand. */
+ WARN_ON(tsk->sighand != NULL);
+
/* Detach task from its filter tree. */
tsk->seccomp.filter = NULL;
__seccomp_filter_release(orig);
@@ -2335,3 +2338,59 @@ static int __init seccomp_sysctl_init(void)
device_initcall(seccomp_sysctl_init)
#endif /* CONFIG_SYSCTL */
+
+#ifdef CONFIG_SECCOMP_CACHE_DEBUG
+/* Currently CONFIG_SECCOMP_CACHE_DEBUG implies SECCOMP_ARCH_NATIVE */
+static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
+ const void *bitmap, size_t bitmap_size)
+{
+ int nr;
+
+ for (nr = 0; nr < bitmap_size; nr++) {
+ bool cached = test_bit(nr, bitmap);
+ char *status = cached ? "ALLOW" : "FILTER";
+
+ seq_printf(m, "%s %d %s\n", name, nr, status);
+ }
+}
+
+int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ struct seccomp_filter *f;
+ unsigned long flags;
+
+ /*
+ * We don't want some sandboxed process to know what their seccomp
+ * filters consist of.
+ */
+ if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
+ return -EACCES;
+
+ if (!lock_task_sighand(task, &flags))
+ return -ESRCH;
+
+ f = READ_ONCE(task->seccomp.filter);
+ if (!f) {
+ unlock_task_sighand(task, &flags);
+ return 0;
+ }
+
+ /* prevent filter from being freed while we are printing it */
+ __get_seccomp_filter(f);
+ unlock_task_sighand(task, &flags);
+
+ proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME,
+ f->cache.allow_native,
+ SECCOMP_ARCH_NATIVE_NR);
+
+#ifdef SECCOMP_ARCH_COMPAT
+ proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME,
+ f->cache.allow_compat,
+ SECCOMP_ARCH_COMPAT_NR);
+#endif /* SECCOMP_ARCH_COMPAT */
+
+ __put_seccomp_filter(f);
+ return 0;
+}
+#endif /* CONFIG_SECCOMP_CACHE_DEBUG */