summaryrefslogtreecommitdiff
path: root/kernel/sys.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sys.c')
-rw-r--r--kernel/sys.c158
1 files changed, 120 insertions, 38 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index e219fcfa112d..adc0de0aa364 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -52,6 +52,7 @@
#include <linux/user_namespace.h>
#include <linux/time_namespace.h>
#include <linux/binfmts.h>
+#include <linux/futex.h>
#include <linux/sched.h>
#include <linux/sched/autogroup.h>
@@ -75,6 +76,8 @@
#include <asm/io.h>
#include <asm/unistd.h>
+#include <trace/events/task.h>
+
#include "uid16.h"
#ifndef SET_UNALIGN_CTL
@@ -146,6 +149,15 @@
#ifndef RISCV_V_GET_CONTROL
# define RISCV_V_GET_CONTROL() (-EINVAL)
#endif
+#ifndef RISCV_SET_ICACHE_FLUSH_CTX
+# define RISCV_SET_ICACHE_FLUSH_CTX(a, b) (-EINVAL)
+#endif
+#ifndef PPC_GET_DEXCR_ASPECT
+# define PPC_GET_DEXCR_ASPECT(a, b) (-EINVAL)
+#endif
+#ifndef PPC_SET_DEXCR_ASPECT
+# define PPC_SET_DEXCR_ASPECT(a, b, c) (-EINVAL)
+#endif
/*
* this is where the system-wide overflow UID and GID are defined, for
@@ -1074,6 +1086,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
struct task_struct *p;
struct task_struct *group_leader = current->group_leader;
+ struct pid *pids[PIDTYPE_MAX] = { 0 };
struct pid *pgrp;
int err;
@@ -1131,13 +1144,14 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
goto out;
if (task_pgrp(p) != pgrp)
- change_pid(p, PIDTYPE_PGID, pgrp);
+ change_pid(pids, p, PIDTYPE_PGID, pgrp);
err = 0;
out:
/* All paths lead to here, thus we are safe. -DaveM */
write_unlock_irq(&tasklist_lock);
rcu_read_unlock();
+ free_pids(pids);
return err;
}
@@ -1211,21 +1225,22 @@ out:
return retval;
}
-static void set_special_pids(struct pid *pid)
+static void set_special_pids(struct pid **pids, struct pid *pid)
{
struct task_struct *curr = current->group_leader;
if (task_session(curr) != pid)
- change_pid(curr, PIDTYPE_SID, pid);
+ change_pid(pids, curr, PIDTYPE_SID, pid);
if (task_pgrp(curr) != pid)
- change_pid(curr, PIDTYPE_PGID, pid);
+ change_pid(pids, curr, PIDTYPE_PGID, pid);
}
int ksys_setsid(void)
{
struct task_struct *group_leader = current->group_leader;
struct pid *sid = task_pid(group_leader);
+ struct pid *pids[PIDTYPE_MAX] = { 0 };
pid_t session = pid_vnr(sid);
int err = -EPERM;
@@ -1241,13 +1256,14 @@ int ksys_setsid(void)
goto out;
group_leader->signal->leader = 1;
- set_special_pids(sid);
+ set_special_pids(pids, sid);
proc_clear_tty(group_leader);
err = session;
out:
write_unlock_irq(&tasklist_lock);
+ free_pids(pids);
if (err > 0) {
proc_sid_connector(group_leader);
sched_autogroup_create_attach(group_leader);
@@ -1785,21 +1801,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
struct task_struct *t;
unsigned long flags;
u64 tgutime, tgstime, utime, stime;
- unsigned long maxrss = 0;
+ unsigned long maxrss;
+ struct mm_struct *mm;
struct signal_struct *sig = p->signal;
+ unsigned int seq = 0;
- memset((char *)r, 0, sizeof (*r));
+retry:
+ memset(r, 0, sizeof(*r));
utime = stime = 0;
+ maxrss = 0;
if (who == RUSAGE_THREAD) {
task_cputime_adjusted(current, &utime, &stime);
accumulate_thread_rusage(p, r);
maxrss = sig->maxrss;
- goto out;
+ goto out_thread;
}
- if (!lock_task_sighand(p, &flags))
- return;
+ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
switch (who) {
case RUSAGE_BOTH:
@@ -1819,9 +1838,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
fallthrough;
case RUSAGE_SELF:
- thread_group_cputime_adjusted(p, &tgutime, &tgstime);
- utime += tgutime;
- stime += tgstime;
r->ru_nvcsw += sig->nvcsw;
r->ru_nivcsw += sig->nivcsw;
r->ru_minflt += sig->min_flt;
@@ -1830,28 +1846,42 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
r->ru_oublock += sig->oublock;
if (maxrss < sig->maxrss)
maxrss = sig->maxrss;
+
+ rcu_read_lock();
__for_each_thread(sig, t)
accumulate_thread_rusage(t, r);
+ rcu_read_unlock();
+
break;
default:
BUG();
}
- unlock_task_sighand(p, &flags);
-out:
- r->ru_utime = ns_to_kernel_old_timeval(utime);
- r->ru_stime = ns_to_kernel_old_timeval(stime);
+ if (need_seqretry(&sig->stats_lock, seq)) {
+ seq = 1;
+ goto retry;
+ }
+ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
- if (who != RUSAGE_CHILDREN) {
- struct mm_struct *mm = get_task_mm(p);
+ if (who == RUSAGE_CHILDREN)
+ goto out_children;
- if (mm) {
- setmax_mm_hiwater_rss(&maxrss, mm);
- mmput(mm);
- }
+ thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+ utime += tgutime;
+ stime += tgstime;
+
+out_thread:
+ mm = get_task_mm(p);
+ if (mm) {
+ setmax_mm_hiwater_rss(&maxrss, mm);
+ mmput(mm);
}
+
+out_children:
r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
+ r->ru_utime = ns_to_kernel_old_timeval(utime);
+ r->ru_stime = ns_to_kernel_old_timeval(stime);
}
SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
@@ -1888,33 +1918,28 @@ SYSCALL_DEFINE1(umask, int, mask)
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
- struct fd exe;
+ CLASS(fd, exe)(fd);
struct inode *inode;
int err;
- exe = fdget(fd);
- if (!exe.file)
+ if (fd_empty(exe))
return -EBADF;
- inode = file_inode(exe.file);
+ inode = file_inode(fd_file(exe));
/*
* Because the original mm->exe_file points to executable file, make
* sure that this one is executable as well, to avoid breaking an
* overall picture.
*/
- err = -EACCES;
- if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path))
- goto exit;
+ if (!S_ISREG(inode->i_mode) || path_noexec(&fd_file(exe)->f_path))
+ return -EACCES;
- err = file_permission(exe.file, MAY_EXEC);
+ err = file_permission(fd_file(exe), MAY_EXEC);
if (err)
- goto exit;
+ return err;
- err = replace_mm_exe_file(mm, exe.file);
-exit:
- fdput(exe);
- return err;
+ return replace_mm_exe_file(mm, fd_file(exe));
}
/*
@@ -2301,6 +2326,21 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
return -EINVAL;
}
+int __weak arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
+{
+ return -EINVAL;
+}
+
+int __weak arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
+{
+ return -EINVAL;
+}
+
+int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status)
+{
+ return -EINVAL;
+}
+
#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)
#ifdef CONFIG_ANON_VMA_NAME
@@ -2394,8 +2434,11 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
return -EINVAL;
- /* PARISC cannot allow mdwe as it needs writable stacks */
- if (IS_ENABLED(CONFIG_PARISC))
+ /*
+ * EOPNOTSUPP might be more appropriate here in principle, but
+ * existing userspace depends on EINVAL specifically.
+ */
+ if (!arch_memory_deny_write_exec_supported())
return -EINVAL;
current_bits = get_current_mdwe();
@@ -2531,6 +2574,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
error = current->timer_slack_ns;
break;
case PR_SET_TIMERSLACK:
+ if (rt_or_dl_task_policy(current))
+ break;
if (arg2 <= 0)
current->timer_slack_ns =
current->default_timer_slack_ns;
@@ -2709,6 +2754,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_MDWE:
error = prctl_get_mdwe(arg2, arg3, arg4, arg5);
break;
+ case PR_PPC_GET_DEXCR:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = PPC_GET_DEXCR_ASPECT(me, arg2);
+ break;
+ case PR_PPC_SET_DEXCR:
+ if (arg4 || arg5)
+ return -EINVAL;
+ error = PPC_SET_DEXCR_ASPECT(me, arg2, arg3);
+ break;
case PR_SET_VMA:
error = prctl_set_vma(arg2, arg3, arg4, arg5);
break;
@@ -2743,7 +2798,34 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_RISCV_V_GET_CONTROL:
error = RISCV_V_GET_CONTROL();
break;
+ case PR_RISCV_SET_ICACHE_FLUSH_CTX:
+ error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
+ break;
+ case PR_GET_SHADOW_STACK_STATUS:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = arch_get_shadow_stack_status(me, (unsigned long __user *) arg2);
+ break;
+ case PR_SET_SHADOW_STACK_STATUS:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = arch_set_shadow_stack_status(me, arg2);
+ break;
+ case PR_LOCK_SHADOW_STACK_STATUS:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = arch_lock_shadow_stack_status(me, arg2);
+ break;
+ case PR_TIMER_CREATE_RESTORE_IDS:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = posixtimer_create_prctl(arg2);
+ break;
+ case PR_FUTEX_HASH:
+ error = futex_hash_prctl(arg2, arg3, arg4);
+ break;
default:
+ trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5);
error = -EINVAL;
break;
}