summaryrefslogtreecommitdiff
path: root/kernel/acct.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/acct.c')
-rw-r--r--kernel/acct.c228
1 files changed, 117 insertions, 111 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 010667ce6080..2a2b3c874acd 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -44,19 +44,14 @@
* a struct file opened for write. Fixed. 2/6/2000, AV.
*/
-#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/acct.h>
#include <linux/capability.h>
-#include <linux/file.h>
#include <linux/tty.h>
-#include <linux/security.h>
-#include <linux/vfs.h>
+#include <linux/statfs.h>
#include <linux/jiffies.h>
-#include <linux/times.h>
#include <linux/syscalls.h>
-#include <linux/mount.h>
-#include <linux/uaccess.h>
+#include <linux/namei.h>
#include <linux/sched/cputime.h>
#include <asm/div64.h>
@@ -76,7 +71,7 @@ static int acct_parm[3] = {4, 2, 30};
#define ACCT_TIMEOUT (acct_parm[2]) /* foo second timeout between checks */
#ifdef CONFIG_SYSCTL
-static struct ctl_table kern_acct_table[] = {
+static const struct ctl_table kern_acct_table[] = {
{
.procname = "acct",
.data = &acct_parm,
@@ -84,7 +79,6 @@ static struct ctl_table kern_acct_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static __init int kernel_acct_sysctls_init(void)
@@ -104,48 +98,50 @@ struct bsd_acct_struct {
atomic_long_t count;
struct rcu_head rcu;
struct mutex lock;
- int active;
+ bool active;
+ bool check_space;
unsigned long needcheck;
struct file *file;
struct pid_namespace *ns;
struct work_struct work;
struct completion done;
+ acct_t ac;
};
-static void do_acct_process(struct bsd_acct_struct *acct);
+static void fill_ac(struct bsd_acct_struct *acct);
+static void acct_write_process(struct bsd_acct_struct *acct);
/*
* Check the amount of free space and suspend/resume accordingly.
*/
-static int check_free_space(struct bsd_acct_struct *acct)
+static bool check_free_space(struct bsd_acct_struct *acct)
{
struct kstatfs sbuf;
- if (time_is_after_jiffies(acct->needcheck))
- goto out;
+ if (!acct->check_space)
+ return acct->active;
/* May block */
if (vfs_statfs(&acct->file->f_path, &sbuf))
- goto out;
+ return acct->active;
if (acct->active) {
u64 suspend = sbuf.f_blocks * SUSPEND;
do_div(suspend, 100);
if (sbuf.f_bavail <= suspend) {
- acct->active = 0;
+ acct->active = false;
pr_info("Process accounting paused\n");
}
} else {
u64 resume = sbuf.f_blocks * RESUME;
do_div(resume, 100);
if (sbuf.f_bavail >= resume) {
- acct->active = 1;
+ acct->active = true;
pr_info("Process accounting resumed\n");
}
}
acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
-out:
return acct->active;
}
@@ -190,7 +186,11 @@ static void acct_pin_kill(struct fs_pin *pin)
{
struct bsd_acct_struct *acct = to_acct(pin);
mutex_lock(&acct->lock);
- do_acct_process(acct);
+ /*
+ * Fill the accounting struct with the exiting task's info
+ * before punting to the workqueue.
+ */
+ fill_ac(acct);
schedule_work(&acct->work);
wait_for_completion(&acct->done);
cmpxchg(&acct->ns->bacct, pin, NULL);
@@ -203,76 +203,79 @@ static void close_work(struct work_struct *work)
{
struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
struct file *file = acct->file;
+
+ /* We were fired by acct_pin_kill() which holds acct->lock. */
+ acct_write_process(acct);
if (file->f_op->flush)
file->f_op->flush(file, NULL);
__fput_sync(file);
complete(&acct->done);
}
-static int acct_on(struct filename *pathname)
+DEFINE_FREE(fput_sync, struct file *, if (!IS_ERR_OR_NULL(_T)) __fput_sync(_T))
+static int acct_on(const char __user *name)
{
- struct file *file;
- struct vfsmount *mnt, *internal;
+ /* Difference from BSD - they don't do O_APPEND */
+ const int open_flags = O_WRONLY|O_APPEND|O_LARGEFILE;
struct pid_namespace *ns = task_active_pid_ns(current);
+ struct filename *pathname __free(putname) = getname(name);
+ struct file *original_file __free(fput) = NULL; // in that order
+ struct path internal __free(path_put) = {}; // in that order
+ struct file *file __free(fput_sync) = NULL; // in that order
struct bsd_acct_struct *acct;
+ struct vfsmount *mnt;
struct fs_pin *old;
- int err;
- acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
- if (!acct)
- return -ENOMEM;
+ if (IS_ERR(pathname))
+ return PTR_ERR(pathname);
+ original_file = file_open_name(pathname, open_flags, 0);
+ if (IS_ERR(original_file))
+ return PTR_ERR(original_file);
- /* Difference from BSD - they don't do O_APPEND */
- file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
- if (IS_ERR(file)) {
- kfree(acct);
+ mnt = mnt_clone_internal(&original_file->f_path);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+
+ internal.mnt = mnt;
+ internal.dentry = dget(mnt->mnt_root);
+
+ file = dentry_open(&internal, open_flags, current_cred());
+ if (IS_ERR(file))
return PTR_ERR(file);
- }
- if (!S_ISREG(file_inode(file)->i_mode)) {
- kfree(acct);
- filp_close(file, NULL);
+ if (!S_ISREG(file_inode(file)->i_mode))
return -EACCES;
- }
- if (!(file->f_mode & FMODE_CAN_WRITE)) {
- kfree(acct);
- filp_close(file, NULL);
+ /* Exclude kernel internal filesystems. */
+ if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT))
+ return -EINVAL;
+
+ /* Exclude procfs and sysfs. */
+ if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE)
+ return -EINVAL;
+
+ if (!(file->f_mode & FMODE_CAN_WRITE))
return -EIO;
- }
- internal = mnt_clone_internal(&file->f_path);
- if (IS_ERR(internal)) {
- kfree(acct);
- filp_close(file, NULL);
- return PTR_ERR(internal);
- }
- err = __mnt_want_write(internal);
- if (err) {
- mntput(internal);
- kfree(acct);
- filp_close(file, NULL);
- return err;
- }
- mnt = file->f_path.mnt;
- file->f_path.mnt = internal;
+
+ acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+ if (!acct)
+ return -ENOMEM;
atomic_long_set(&acct->count, 1);
init_fs_pin(&acct->pin, acct_pin_kill);
- acct->file = file;
+ acct->file = no_free_ptr(file);
acct->needcheck = jiffies;
acct->ns = ns;
mutex_init(&acct->lock);
INIT_WORK(&acct->work, close_work);
init_completion(&acct->done);
mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
- pin_insert(&acct->pin, mnt);
+ pin_insert(&acct->pin, original_file->f_path.mnt);
rcu_read_lock();
old = xchg(&ns->bacct, &acct->pin);
mutex_unlock(&acct->lock);
pin_kill(old);
- __mnt_drop_write(mnt);
- mntput(mnt);
return 0;
}
@@ -297,14 +300,9 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
return -EPERM;
if (name) {
- struct filename *tmp = getname(name);
-
- if (IS_ERR(tmp))
- return PTR_ERR(tmp);
mutex_lock(&acct_on_mutex);
- error = acct_on(tmp);
+ error = acct_on(name);
mutex_unlock(&acct_on_mutex);
- putname(tmp);
} else {
rcu_read_lock();
pin_kill(task_active_pid_ns(current)->bacct);
@@ -431,13 +429,27 @@ static u32 encode_float(u64 value)
* do_exit() or when switching to a different output file.
*/
-static void fill_ac(acct_t *ac)
+static void fill_ac(struct bsd_acct_struct *acct)
{
struct pacct_struct *pacct = &current->signal->pacct;
+ struct file *file = acct->file;
+ acct_t *ac = &acct->ac;
u64 elapsed, run_time;
time64_t btime;
struct tty_struct *tty;
+ lockdep_assert_held(&acct->lock);
+
+ if (time_is_after_jiffies(acct->needcheck)) {
+ acct->check_space = false;
+
+ /* Don't fill in @ac if nothing will be written. */
+ if (!acct->active)
+ return;
+ } else {
+ acct->check_space = true;
+ }
+
/*
* Fill the accounting struct with the needed info as recorded
* by the different kernel functions.
@@ -445,7 +457,7 @@ static void fill_ac(acct_t *ac)
memset(ac, 0, sizeof(acct_t));
ac->ac_version = ACCT_VERSION | ACCT_BYTEORDER;
- strlcpy(ac->ac_comm, current->comm, sizeof(ac->ac_comm));
+ strscpy(ac->ac_comm, current->comm, sizeof(ac->ac_comm));
/* calculate run_time in nanoseconds */
run_time = ktime_get_ns();
@@ -470,7 +482,7 @@ static void fill_ac(acct_t *ac)
do_div(elapsed, AHZ);
btime = ktime_get_real_seconds() - elapsed;
ac->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX);
-#if ACCT_VERSION==2
+#if ACCT_VERSION == 2
ac->ac_ahz = AHZ;
#endif
@@ -485,64 +497,58 @@ static void fill_ac(acct_t *ac)
ac->ac_majflt = encode_comp_t(pacct->ac_majflt);
ac->ac_exitcode = pacct->ac_exitcode;
spin_unlock_irq(&current->sighand->siglock);
-}
-/*
- * do_acct_process does all actual work. Caller holds the reference to file.
- */
-static void do_acct_process(struct bsd_acct_struct *acct)
-{
- acct_t ac;
- unsigned long flim;
- const struct cred *orig_cred;
- struct file *file = acct->file;
- /*
- * Accounting records are not subject to resource limits.
- */
- flim = rlimit(RLIMIT_FSIZE);
- current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
- /* Perform file operations on behalf of whoever enabled accounting */
- orig_cred = override_creds(file->f_cred);
-
- /*
- * First check to see if there is enough free_space to continue
- * the process accounting system.
- */
- if (!check_free_space(acct))
- goto out;
-
- fill_ac(&ac);
/* we really need to bite the bullet and change layout */
- ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
- ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
+ ac->ac_uid = from_kuid_munged(file->f_cred->user_ns, current_uid());
+ ac->ac_gid = from_kgid_munged(file->f_cred->user_ns, current_gid());
#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/* backward-compatible 16 bit fields */
- ac.ac_uid16 = ac.ac_uid;
- ac.ac_gid16 = ac.ac_gid;
+ ac->ac_uid16 = ac->ac_uid;
+ ac->ac_gid16 = ac->ac_gid;
#elif ACCT_VERSION == 3
{
struct pid_namespace *ns = acct->ns;
- ac.ac_pid = task_tgid_nr_ns(current, ns);
+ ac->ac_pid = task_tgid_nr_ns(current, ns);
rcu_read_lock();
- ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent),
- ns);
+ ac->ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
rcu_read_unlock();
}
#endif
- /*
- * Get freeze protection. If the fs is frozen, just skip the write
- * as we could deadlock the system otherwise.
- */
- if (file_start_write_trylock(file)) {
- /* it's been opened O_APPEND, so position is irrelevant */
- loff_t pos = 0;
- __kernel_write(file, &ac, sizeof(acct_t), &pos);
- file_end_write(file);
+}
+
+static void acct_write_process(struct bsd_acct_struct *acct)
+{
+ struct file *file = acct->file;
+ acct_t *ac = &acct->ac;
+
+ /* Perform file operations on behalf of whoever enabled accounting */
+ scoped_with_creds(file->f_cred) {
+ /*
+ * First check to see if there is enough free_space to continue
+ * the process accounting system. Then get freeze protection. If
+ * the fs is frozen, just skip the write as we could deadlock
+ * the system otherwise.
+ */
+ if (check_free_space(acct) && file_start_write_trylock(file)) {
+ /* it's been opened O_APPEND, so position is irrelevant */
+ loff_t pos = 0;
+ __kernel_write(file, ac, sizeof(acct_t), &pos);
+ file_end_write(file);
+ }
}
-out:
+}
+
+static void do_acct_process(struct bsd_acct_struct *acct)
+{
+ unsigned long flim;
+
+ /* Accounting records are not subject to resource limits. */
+ flim = rlimit(RLIMIT_FSIZE);
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+ fill_ac(acct);
+ acct_write_process(acct);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
- revert_creds(orig_cred);
}
/**