From 59eda0e07f43c950d31756213b607af673e551f0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Jan 2015 17:53:21 -0500 Subject: new fs_pin killing logics Signed-off-by: Al Viro --- fs/fs_pin.c | 54 +++++++++++++++++++++++++---- include/linux/fs_pin.h | 13 ++++++- include/linux/pid_namespace.h | 4 +-- kernel/acct.c | 81 ++++++++++++++++++------------------------- 4 files changed, 96 insertions(+), 56 deletions(-) diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 50ef7d2ef03c..0c77bdc238b2 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -1,4 +1,5 @@ #include <linux/fs.h> +#include <linux/sched.h> #include <linux/slab.h> #include <linux/fs_pin.h> #include "internal.h" @@ -12,6 +13,10 @@ void pin_remove(struct fs_pin *pin) hlist_del(&pin->m_list); hlist_del(&pin->s_list); spin_unlock(&pin_lock); + spin_lock_irq(&pin->wait.lock); + pin->done = 1; + wake_up_locked(&pin->wait); + spin_unlock_irq(&pin->wait.lock); } void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) @@ -28,19 +33,58 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m) pin_insert_group(pin, m, &m->mnt_sb->s_pins); } +void pin_kill(struct fs_pin *p) +{ + wait_queue_t wait; + + if (!p) { + rcu_read_unlock(); + return; + } + init_wait(&wait); + spin_lock_irq(&p->wait.lock); + if (likely(!p->done)) { + p->done = -1; + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + p->kill(p); + return; + } + if (p->done > 0) { + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + return; + } + __add_wait_queue(&p->wait, &wait); + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + schedule(); + rcu_read_lock(); + if (likely(list_empty(&wait.task_list))) + break; + /* OK, we know p couldn't have been freed yet */ + spin_lock_irq(&p->wait.lock); + if (p->done > 0) { + spin_unlock_irq(&p->wait.lock); + break; + } + } + rcu_read_unlock(); +} + void mnt_pin_kill(struct mount *m) { while (1) { struct hlist_node *p; - struct fs_pin *pin; rcu_read_lock(); p = ACCESS_ONCE(m->mnt_pins.first); 
if (!p) { rcu_read_unlock(); break; } - pin = hlist_entry(p, struct fs_pin, m_list); - pin->kill(pin); + pin_kill(hlist_entry(p, struct fs_pin, m_list)); } } @@ -48,14 +92,12 @@ void group_pin_kill(struct hlist_head *p) { while (1) { struct hlist_node *q; - struct fs_pin *pin; rcu_read_lock(); q = ACCESS_ONCE(p->first); if (!q) { rcu_read_unlock(); break; } - pin = hlist_entry(q, struct fs_pin, s_list); - pin->kill(pin); + pin_kill(hlist_entry(q, struct fs_pin, s_list)); } } diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index 2be38d1464ae..9dc4e0384bfb 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -1,11 +1,22 @@ -#include <linux/fs.h> +#include <linux/wait.h> struct fs_pin { + wait_queue_head_t wait; + int done; struct hlist_node s_list; struct hlist_node m_list; void (*kill)(struct fs_pin *); }; +struct vfsmount; + +static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *)) +{ + init_waitqueue_head(&p->wait); + p->kill = kill; +} + void pin_remove(struct fs_pin *); void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *); void pin_insert(struct fs_pin *, struct vfsmount *); +void pin_kill(struct fs_pin *); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index b9cf6c51b181..918b117a7cd3 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -19,7 +19,7 @@ struct pidmap { #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) #define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) -struct bsd_acct_struct; +struct fs_pin; struct pid_namespace { struct kref kref; @@ -37,7 +37,7 @@ struct pid_namespace { struct dentry *proc_thread_self; #endif #ifdef CONFIG_BSD_PROCESS_ACCT - struct bsd_acct_struct *bacct; + struct fs_pin *bacct; #endif struct user_namespace *user_ns; struct work_struct proc_work; diff --git a/kernel/acct.c b/kernel/acct.c index cf6588ab517b..e6c10d1a4058 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -76,7 +76,6 @@ int acct_parm[3] = {4, 
2, 30}; /* * External references and all of the globals. */ -static void do_acct_process(struct bsd_acct_struct *acct); struct bsd_acct_struct { struct fs_pin pin; @@ -91,6 +90,8 @@ struct bsd_acct_struct { struct completion done; }; +static void do_acct_process(struct bsd_acct_struct *acct); + /* * Check the amount of free space and suspend/resume accordingly. */ @@ -132,13 +133,18 @@ static void acct_put(struct bsd_acct_struct *p) kfree_rcu(p, rcu); } +static inline struct bsd_acct_struct *to_acct(struct fs_pin *p) +{ + return p ? container_of(p, struct bsd_acct_struct, pin) : NULL; +} + static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { struct bsd_acct_struct *res; again: smp_rmb(); rcu_read_lock(); - res = ACCESS_ONCE(ns->bacct); + res = to_acct(ACCESS_ONCE(ns->bacct)); if (!res) { rcu_read_unlock(); return NULL; @@ -150,7 +156,7 @@ again: } rcu_read_unlock(); mutex_lock(&res->lock); - if (!res->ns) { + if (res != to_acct(ACCESS_ONCE(ns->bacct))) { mutex_unlock(&res->lock); acct_put(res); goto again; @@ -158,6 +164,19 @@ again: return res; } +static void acct_pin_kill(struct fs_pin *pin) +{ + struct bsd_acct_struct *acct = to_acct(pin); + mutex_lock(&acct->lock); + do_acct_process(acct); + schedule_work(&acct->work); + wait_for_completion(&acct->done); + cmpxchg(&acct->ns->bacct, pin, NULL); + mutex_unlock(&acct->lock); + pin_remove(pin); + acct_put(acct); +} + static void close_work(struct work_struct *work) { struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); @@ -168,49 +187,13 @@ static void close_work(struct work_struct *work) complete(&acct->done); } -static void acct_kill(struct bsd_acct_struct *acct) -{ - if (acct) { - struct pid_namespace *ns = acct->ns; - do_acct_process(acct); - INIT_WORK(&acct->work, close_work); - init_completion(&acct->done); - schedule_work(&acct->work); - wait_for_completion(&acct->done); - pin_remove(&acct->pin); - cmpxchg(&ns->bacct, acct, NULL); - acct->ns = NULL; - 
atomic_long_dec(&acct->count); - mutex_unlock(&acct->lock); - acct_put(acct); - } -} - -static void acct_pin_kill(struct fs_pin *pin) -{ - struct bsd_acct_struct *acct; - acct = container_of(pin, struct bsd_acct_struct, pin); - if (!atomic_long_inc_not_zero(&acct->count)) { - rcu_read_unlock(); - cpu_relax(); - return; - } - rcu_read_unlock(); - mutex_lock(&acct->lock); - if (!acct->ns) { - mutex_unlock(&acct->lock); - acct_put(acct); - acct = NULL; - } - acct_kill(acct); -} - static int acct_on(struct filename *pathname) { struct file *file; struct vfsmount *mnt, *internal; struct pid_namespace *ns = task_active_pid_ns(current); - struct bsd_acct_struct *acct, *old; + struct bsd_acct_struct *acct; + struct fs_pin *old; int err; acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); @@ -252,18 +235,20 @@ static int acct_on(struct filename *pathname) file->f_path.mnt = internal; atomic_long_set(&acct->count, 1); - acct->pin.kill = acct_pin_kill; + init_fs_pin(&acct->pin, acct_pin_kill); acct->file = file; acct->needcheck = jiffies; acct->ns = ns; mutex_init(&acct->lock); + INIT_WORK(&acct->work, close_work); + init_completion(&acct->done); mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ pin_insert(&acct->pin, mnt); - old = acct_get(ns); - ns->bacct = acct; - acct_kill(old); + rcu_read_lock(); + old = xchg(&ns->bacct, &acct->pin); mutex_unlock(&acct->lock); + pin_kill(old); mnt_drop_write(mnt); mntput(mnt); return 0; @@ -299,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) mutex_unlock(&acct_on_mutex); putname(tmp); } else { - acct_kill(acct_get(task_active_pid_ns(current))); + rcu_read_lock(); + pin_kill(task_active_pid_ns(current)->bacct); } return error; @@ -307,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) void acct_exit_ns(struct pid_namespace *ns) { - acct_kill(acct_get(ns)); + rcu_read_lock(); + pin_kill(ns->bacct); } /* -- cgit