Diffstat (limited to 'fs/btrfs/async-thread.c')
-rw-r--r--   fs/btrfs/async-thread.c   294
1 file changed, 131 insertions, 163 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index d522494698fa..6c6f3bb58f4e 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -9,17 +9,18 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
+#include <trace/events/btrfs.h>
 #include "async-thread.h"
-#include "ctree.h"
 
-#define WORK_DONE_BIT 0
-#define WORK_ORDER_DONE_BIT 1
-#define WORK_HIGH_PRIO_BIT 2
+enum {
+        WORK_DONE_BIT,
+        WORK_ORDER_DONE_BIT,
+};
 
 #define NO_THRESHOLD (-1)
-#define DFT_THRESHOLD (32)
+#define DEFAULT_THRESHOLD (32)
 
-struct __btrfs_workqueue {
+struct btrfs_workqueue {
         struct workqueue_struct *normal_wq;
 
         /* File system this workqueue services */
@@ -46,29 +47,12 @@ struct __btrfs_workqueue {
         spinlock_t thres_lock;
 };
 
-struct btrfs_workqueue {
-        struct __btrfs_workqueue *normal;
-        struct __btrfs_workqueue *high;
-};
-
-static void normal_work_helper(struct btrfs_work *work);
-
-#define BTRFS_WORK_HELPER(name)                                         \
-noinline_for_stack void btrfs_##name(struct work_struct *arg)           \
-{                                                                       \
-        struct btrfs_work *work = container_of(arg, struct btrfs_work,  \
-                                               normal_work);            \
-        normal_work_helper(work);                                       \
-}
-
-struct btrfs_fs_info *
-btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
+struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct btrfs_workqueue *wq)
 {
         return wq->fs_info;
 }
 
-struct btrfs_fs_info *
-btrfs_work_owner(const struct btrfs_work *work)
+struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work)
 {
         return work->wq->fs_info;
 }
@@ -76,56 +60,43 @@ btrfs_work_owner(const struct btrfs_work *work)
 bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
 {
         /*
-         * We could compare wq->normal->pending with num_online_cpus()
+         * We could compare wq->pending with num_online_cpus()
          * to support "thresh == NO_THRESHOLD" case, but it requires
          * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's
          * postpone it until someone needs the support of that case.
          */
-        if (wq->normal->thresh == NO_THRESHOLD)
+        if (wq->thresh == NO_THRESHOLD)
                 return false;
 
-        return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
+        return atomic_read(&wq->pending) > wq->thresh * 2;
 }
 
-BTRFS_WORK_HELPER(worker_helper);
-BTRFS_WORK_HELPER(delalloc_helper);
-BTRFS_WORK_HELPER(flush_delalloc_helper);
-BTRFS_WORK_HELPER(cache_helper);
-BTRFS_WORK_HELPER(submit_helper);
-BTRFS_WORK_HELPER(fixup_helper);
-BTRFS_WORK_HELPER(endio_helper);
-BTRFS_WORK_HELPER(endio_meta_helper);
-BTRFS_WORK_HELPER(endio_meta_write_helper);
-BTRFS_WORK_HELPER(endio_raid56_helper);
-BTRFS_WORK_HELPER(endio_repair_helper);
-BTRFS_WORK_HELPER(rmw_helper);
-BTRFS_WORK_HELPER(endio_write_helper);
-BTRFS_WORK_HELPER(freespace_write_helper);
-BTRFS_WORK_HELPER(delayed_meta_helper);
-BTRFS_WORK_HELPER(readahead_helper);
-BTRFS_WORK_HELPER(qgroup_rescan_helper);
-BTRFS_WORK_HELPER(extent_refs_helper);
-BTRFS_WORK_HELPER(scrub_helper);
-BTRFS_WORK_HELPER(scrubwrc_helper);
-BTRFS_WORK_HELPER(scrubnc_helper);
-BTRFS_WORK_HELPER(scrubparity_helper);
-
-static struct __btrfs_workqueue *
-__btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
-                        unsigned int flags, int limit_active, int thresh)
+static void btrfs_init_workqueue(struct btrfs_workqueue *wq,
+                                 struct btrfs_fs_info *fs_info)
 {
-        struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+        wq->fs_info = fs_info;
+        atomic_set(&wq->pending, 0);
+        INIT_LIST_HEAD(&wq->ordered_list);
+        spin_lock_init(&wq->list_lock);
+        spin_lock_init(&wq->thres_lock);
+}
+
+struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
+                                              const char *name, unsigned int flags,
+                                              int limit_active, int thresh)
+{
+        struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
 
         if (!ret)
                 return NULL;
 
-        ret->fs_info = fs_info;
+        btrfs_init_workqueue(ret, fs_info);
+
         ret->limit_active = limit_active;
-        atomic_set(&ret->pending, 0);
         if (thresh == 0)
-                thresh = DFT_THRESHOLD;
+                thresh = DEFAULT_THRESHOLD;
         /* For low threshold, disabling threshold is a better choice */
-        if (thresh < DFT_THRESHOLD) {
+        if (thresh < DEFAULT_THRESHOLD) {
                 ret->current_active = limit_active;
                 ret->thresh = NO_THRESHOLD;
         } else {
@@ -138,57 +109,41 @@ __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
                 ret->thresh = thresh;
         }
 
-        if (flags & WQ_HIGHPRI)
-                ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
-                                                 ret->current_active, "btrfs",
-                                                 name);
-        else
-                ret->normal_wq = alloc_workqueue("%s-%s", flags,
-                                                 ret->current_active, "btrfs",
-                                                 name);
+        ret->normal_wq = alloc_workqueue("btrfs-%s", flags, ret->current_active,
+                                         name);
         if (!ret->normal_wq) {
                 kfree(ret);
                 return NULL;
         }
 
-        INIT_LIST_HEAD(&ret->ordered_list);
-        spin_lock_init(&ret->list_lock);
-        spin_lock_init(&ret->thres_lock);
-        trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI);
+        trace_btrfs_workqueue_alloc(ret, name);
         return ret;
 }
 
-static inline void
-__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
-
-struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
-                                              const char *name,
-                                              unsigned int flags,
-                                              int limit_active,
-                                              int thresh)
+struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
+                                struct btrfs_fs_info *fs_info, const char *name,
+                                unsigned int flags)
 {
-        struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+        struct btrfs_workqueue *ret;
 
+        ret = kzalloc(sizeof(*ret), GFP_KERNEL);
         if (!ret)
                 return NULL;
 
-        ret->normal = __btrfs_alloc_workqueue(fs_info, name,
-                                              flags & ~WQ_HIGHPRI,
-                                              limit_active, thresh);
-        if (!ret->normal) {
+        btrfs_init_workqueue(ret, fs_info);
+
+        /* Ordered workqueues don't allow @max_active adjustments. */
+        ret->limit_active = 1;
+        ret->current_active = 1;
+        ret->thresh = NO_THRESHOLD;
+
+        ret->normal_wq = alloc_ordered_workqueue("btrfs-%s", flags, name);
+        if (!ret->normal_wq) {
                 kfree(ret);
                 return NULL;
         }
 
-        if (flags & WQ_HIGHPRI) {
-                ret->high = __btrfs_alloc_workqueue(fs_info, name, flags,
-                                                    limit_active, thresh);
-                if (!ret->high) {
-                        __btrfs_destroy_workqueue(ret->normal);
-                        kfree(ret);
-                        return NULL;
-                }
-        }
+        trace_btrfs_workqueue_alloc(ret, name);
         return ret;
 }
 
@@ -197,7 +152,7 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
  * This hook WILL be called in IRQ handler context,
  * so workqueue_set_max_active MUST NOT be called in this hook
  */
-static inline void thresh_queue_hook(struct __btrfs_workqueue *wq)
+static inline void thresh_queue_hook(struct btrfs_workqueue *wq)
 {
         if (wq->thresh == NO_THRESHOLD)
                 return;
@@ -209,11 +164,11 @@ static inline void thresh_queue_hook(struct __btrfs_workqueue *wq)
  * This hook is called in kthread content.
  * So workqueue_set_max_active is called here.
  */
-static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
+static inline void thresh_exec_hook(struct btrfs_workqueue *wq)
 {
         int new_current_active;
         long pending;
-        int need_change = 0;
+        bool need_change = false;
 
         if (wq->thresh == NO_THRESHOLD)
                 return;
@@ -241,34 +196,39 @@ static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
                 new_current_active--;
         new_current_active = clamp_val(new_current_active, 1, wq->limit_active);
         if (new_current_active != wq->current_active) {
-                need_change = 1;
+                need_change = true;
                 wq->current_active = new_current_active;
         }
 out:
         spin_unlock(&wq->thres_lock);
 
-        if (need_change) {
+        if (need_change)
                 workqueue_set_max_active(wq->normal_wq, wq->current_active);
-        }
 }
 
-static void run_ordered_work(struct __btrfs_workqueue *wq)
+static void run_ordered_work(struct btrfs_workqueue *wq,
+                             struct btrfs_work *self)
 {
         struct list_head *list = &wq->ordered_list;
         struct btrfs_work *work;
         spinlock_t *lock = &wq->list_lock;
         unsigned long flags;
+        bool free_self = false;
 
         while (1) {
-                void *wtag;
-
                 spin_lock_irqsave(lock, flags);
                 if (list_empty(list))
                         break;
-                work = list_entry(list->next, struct btrfs_work,
-                                  ordered_list);
+                work = list_first_entry(list, struct btrfs_work, ordered_list);
                 if (!test_bit(WORK_DONE_BIT, &work->flags))
                         break;
+                /*
+                 * Orders all subsequent loads after reading WORK_DONE_BIT,
+                 * paired with the smp_mb__before_atomic in btrfs_work_helper
+                 * this guarantees that the ordered function will see all
+                 * updates from ordinary work function.
+                 */
+                smp_rmb();
 
                 /*
                  * we are going to call the ordered done function, but
@@ -280,71 +240,103 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
                         break;
                 trace_btrfs_ordered_sched(work);
                 spin_unlock_irqrestore(lock, flags);
-                work->ordered_func(work);
+                work->ordered_func(work, false);
 
                 /* now take the lock again and drop our item from the list */
                 spin_lock_irqsave(lock, flags);
                 list_del(&work->ordered_list);
                 spin_unlock_irqrestore(lock, flags);
 
-                /*
-                 * We don't want to call the ordered free functions with the
-                 * lock held though. Save the work as tag for the trace event,
-                 * because the callback could free the structure.
-                 */
-                wtag = work;
-                work->ordered_free(work);
-                trace_btrfs_all_work_done(wq->fs_info, wtag);
+                if (work == self) {
+                        /*
+                         * This is the work item that the worker is currently
+                         * executing.
+                         *
+                         * The kernel workqueue code guarantees non-reentrancy
+                         * of work items. I.e., if a work item with the same
+                         * address and work function is queued twice, the second
+                         * execution is blocked until the first one finishes. A
+                         * work item may be freed and recycled with the same
+                         * work function; the workqueue code assumes that the
+                         * original work item cannot depend on the recycled work
+                         * item in that case (see find_worker_executing_work()).
+                         *
+                         * Note that different types of Btrfs work can depend on
+                         * each other, and one type of work on one Btrfs
+                         * filesystem may even depend on the same type of work
+                         * on another Btrfs filesystem via, e.g., a loop device.
+                         * Therefore, we must not allow the current work item to
+                         * be recycled until we are really done, otherwise we
+                         * break the above assumption and can deadlock.
+                         */
+                        free_self = true;
+                } else {
+                        /*
+                         * We don't want to call the ordered free functions with
+                         * the lock held.
+                         */
+                        work->ordered_func(work, true);
+                        /* NB: work must not be dereferenced past this point. */
+                        trace_btrfs_all_work_done(wq->fs_info, work);
+                }
         }
         spin_unlock_irqrestore(lock, flags);
+
+        if (free_self) {
+                self->ordered_func(self, true);
+                /* NB: self must not be dereferenced past this point. */
+                trace_btrfs_all_work_done(wq->fs_info, self);
+        }
 }
 
-static void normal_work_helper(struct btrfs_work *work)
+static void btrfs_work_helper(struct work_struct *normal_work)
 {
-        struct __btrfs_workqueue *wq;
-        void *wtag;
-        int need_order = 0;
+        struct btrfs_work *work = container_of(normal_work, struct btrfs_work,
+                                               normal_work);
+        struct btrfs_workqueue *wq = work->wq;
+        bool need_order = false;
 
         /*
          * We should not touch things inside work in the following cases:
-         * 1) after work->func() if it has no ordered_free
+         * 1) after work->func() if it has no ordered_func(..., true) to free
          *    Since the struct is freed in work->func().
          * 2) after setting WORK_DONE_BIT
          *    The work may be freed in other threads almost instantly.
          * So we save the needed things here.
          */
         if (work->ordered_func)
-                need_order = 1;
-        wq = work->wq;
-        /* Safe for tracepoints in case work gets freed by the callback */
-        wtag = work;
+                need_order = true;
 
         trace_btrfs_work_sched(work);
         thresh_exec_hook(wq);
         work->func(work);
         if (need_order) {
+                /*
+                 * Ensures all memory accesses done in the work function are
+                 * ordered before setting the WORK_DONE_BIT. Ensuring the thread
+                 * which is going to executed the ordered work sees them.
+                 * Pairs with the smp_rmb in run_ordered_work.
+                 */
+                smp_mb__before_atomic();
                 set_bit(WORK_DONE_BIT, &work->flags);
-                run_ordered_work(wq);
+                run_ordered_work(wq, work);
+        } else {
+                /* NB: work must not be dereferenced past this point. */
+                trace_btrfs_all_work_done(wq->fs_info, work);
         }
-        if (!need_order)
-                trace_btrfs_all_work_done(wq->fs_info, wtag);
 }
 
-void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
-                     btrfs_func_t func,
-                     btrfs_func_t ordered_func,
-                     btrfs_func_t ordered_free)
+void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
+                     btrfs_ordered_func_t ordered_func)
 {
         work->func = func;
         work->ordered_func = ordered_func;
-        work->ordered_free = ordered_free;
-        INIT_WORK(&work->normal_work, uniq_func);
+        INIT_WORK(&work->normal_work, btrfs_work_helper);
         INIT_LIST_HEAD(&work->ordered_list);
         work->flags = 0;
 }
 
-static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
-                                      struct btrfs_work *work)
+void btrfs_queue_work(struct btrfs_workqueue *wq, struct btrfs_work *work)
 {
         unsigned long flags;
 
@@ -359,46 +351,22 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
         queue_work(wq->normal_wq, &work->normal_work);
 }
 
-void btrfs_queue_work(struct btrfs_workqueue *wq,
-                      struct btrfs_work *work)
-{
-        struct __btrfs_workqueue *dest_wq;
-
-        if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
-                dest_wq = wq->high;
-        else
-                dest_wq = wq->normal;
-        __btrfs_queue_work(dest_wq, work);
-}
-
-static inline void
-__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq)
-{
-        destroy_workqueue(wq->normal_wq);
-        trace_btrfs_workqueue_destroy(wq);
-        kfree(wq);
-}
-
 void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
 {
         if (!wq)
                 return;
-        if (wq->high)
-                __btrfs_destroy_workqueue(wq->high);
-        __btrfs_destroy_workqueue(wq->normal);
+        destroy_workqueue(wq->normal_wq);
+        trace_btrfs_workqueue_destroy(wq);
         kfree(wq);
 }
 
 void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int limit_active)
 {
-        if (!wq)
-                return;
-        wq->normal->limit_active = limit_active;
-        if (wq->high)
-                wq->high->limit_active = limit_active;
+        if (wq)
+                wq->limit_active = limit_active;
 }
 
-void btrfs_set_work_high_priority(struct btrfs_work *work)
+void btrfs_flush_workqueue(struct btrfs_workqueue *wq)
 {
-        set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
+        flush_workqueue(wq->normal_wq);
 }
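
For readers tracking the API change: a caller now allocates a plain struct btrfs_workqueue, describes each job with btrfs_init_work(work, func, ordered_func) and submits it with btrfs_queue_work(). The single ordered callback replaces the old ordered_func/ordered_free pair; it is called first with false for the in-order completion step and later with true, at which point it is responsible for freeing the item. Below is a minimal caller-side sketch under those assumptions; my_item, my_work_fn, my_ordered_fn and submit_one are hypothetical names, not part of this diff.

#include <linux/slab.h>
#include "async-thread.h"

/* Hypothetical per-job context embedding the btrfs_work item. */
struct my_item {
        struct btrfs_work work;
        u64 payload;
};

/* Runs on the workqueue, possibly concurrently with other items. */
static void my_work_fn(struct btrfs_work *work)
{
        struct my_item *item = container_of(work, struct my_item, work);

        /* ... process item->payload ... */
}

/*
 * Runs in queueing order after my_work_fn() has finished; the final call
 * passes do_free == true and is the last time the item may be touched.
 */
static void my_ordered_fn(struct btrfs_work *work, bool do_free)
{
        struct my_item *item = container_of(work, struct my_item, work);

        if (!do_free)
                return;         /* in-order completion step, item still live */
        kfree(item);
}

static int submit_one(struct btrfs_workqueue *wq, u64 payload)
{
        struct my_item *item = kzalloc(sizeof(*item), GFP_NOFS);

        if (!item)
                return -ENOMEM;
        item->payload = payload;
        btrfs_init_work(&item->work, my_work_fn, my_ordered_fn);
        btrfs_queue_work(wq, &item->work);
        return 0;               /* freed later by my_ordered_fn(..., true) */
}

The queue itself would typically be created once with btrfs_alloc_workqueue() or btrfs_alloc_ordered_workqueue() and torn down with btrfs_destroy_workqueue(), both visible in the hunks above.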
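The smp_mb__before_atomic()/smp_rmb() pair added by this diff is the usual publish-then-consume flag pattern: the worker makes everything the work function wrote visible before setting WORK_DONE_BIT, and whoever drains the ordered list may only rely on that data after re-reading the bit and issuing the read barrier. A stand-alone sketch of the same pattern follows; my_msg, my_publish, my_consume and MY_DONE_BIT are made-up names (WORK_DONE_BIT itself is private to async-thread.c).

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/types.h>

#define MY_DONE_BIT 0           /* stand-in for WORK_DONE_BIT */

struct my_msg {
        unsigned long flags;
        u64 payload;
};

/* Publisher: mirrors the end of btrfs_work_helper(). */
static void my_publish(struct my_msg *m, u64 value)
{
        m->payload = value;
        smp_mb__before_atomic();        /* order the payload store before set_bit() */
        set_bit(MY_DONE_BIT, &m->flags);
}

/* Consumer: mirrors the check at the top of run_ordered_work(). */
static bool my_consume(struct my_msg *m, u64 *value)
{
        if (!test_bit(MY_DONE_BIT, &m->flags))
                return false;           /* not published yet */
        smp_rmb();                      /* pairs with smp_mb__before_atomic() above */
        *value = m->payload;            /* guaranteed to see the publisher's store */
        return true;
}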
