diff options
Diffstat (limited to 'fs/ubifs/journal.c')
-rw-r--r-- | fs/ubifs/journal.c | 198 |
1 files changed, 167 insertions, 31 deletions
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index f0a5538c84b0..36ba79fbd2ff 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -293,6 +293,96 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, } /** + * __queue_and_wait - queue a task and wait until the task is waked up. + * @c: UBIFS file-system description object + * + * This function adds current task in queue and waits until the task is waked + * up. This function should be called with @c->reserve_space_wq locked. + */ +static void __queue_and_wait(struct ubifs_info *c) +{ + DEFINE_WAIT(wait); + + __add_wait_queue_entry_tail_exclusive(&c->reserve_space_wq, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&c->reserve_space_wq.lock); + + schedule(); + finish_wait(&c->reserve_space_wq, &wait); +} + +/** + * wait_for_reservation - try queuing current task to wait until waked up. + * @c: UBIFS file-system description object + * + * This function queues current task to wait until waked up, if queuing is + * started(@c->need_wait_space is not %0). Returns %true if current task is + * added in queue, otherwise %false is returned. + */ +static bool wait_for_reservation(struct ubifs_info *c) +{ + if (likely(atomic_read(&c->need_wait_space) == 0)) + /* Quick path to check whether queuing is started. */ + return false; + + spin_lock(&c->reserve_space_wq.lock); + if (atomic_read(&c->need_wait_space) == 0) { + /* Queuing is not started, don't queue current task. */ + spin_unlock(&c->reserve_space_wq.lock); + return false; + } + + __queue_and_wait(c); + return true; +} + +/** + * wake_up_reservation - wake up first task in queue or stop queuing. + * @c: UBIFS file-system description object + * + * This function wakes up the first task in queue if it exists, or stops + * queuing if no tasks in queue. + */ +static void wake_up_reservation(struct ubifs_info *c) +{ + spin_lock(&c->reserve_space_wq.lock); + if (waitqueue_active(&c->reserve_space_wq)) + wake_up_locked(&c->reserve_space_wq); + else + /* + * Compared with wait_for_reservation(), set @c->need_wait_space + * under the protection of wait queue lock, which can avoid that + * @c->need_wait_space is set to 0 after new task queued. + */ + atomic_set(&c->need_wait_space, 0); + spin_unlock(&c->reserve_space_wq.lock); +} + +/** + * add_or_start_queue - add current task in queue or start queuing. + * @c: UBIFS file-system description object + * + * This function starts queuing if queuing is not started, otherwise adds + * current task in queue. + */ +static void add_or_start_queue(struct ubifs_info *c) +{ + spin_lock(&c->reserve_space_wq.lock); + if (atomic_cmpxchg(&c->need_wait_space, 0, 1) == 0) { + /* Starts queuing, task can go on directly. */ + spin_unlock(&c->reserve_space_wq.lock); + return; + } + + /* + * There are at least two tasks have retried more than 32 times + * at certain point, first task has started queuing, just queue + * the left tasks. + */ + __queue_and_wait(c); +} + +/** * make_reservation - reserve journal space. * @c: UBIFS file-system description object * @jhead: journal head @@ -311,33 +401,27 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, static int make_reservation(struct ubifs_info *c, int jhead, int len) { int err, cmt_retries = 0, nospc_retries = 0; + bool blocked = wait_for_reservation(c); again: down_read(&c->commit_sem); err = reserve_space(c, jhead, len); - if (!err) + if (!err) { /* c->commit_sem will get released via finish_reservation(). */ - return 0; + goto out_wake_up; + } up_read(&c->commit_sem); if (err == -ENOSPC) { /* * GC could not make any progress. We should try to commit - * once because it could make some dirty space and GC would - * make progress, so make the error -EAGAIN so that the below + * because it could make some dirty space and GC would make + * progress, so make the error -EAGAIN so that the below * will commit and re-try. */ - if (nospc_retries++ < 2) { - dbg_jnl("no space, retry"); - err = -EAGAIN; - } - - /* - * This means that the budgeting is incorrect. We always have - * to be able to write to the media, because all operations are - * budgeted. Deletions are not budgeted, though, but we reserve - * an extra LEB for them. - */ + nospc_retries++; + dbg_jnl("no space, retry"); + err = -EAGAIN; } if (err != -EAGAIN) @@ -349,15 +433,37 @@ again: */ if (cmt_retries > 128) { /* - * This should not happen unless the journal size limitations - * are too tough. + * This should not happen unless: + * 1. The journal size limitations are too tough. + * 2. The budgeting is incorrect. We always have to be able to + * write to the media, because all operations are budgeted. + * Deletions are not budgeted, though, but we reserve an + * extra LEB for them. */ - ubifs_err(c, "stuck in space allocation"); + ubifs_err(c, "stuck in space allocation, nospc_retries %d", + nospc_retries); err = -ENOSPC; goto out; - } else if (cmt_retries > 32) - ubifs_warn(c, "too many space allocation re-tries (%d)", - cmt_retries); + } else if (cmt_retries > 32) { + /* + * It's almost impossible to happen, unless there are many tasks + * making reservation concurrently and someone task has retried + * gc + commit for many times, generated available space during + * this period are grabbed by other tasks. + * But if it happens, start queuing up all tasks that will make + * space reservation, then there is only one task making space + * reservation at any time, and it can always make success under + * the premise of correct budgeting. + */ + ubifs_warn(c, "too many space allocation cmt_retries (%d) " + "nospc_retries (%d), start queuing tasks", + cmt_retries, nospc_retries); + + if (!blocked) { + blocked = true; + add_or_start_queue(c); + } + } dbg_jnl("-EAGAIN, commit and retry (retried %d times)", cmt_retries); @@ -365,7 +471,7 @@ again: err = ubifs_run_commit(c); if (err) - return err; + goto out_wake_up; goto again; out: @@ -380,6 +486,27 @@ out: cmt_retries = dbg_check_lprops(c); up_write(&c->commit_sem); } +out_wake_up: + if (blocked) { + /* + * Only tasks that have ever started queuing or ever been queued + * can wake up other queued tasks, which can make sure that + * there is only one task waked up to make space reservation. + * For example: + * task A task B task C + * make_reservation make_reservation + * reserve_space // 0 + * wake_up_reservation + * atomic_cmpxchg // 0, start queuing + * reserve_space + * wait_for_reservation + * __queue_and_wait + * add_wait_queue + * if (blocked) // false + * // So that task C won't be waked up to race with task B + */ + wake_up_reservation(c); + } return err; } @@ -516,6 +643,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) * @inode: inode to update * @deletion: indicates a directory entry deletion i.e unlink or rmdir * @xent: non-zero if the directory entry is an extended attribute entry + * @in_orphan: indicates whether the @inode is in orphan list * * This function updates an inode by writing a directory entry (or extended * attribute entry), the inode itself, and the parent directory inode (or the @@ -537,7 +665,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) */ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, const struct fscrypt_name *nm, const struct inode *inode, - int deletion, int xent) + int deletion, int xent, int in_orphan) { int err, dlen, ilen, len, lnum, ino_offs, dent_offs, orphan_added = 0; int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir); @@ -623,7 +751,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (err) goto out_release; - if (last_reference) { + if (last_reference && !in_orphan) { err = ubifs_add_orphan(c, inode->i_ino); if (err) { release_head(c, BASEHD); @@ -679,6 +807,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (err) goto out_ro; + if (in_orphan && inode->i_nlink) + ubifs_delete_orphan(c, inode->i_ino); + finish_reservation(c); spin_lock(&ui->ui_lock); ui->synced_i_size = ui->ui_size; @@ -850,6 +981,13 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink); + if (kill_xattrs && ui->xattr_cnt > ubifs_xattr_max_cnt(c)) { + ubifs_err(c, "Cannot delete inode, it has too much xattrs!"); + err = -EPERM; + ubifs_ro_mode(c, err); + return err; + } + /* * If the inode is being deleted, do not write the attached data. No * need to synchronize the write-buffer either. @@ -881,12 +1019,6 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) struct inode *xino; struct ubifs_dent_node *xent, *pxent = NULL; - if (ui->xattr_cnt > ubifs_xattr_max_cnt(c)) { - err = -EPERM; - ubifs_err(c, "Cannot delete inode, it has too much xattrs!"); - goto out_release; - } - lowest_xent_key(c, &key, inode->i_ino); while (1) { xent = ubifs_tnc_next_ent(c, &key, &nm); @@ -1209,6 +1341,7 @@ out_free: * @new_nm: new name of the new directory entry * @whiteout: whiteout inode * @sync: non-zero if the write-buffer has to be synchronized + * @delete_orphan: indicates an orphan entry deletion for @whiteout * * This function implements the re-name operation which may involve writing up * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes) @@ -1221,7 +1354,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *new_dir, const struct inode *new_inode, const struct fscrypt_name *new_nm, - const struct inode *whiteout, int sync) + const struct inode *whiteout, int sync, int delete_orphan) { void *p; union ubifs_key key; @@ -1438,6 +1571,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, goto out_ro; } + if (delete_orphan) + ubifs_delete_orphan(c, whiteout->i_ino); + finish_reservation(c); if (new_inode) { mark_inode_clean(c, new_ui); |