diff options
Diffstat (limited to 'fs/inode.c')
| -rw-r--r-- | fs/inode.c | 560 |
1 files changed, 306 insertions, 254 deletions
diff --git a/fs/inode.c b/fs/inode.c index 6b4c77268fc0..521383223d8a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -184,7 +184,7 @@ static int proc_nr_inodes(const struct ctl_table *table, int write, void *buffer return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } -static struct ctl_table inodes_sysctls[] = { +static const struct ctl_table inodes_sysctls[] = { { .procname = "inode-nr", .data = &inodes_stat, @@ -233,7 +233,7 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; - inode->i_state = 0; + inode_state_assign_raw(inode, 0); atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; @@ -327,7 +327,17 @@ static void i_callback(struct rcu_head *head) free_inode_nonrcu(inode); } -static struct inode *alloc_inode(struct super_block *sb) +/** + * alloc_inode - obtain an inode + * @sb: superblock + * + * Allocates a new inode for given superblock. + * Inode wont be chained in superblock s_inodes list + * This means : + * - fs can't be unmount + * - quotas, fsnotify, writeback can't work + */ +struct inode *alloc_inode(struct super_block *sb) { const struct super_operations *ops = sb->s_op; struct inode *inode; @@ -461,7 +471,7 @@ EXPORT_SYMBOL(set_nlink); void inc_nlink(struct inode *inode) { if (unlikely(inode->i_nlink == 0)) { - WARN_ON(!(inode->i_state & I_LINKABLE)); + WARN_ON(!(inode_state_read_once(inode) & I_LINKABLE)); atomic_long_dec(&inode->i_sb->s_remove_count); } @@ -520,11 +530,50 @@ void ihold(struct inode *inode) } EXPORT_SYMBOL(ihold); -static void __inode_add_lru(struct inode *inode, bool rotate) +struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe, + struct inode *inode, u32 bit) +{ + void *bit_address; + + bit_address = inode_state_wait_address(inode, bit); + init_wait_var_entry(wqe, bit_address, 0); + return __var_waitqueue(bit_address); +} +EXPORT_SYMBOL(inode_bit_waitqueue); + +void wait_on_new_inode(struct inode *inode) +{ + struct wait_bit_queue_entry wqe; + struct wait_queue_head *wq_head; + + spin_lock(&inode->i_lock); + if (!(inode_state_read(inode) & I_NEW)) { + spin_unlock(&inode->i_lock); + return; + } + + wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW); + for (;;) { + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE); + if (!(inode_state_read(inode) & I_NEW)) + break; + spin_unlock(&inode->i_lock); + schedule(); + spin_lock(&inode->i_lock); + } + finish_wait(wq_head, &wqe.wq_entry); + WARN_ON(inode_state_read(inode) & I_NEW); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL(wait_on_new_inode); + +static void __inode_lru_list_add(struct inode *inode, bool rotate) { - if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) + lockdep_assert_held(&inode->i_lock); + + if (inode_state_read(inode) & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) return; - if (atomic_read(&inode->i_count)) + if (icount_read(inode)) return; if (!(inode->i_sb->s_flags & SB_ACTIVE)) return; @@ -534,32 +583,22 @@ static void __inode_add_lru(struct inode *inode, bool rotate) if (list_lru_add_obj(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_inc(nr_unused); else if (rotate) - inode->i_state |= I_REFERENCED; + inode_state_set(inode, I_REFERENCED); } -struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe, - struct inode *inode, u32 bit) -{ - void *bit_address; - - bit_address = inode_state_wait_address(inode, bit); - init_wait_var_entry(wqe, bit_address, 0); - return __var_waitqueue(bit_address); -} -EXPORT_SYMBOL(inode_bit_waitqueue); - /* * Add inode to LRU if needed (inode is unused and clean). - * - * Needs inode->i_lock held. */ -void inode_add_lru(struct inode *inode) +void inode_lru_list_add(struct inode *inode) { - __inode_add_lru(inode, false); + __inode_lru_list_add(inode, false); } static void inode_lru_list_del(struct inode *inode) { + if (list_empty(&inode->i_lru)) + return; + if (list_lru_del_obj(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_dec(nr_unused); } @@ -567,15 +606,15 @@ static void inode_lru_list_del(struct inode *inode) static void inode_pin_lru_isolating(struct inode *inode) { lockdep_assert_held(&inode->i_lock); - WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE)); - inode->i_state |= I_LRU_ISOLATING; + WARN_ON(inode_state_read(inode) & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE)); + inode_state_set(inode, I_LRU_ISOLATING); } static void inode_unpin_lru_isolating(struct inode *inode) { spin_lock(&inode->i_lock); - WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); - inode->i_state &= ~I_LRU_ISOLATING; + WARN_ON(!(inode_state_read(inode) & I_LRU_ISOLATING)); + inode_state_clear(inode, I_LRU_ISOLATING); /* Called with inode->i_lock which ensures memory ordering. */ inode_wake_up_bit(inode, __I_LRU_ISOLATING); spin_unlock(&inode->i_lock); @@ -587,7 +626,7 @@ static void inode_wait_for_lru_isolating(struct inode *inode) struct wait_queue_head *wq_head; lockdep_assert_held(&inode->i_lock); - if (!(inode->i_state & I_LRU_ISOLATING)) + if (!(inode_state_read(inode) & I_LRU_ISOLATING)) return; wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING); @@ -597,14 +636,14 @@ static void inode_wait_for_lru_isolating(struct inode *inode) * Checking I_LRU_ISOLATING with inode->i_lock guarantees * memory ordering. */ - if (!(inode->i_state & I_LRU_ISOLATING)) + if (!(inode_state_read(inode) & I_LRU_ISOLATING)) break; spin_unlock(&inode->i_lock); schedule(); spin_lock(&inode->i_lock); } finish_wait(wq_head, &wqe.wq_entry); - WARN_ON(inode->i_state & I_LRU_ISOLATING); + WARN_ON(inode_state_read(inode) & I_LRU_ISOLATING); } /** @@ -613,18 +652,22 @@ static void inode_wait_for_lru_isolating(struct inode *inode) */ void inode_sb_list_add(struct inode *inode) { - spin_lock(&inode->i_sb->s_inode_list_lock); - list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); - spin_unlock(&inode->i_sb->s_inode_list_lock); + struct super_block *sb = inode->i_sb; + + spin_lock(&sb->s_inode_list_lock); + list_add(&inode->i_sb_list, &sb->s_inodes); + spin_unlock(&sb->s_inode_list_lock); } EXPORT_SYMBOL_GPL(inode_sb_list_add); static inline void inode_sb_list_del(struct inode *inode) { + struct super_block *sb = inode->i_sb; + if (!list_empty(&inode->i_sb_list)) { - spin_lock(&inode->i_sb->s_inode_list_lock); + spin_lock(&sb->s_inode_list_lock); list_del_init(&inode->i_sb_list); - spin_unlock(&inode->i_sb->s_inode_list_lock); + spin_unlock(&sb->s_inode_list_lock); } } @@ -747,11 +790,11 @@ void clear_inode(struct inode *inode) */ xa_unlock_irq(&inode->i_data.i_pages); BUG_ON(!list_empty(&inode->i_data.i_private_list)); - BUG_ON(!(inode->i_state & I_FREEING)); - BUG_ON(inode->i_state & I_CLEAR); + BUG_ON(!(inode_state_read_once(inode) & I_FREEING)); + BUG_ON(inode_state_read_once(inode) & I_CLEAR); BUG_ON(!list_empty(&inode->i_wb_list)); /* don't need i_lock here, no concurrent mods to i_state */ - inode->i_state = I_FREEING | I_CLEAR; + inode_state_assign_raw(inode, I_FREEING | I_CLEAR); } EXPORT_SYMBOL(clear_inode); @@ -772,12 +815,10 @@ static void evict(struct inode *inode) { const struct super_operations *op = inode->i_sb->s_op; - BUG_ON(!(inode->i_state & I_FREEING)); + BUG_ON(!(inode_state_read_once(inode) & I_FREEING)); BUG_ON(!list_empty(&inode->i_lru)); - if (!list_empty(&inode->i_io_list)) - inode_io_list_del(inode); - + inode_io_list_del(inode); inode_sb_list_del(inode); spin_lock(&inode->i_lock); @@ -806,23 +847,16 @@ static void evict(struct inode *inode) /* * Wake up waiters in __wait_on_freeing_inode(). * - * Lockless hash lookup may end up finding the inode before we removed - * it above, but only lock it *after* we are done with the wakeup below. - * In this case the potential waiter cannot safely block. + * It is an invariant that any thread we need to wake up is already + * accounted for before remove_inode_hash() acquires ->i_lock -- both + * sides take the lock and sleep is aborted if the inode is found + * unhashed. Thus either the sleeper wins and goes off CPU, or removal + * wins and the sleeper aborts after testing with the lock. * - * The inode being unhashed after the call to remove_inode_hash() is - * used as an indicator whether blocking on it is safe. - */ - spin_lock(&inode->i_lock); - /* - * Pairs with the barrier in prepare_to_wait_event() to make sure - * ___wait_var_event() either sees the bit cleared or - * waitqueue_active() check in wake_up_var() sees the waiter. + * This also means we don't need any fences for the call below. */ - smp_mb__after_spinlock(); inode_wake_up_bit(inode, __I_NEW); - BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); - spin_unlock(&inode->i_lock); + BUG_ON(inode_state_read_once(inode) != (I_FREEING | I_CLEAR)); destroy_inode(inode); } @@ -858,26 +892,26 @@ static void dispose_list(struct list_head *head) */ void evict_inodes(struct super_block *sb) { - struct inode *inode, *next; + struct inode *inode; LIST_HEAD(dispose); again: spin_lock(&sb->s_inode_list_lock); - list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { - if (atomic_read(&inode->i_count)) + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (icount_read(inode)) continue; spin_lock(&inode->i_lock); - if (atomic_read(&inode->i_count)) { + if (icount_read(inode)) { spin_unlock(&inode->i_lock); continue; } - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + if (inode_state_read(inode) & (I_NEW | I_FREEING | I_WILL_FREE)) { spin_unlock(&inode->i_lock); continue; } - inode->i_state |= I_FREEING; + inode_state_set(inode, I_FREEING); inode_lru_list_del(inode); spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); @@ -900,46 +934,6 @@ again: } EXPORT_SYMBOL_GPL(evict_inodes); -/** - * invalidate_inodes - attempt to free all inodes on a superblock - * @sb: superblock to operate on - * - * Attempts to free all inodes (including dirty inodes) for a given superblock. - */ -void invalidate_inodes(struct super_block *sb) -{ - struct inode *inode, *next; - LIST_HEAD(dispose); - -again: - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { - spin_lock(&inode->i_lock); - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { - spin_unlock(&inode->i_lock); - continue; - } - if (atomic_read(&inode->i_count)) { - spin_unlock(&inode->i_lock); - continue; - } - - inode->i_state |= I_FREEING; - inode_lru_list_del(inode); - spin_unlock(&inode->i_lock); - list_add(&inode->i_lru, &dispose); - if (need_resched()) { - spin_unlock(&sb->s_inode_list_lock); - cond_resched(); - dispose_list(&dispose); - goto again; - } - } - spin_unlock(&sb->s_inode_list_lock); - - dispose_list(&dispose); -} - /* * Isolate the inode from the LRU in preparation for freeing it. * @@ -970,8 +964,8 @@ static enum lru_status inode_lru_isolate(struct list_head *item, * unreclaimable for a while. Remove them lazily here; iput, * sync, or the last page cache deletion will requeue them. */ - if (atomic_read(&inode->i_count) || - (inode->i_state & ~I_REFERENCED) || + if (icount_read(inode) || + (inode_state_read(inode) & ~I_REFERENCED) || !mapping_shrinkable(&inode->i_data)) { list_lru_isolate(lru, &inode->i_lru); spin_unlock(&inode->i_lock); @@ -980,8 +974,8 @@ static enum lru_status inode_lru_isolate(struct list_head *item, } /* Recently referenced inodes get one more pass */ - if (inode->i_state & I_REFERENCED) { - inode->i_state &= ~I_REFERENCED; + if (inode_state_read(inode) & I_REFERENCED) { + inode_state_clear(inode, I_REFERENCED); spin_unlock(&inode->i_lock); return LRU_ROTATE; } @@ -1008,8 +1002,8 @@ static enum lru_status inode_lru_isolate(struct list_head *item, return LRU_RETRY; } - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_FREEING; + WARN_ON(inode_state_read(inode) & I_NEW); + inode_state_set(inode, I_FREEING); list_lru_isolate_move(lru, &inode->i_lru, freeable); spin_unlock(&inode->i_lock); @@ -1041,7 +1035,8 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock static struct inode *find_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), - void *data, bool is_inode_hash_locked) + void *data, bool is_inode_hash_locked, + bool *isnew) { struct inode *inode = NULL; @@ -1058,16 +1053,17 @@ repeat: if (!test(inode, data)) continue; spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_WILL_FREE)) { + if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) { __wait_on_freeing_inode(inode, is_inode_hash_locked); goto repeat; } - if (unlikely(inode->i_state & I_CREATING)) { + if (unlikely(inode_state_read(inode) & I_CREATING)) { spin_unlock(&inode->i_lock); rcu_read_unlock(); return ERR_PTR(-ESTALE); } __iget(inode); + *isnew = !!(inode_state_read(inode) & I_NEW); spin_unlock(&inode->i_lock); rcu_read_unlock(); return inode; @@ -1082,7 +1078,7 @@ repeat: */ static struct inode *find_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino, - bool is_inode_hash_locked) + bool is_inode_hash_locked, bool *isnew) { struct inode *inode = NULL; @@ -1099,16 +1095,17 @@ repeat: if (inode->i_sb != sb) continue; spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_WILL_FREE)) { + if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) { __wait_on_freeing_inode(inode, is_inode_hash_locked); goto repeat; } - if (unlikely(inode->i_state & I_CREATING)) { + if (unlikely(inode_state_read(inode) & I_CREATING)) { spin_unlock(&inode->i_lock); rcu_read_unlock(); return ERR_PTR(-ESTALE); } __iget(inode); + *isnew = !!(inode_state_read(inode) & I_NEW); spin_unlock(&inode->i_lock); rcu_read_unlock(); return inode; @@ -1160,21 +1157,6 @@ unsigned int get_next_ino(void) EXPORT_SYMBOL(get_next_ino); /** - * new_inode_pseudo - obtain an inode - * @sb: superblock - * - * Allocates a new inode for given superblock. - * Inode wont be chained in superblock s_inodes list - * This means : - * - fs can't be unmount - * - quotas, fsnotify, writeback can't work - */ -struct inode *new_inode_pseudo(struct super_block *sb) -{ - return alloc_inode(sb); -} - -/** * new_inode - obtain an inode * @sb: superblock * @@ -1190,7 +1172,7 @@ struct inode *new_inode(struct super_block *sb) { struct inode *inode; - inode = new_inode_pseudo(sb); + inode = alloc_inode(sb); if (inode) inode_sb_list_add(inode); return inode; @@ -1206,9 +1188,8 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode) /* Set new key only if filesystem hasn't already changed it */ if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) { /* - * ensure nobody is actually holding i_mutex + * ensure nobody is actually holding i_rwsem */ - // mutex_destroy(&inode->i_mutex); init_rwsem(&inode->i_rwsem); lockdep_set_class(&inode->i_rwsem, &type->i_mutex_dir_key); @@ -1229,14 +1210,8 @@ void unlock_new_inode(struct inode *inode) { lockdep_annotate_inode_mutex_key(inode); spin_lock(&inode->i_lock); - WARN_ON(!(inode->i_state & I_NEW)); - inode->i_state &= ~I_NEW & ~I_CREATING; - /* - * Pairs with the barrier in prepare_to_wait_event() to make sure - * ___wait_var_event() either sees the bit cleared or - * waitqueue_active() check in wake_up_var() sees the waiter. - */ - smp_mb(); + WARN_ON(!(inode_state_read(inode) & I_NEW)); + inode_state_clear(inode, I_NEW | I_CREATING); inode_wake_up_bit(inode, __I_NEW); spin_unlock(&inode->i_lock); } @@ -1246,14 +1221,8 @@ void discard_new_inode(struct inode *inode) { lockdep_annotate_inode_mutex_key(inode); spin_lock(&inode->i_lock); - WARN_ON(!(inode->i_state & I_NEW)); - inode->i_state &= ~I_NEW; - /* - * Pairs with the barrier in prepare_to_wait_event() to make sure - * ___wait_var_event() either sees the bit cleared or - * waitqueue_active() check in wake_up_var() sees the waiter. - */ - smp_mb(); + WARN_ON(!(inode_state_read(inode) & I_NEW)); + inode_state_clear(inode, I_NEW); inode_wake_up_bit(inode, __I_NEW); spin_unlock(&inode->i_lock); iput(inode); @@ -1309,6 +1278,7 @@ EXPORT_SYMBOL(unlock_two_nondirectories); * @test: callback used for comparisons between inodes * @set: callback used to initialize a new struct inode * @data: opaque data pointer to pass to @test and @set + * @isnew: pointer to a bool which will indicate whether I_NEW is set * * Search for the inode specified by @hashval and @data in the inode cache, * and if present return it with an increased reference count. This is a @@ -1327,10 +1297,13 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, { struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); struct inode *old; + bool isnew; + + might_sleep(); again: spin_lock(&inode_hash_lock); - old = find_inode(inode->i_sb, head, test, data, true); + old = find_inode(inode->i_sb, head, test, data, true, &isnew); if (unlikely(old)) { /* * Uhhuh, somebody else created the same inode under us. @@ -1339,7 +1312,8 @@ again: spin_unlock(&inode_hash_lock); if (IS_ERR(old)) return NULL; - wait_on_inode(old); + if (unlikely(isnew)) + wait_on_new_inode(old); if (unlikely(inode_unhashed(old))) { iput(old); goto again; @@ -1348,8 +1322,8 @@ again: } if (set && unlikely(set(inode, data))) { - inode = NULL; - goto unlock; + spin_unlock(&inode_hash_lock); + return NULL; } /* @@ -1357,18 +1331,18 @@ again: * caller is responsible for filling in the contents */ spin_lock(&inode->i_lock); - inode->i_state |= I_NEW; + inode_state_set(inode, I_NEW); hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); + spin_unlock(&inode_hash_lock); + /* * Add inode to the sb list if it's not already. It has I_NEW at this * point, so it should be safe to test i_sb_list locklessly. */ if (list_empty(&inode->i_sb_list)) inode_sb_list_add(inode); -unlock: - spin_unlock(&inode_hash_lock); return inode; } @@ -1430,13 +1404,17 @@ struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval, { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode, *new; + bool isnew; + + might_sleep(); again: - inode = find_inode(sb, head, test, data, false); + inode = find_inode(sb, head, test, data, false, &isnew); if (inode) { if (IS_ERR(inode)) return NULL; - wait_on_inode(inode); + if (unlikely(isnew)) + wait_on_new_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; @@ -1471,12 +1449,17 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; + bool isnew; + + might_sleep(); + again: - inode = find_inode_fast(sb, head, ino, false); + inode = find_inode_fast(sb, head, ino, false, &isnew); if (inode) { if (IS_ERR(inode)) return NULL; - wait_on_inode(inode); + if (unlikely(isnew)) + wait_on_new_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; @@ -1490,15 +1473,15 @@ again: spin_lock(&inode_hash_lock); /* We released the lock, so.. */ - old = find_inode_fast(sb, head, ino, true); + old = find_inode_fast(sb, head, ino, true, &isnew); if (!old) { inode->i_ino = ino; spin_lock(&inode->i_lock); - inode->i_state = I_NEW; + inode_state_assign(inode, I_NEW); hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); - inode_sb_list_add(inode); spin_unlock(&inode_hash_lock); + inode_sb_list_add(inode); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -1516,7 +1499,8 @@ again: if (IS_ERR(old)) return NULL; inode = old; - wait_on_inode(inode); + if (unlikely(isnew)) + wait_on_new_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; @@ -1587,7 +1571,7 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { spin_lock(&inode->i_lock); - if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { + if (!(inode_state_read(inode) & (I_FREEING | I_WILL_FREE))) { __iget(inode); spin_unlock(&inode->i_lock); } else { @@ -1620,13 +1604,13 @@ EXPORT_SYMBOL(igrab); * Note2: @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), void *data) + int (*test)(struct inode *, void *), void *data, bool *isnew) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; spin_lock(&inode_hash_lock); - inode = find_inode(sb, head, test, data, true); + inode = find_inode(sb, head, test, data, true, isnew); spin_unlock(&inode_hash_lock); return IS_ERR(inode) ? NULL : inode; @@ -1654,10 +1638,15 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct inode *inode; + bool isnew; + + might_sleep(); + again: - inode = ilookup5_nowait(sb, hashval, test, data); + inode = ilookup5_nowait(sb, hashval, test, data, &isnew); if (inode) { - wait_on_inode(inode); + if (unlikely(isnew)) + wait_on_new_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; @@ -1679,13 +1668,18 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; + bool isnew; + + might_sleep(); + again: - inode = find_inode_fast(sb, head, ino, false); + inode = find_inode_fast(sb, head, ino, false, &isnew); if (inode) { if (IS_ERR(inode)) return NULL; - wait_on_inode(inode); + if (unlikely(isnew)) + wait_on_new_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; @@ -1777,7 +1771,7 @@ struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval, hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_sb == sb && - !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) && + !(inode_state_read_once(inode) & (I_FREEING | I_WILL_FREE)) && test(inode, data)) return inode; } @@ -1816,7 +1810,7 @@ struct inode *find_inode_by_ino_rcu(struct super_block *sb, hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb && - !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE))) + !(inode_state_read_once(inode) & (I_FREEING | I_WILL_FREE))) return inode; } return NULL; @@ -1828,6 +1822,9 @@ int insert_inode_locked(struct inode *inode) struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; struct hlist_head *head = inode_hashtable + hash(sb, ino); + bool isnew; + + might_sleep(); while (1) { struct inode *old = NULL; @@ -1838,7 +1835,7 @@ int insert_inode_locked(struct inode *inode) if (old->i_sb != sb) continue; spin_lock(&old->i_lock); - if (old->i_state & (I_FREEING|I_WILL_FREE)) { + if (inode_state_read(old) & (I_FREEING | I_WILL_FREE)) { spin_unlock(&old->i_lock); continue; } @@ -1846,21 +1843,23 @@ int insert_inode_locked(struct inode *inode) } if (likely(!old)) { spin_lock(&inode->i_lock); - inode->i_state |= I_NEW | I_CREATING; + inode_state_set(inode, I_NEW | I_CREATING); hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); return 0; } - if (unlikely(old->i_state & I_CREATING)) { + if (unlikely(inode_state_read(old) & I_CREATING)) { spin_unlock(&old->i_lock); spin_unlock(&inode_hash_lock); return -EBUSY; } __iget(old); + isnew = !!(inode_state_read(old) & I_NEW); spin_unlock(&old->i_lock); spin_unlock(&inode_hash_lock); - wait_on_inode(old); + if (isnew) + wait_on_new_inode(old); if (unlikely(!inode_unhashed(old))) { iput(old); return -EBUSY; @@ -1875,7 +1874,9 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, { struct inode *old; - inode->i_state |= I_CREATING; + might_sleep(); + + inode_state_set_raw(inode, I_CREATING); old = inode_insert5(inode, hashval, test, NULL, data); if (old != inode) { @@ -1887,11 +1888,11 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, EXPORT_SYMBOL(insert_inode_locked4); -int generic_delete_inode(struct inode *inode) +int inode_just_drop(struct inode *inode) { return 1; } -EXPORT_SYMBOL(generic_delete_inode); +EXPORT_SYMBOL(inode_just_drop); /* * Called when we're dropping the last reference @@ -1907,40 +1908,44 @@ static void iput_final(struct inode *inode) { struct super_block *sb = inode->i_sb; const struct super_operations *op = inode->i_sb->s_op; - unsigned long state; int drop; - WARN_ON(inode->i_state & I_NEW); + WARN_ON(inode_state_read(inode) & I_NEW); + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode); if (op->drop_inode) drop = op->drop_inode(inode); else - drop = generic_drop_inode(inode); + drop = inode_generic_drop(inode); if (!drop && - !(inode->i_state & I_DONTCACHE) && + !(inode_state_read(inode) & I_DONTCACHE) && (sb->s_flags & SB_ACTIVE)) { - __inode_add_lru(inode, true); + __inode_lru_list_add(inode, true); spin_unlock(&inode->i_lock); return; } - state = inode->i_state; - if (!drop) { - WRITE_ONCE(inode->i_state, state | I_WILL_FREE); + /* + * Re-check ->i_count in case the ->drop_inode() hooks played games. + * Note we only execute this if the verdict was to drop the inode. + */ + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode); + + if (drop) { + inode_state_set(inode, I_FREEING); + } else { + inode_state_set(inode, I_WILL_FREE); spin_unlock(&inode->i_lock); write_inode_now(inode, 1); spin_lock(&inode->i_lock); - state = inode->i_state; - WARN_ON(state & I_NEW); - state &= ~I_WILL_FREE; + WARN_ON(inode_state_read(inode) & I_NEW); + inode_state_replace(inode, I_WILL_FREE, I_FREEING); } - WRITE_ONCE(inode->i_state, state | I_FREEING); - if (!list_empty(&inode->i_lru)) - inode_lru_list_del(inode); + inode_lru_list_del(inode); spin_unlock(&inode->i_lock); evict(inode); @@ -1957,23 +1962,61 @@ static void iput_final(struct inode *inode) */ void iput(struct inode *inode) { - if (!inode) + might_sleep(); + if (unlikely(!inode)) return; - BUG_ON(inode->i_state & I_CLEAR); + retry: - if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) { - if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) { - atomic_inc(&inode->i_count); - spin_unlock(&inode->i_lock); - trace_writeback_lazytime_iput(inode); - mark_inode_dirty_sync(inode); - goto retry; - } - iput_final(inode); + lockdep_assert_not_held(&inode->i_lock); + VFS_BUG_ON_INODE(inode_state_read_once(inode) & (I_FREEING | I_CLEAR), inode); + /* + * Note this assert is technically racy as if the count is bogusly + * equal to one, then two CPUs racing to further drop it can both + * conclude it's fine. + */ + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) < 1, inode); + + if (atomic_add_unless(&inode->i_count, -1, 1)) + return; + + if ((inode_state_read_once(inode) & I_DIRTY_TIME) && inode->i_nlink) { + trace_writeback_lazytime_iput(inode); + mark_inode_dirty_sync(inode); + goto retry; + } + + spin_lock(&inode->i_lock); + if (unlikely((inode_state_read(inode) & I_DIRTY_TIME) && inode->i_nlink)) { + spin_unlock(&inode->i_lock); + goto retry; } + + if (!atomic_dec_and_test(&inode->i_count)) { + spin_unlock(&inode->i_lock); + return; + } + + /* + * iput_final() drops ->i_lock, we can't assert on it as the inode may + * be deallocated by the time the call returns. + */ + iput_final(inode); } EXPORT_SYMBOL(iput); +/** + * iput_not_last - put an inode assuming this is not the last reference + * @inode: inode to put + */ +void iput_not_last(struct inode *inode) +{ + VFS_BUG_ON_INODE(inode_state_read_once(inode) & (I_FREEING | I_CLEAR), inode); + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) < 2, inode); + + WARN_ON(atomic_sub_return(1, &inode->i_count) == 0); +} +EXPORT_SYMBOL(iput_not_last); + #ifdef CONFIG_BLOCK /** * bmap - find a block number in a file @@ -2238,7 +2281,7 @@ static int __remove_privs(struct mnt_idmap *idmap, return notify_change(idmap, dentry, &newattrs, NULL); } -int file_remove_privs_flags(struct file *file, unsigned int flags) +static int file_remove_privs_flags(struct file *file, unsigned int flags) { struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); @@ -2263,7 +2306,6 @@ int file_remove_privs_flags(struct file *file, unsigned int flags) inode_has_no_xattr(inode); return error; } -EXPORT_SYMBOL_GPL(file_remove_privs_flags); /** * file_remove_privs - remove special file privileges (suid, capabilities) @@ -2318,42 +2360,40 @@ out: } EXPORT_SYMBOL(current_time); -static int inode_needs_update_time(struct inode *inode) +static int file_update_time_flags(struct file *file, unsigned int flags) { + struct inode *inode = file_inode(file); struct timespec64 now, ts; - int sync_it = 0; + int sync_mode = 0; + int ret = 0; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return 0; + if (unlikely(file->f_mode & FMODE_NOCMTIME)) + return 0; now = current_time(inode); ts = inode_get_mtime(inode); if (!timespec64_equal(&ts, &now)) - sync_it |= S_MTIME; - + sync_mode |= S_MTIME; ts = inode_get_ctime(inode); if (!timespec64_equal(&ts, &now)) - sync_it |= S_CTIME; - + sync_mode |= S_CTIME; if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) - sync_it |= S_VERSION; - - return sync_it; -} + sync_mode |= S_VERSION; -static int __file_update_time(struct file *file, int sync_mode) -{ - int ret = 0; - struct inode *inode = file_inode(file); + if (!sync_mode) + return 0; - /* try to update time settings */ - if (!mnt_get_write_access_file(file)) { - ret = inode_update_time(inode, sync_mode); - mnt_put_write_access_file(file); - } + if (flags & IOCB_NOWAIT) + return -EAGAIN; + if (mnt_get_write_access_file(file)) + return 0; + ret = inode_update_time(inode, sync_mode); + mnt_put_write_access_file(file); return ret; } @@ -2373,14 +2413,7 @@ static int __file_update_time(struct file *file, int sync_mode) */ int file_update_time(struct file *file) { - int ret; - struct inode *inode = file_inode(file); - - ret = inode_needs_update_time(inode); - if (ret <= 0) - return ret; - - return __file_update_time(file, ret); + return file_update_time_flags(file, 0); } EXPORT_SYMBOL(file_update_time); @@ -2402,7 +2435,6 @@ EXPORT_SYMBOL(file_update_time); static int file_modified_flags(struct file *file, int flags) { int ret; - struct inode *inode = file_inode(file); /* * Clear the security bits if the process is not being run by root. @@ -2411,17 +2443,7 @@ static int file_modified_flags(struct file *file, int flags) ret = file_remove_privs_flags(file, flags); if (ret) return ret; - - if (unlikely(file->f_mode & FMODE_NOCMTIME)) - return 0; - - ret = inode_needs_update_time(inode); - if (ret <= 0) - return ret; - if (flags & IOCB_NOWAIT) - return -EAGAIN; - - return __file_update_time(file, ret); + return file_update_time_flags(file, flags); } /** @@ -2568,21 +2590,28 @@ void __init inode_init(void) void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_mode = mode; - if (S_ISCHR(mode)) { + switch (inode->i_mode & S_IFMT) { + case S_IFCHR: inode->i_fop = &def_chr_fops; inode->i_rdev = rdev; - } else if (S_ISBLK(mode)) { + break; + case S_IFBLK: if (IS_ENABLED(CONFIG_BLOCK)) inode->i_fop = &def_blk_fops; inode->i_rdev = rdev; - } else if (S_ISFIFO(mode)) + break; + case S_IFIFO: inode->i_fop = &pipefifo_fops; - else if (S_ISSOCK(mode)) - ; /* leave it no_open_fops */ - else + break; + case S_IFSOCK: + /* leave it no_open_fops */ + break; + default: printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" " inode %s:%lu\n", mode, inode->i_sb->s_id, inode->i_ino); + break; + } } EXPORT_SYMBOL(init_special_inode); @@ -2663,7 +2692,7 @@ EXPORT_SYMBOL(inode_dio_finished); * proceed with a truncate or equivalent operation. * * Must be called under a lock that serializes taking new references - * to i_dio_count, usually by inode->i_mutex. + * to i_dio_count, usually by inode->i_rwsem. */ void inode_dio_wait(struct inode *inode) { @@ -2681,7 +2710,7 @@ EXPORT_SYMBOL(inode_dio_wait_interruptible); /* * inode_set_flags - atomically set some inode flags * - * Note: the caller should be holding i_mutex, or else be sure that + * Note: the caller should be holding i_rwsem exclusively, or else be sure that * they have exclusive access to the inode structure (i.e., while the * inode is being instantiated). The reason for the cmpxchg() loop * --- which wouldn't be necessary if all code paths which modify @@ -2689,7 +2718,7 @@ EXPORT_SYMBOL(inode_dio_wait_interruptible); * code path which doesn't today so we use cmpxchg() out of an abundance * of caution. * - * In the long run, i_mutex is overkill, and we should probably look + * In the long run, i_rwsem is overkill, and we should probably look * at using the i_lock spinlock to protect i_flags, and then make sure * it is so documented in include/linux/fs.h and that all code follows * the locking convention!! @@ -2953,3 +2982,26 @@ umode_t mode_strip_sgid(struct mnt_idmap *idmap, return mode & ~S_ISGID; } EXPORT_SYMBOL(mode_strip_sgid); + +#ifdef CONFIG_DEBUG_VFS +/* + * Dump an inode. + * + * TODO: add a proper inode dumping routine, this is a stub to get debug off the + * ground. + * + * TODO: handle getting to fs type with get_kernel_nofault()? + * See dump_mapping() above. + */ +void dump_inode(struct inode *inode, const char *reason) +{ + struct super_block *sb = inode->i_sb; + + pr_warn("%s encountered for inode %px\n" + "fs %s mode %ho opflags 0x%hx flags 0x%x state 0x%x count %d\n", + reason, inode, sb->s_type->name, inode->i_mode, inode->i_opflags, + inode->i_flags, inode_state_read_once(inode), atomic_read(&inode->i_count)); +} + +EXPORT_SYMBOL(dump_inode); +#endif |
