diff options
Diffstat (limited to 'fs/nilfs2')
35 files changed, 3248 insertions, 2571 deletions
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig index 254d102e79c9..7dae168e346e 100644 --- a/fs/nilfs2/Kconfig +++ b/fs/nilfs2/Kconfig @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only config NILFS2_FS tristate "NILFS2 file system support" + select BUFFER_HEAD select CRC32 + select LEGACY_DIRECT_IO help NILFS2 is a log-structured file system (LFS) supporting continuous snapshotting. In addition to versioning capability of the entire diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 6ce8617b562d..6b506995818d 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -21,6 +21,8 @@ * nilfs_palloc_groups_per_desc_block - get the number of groups that a group * descriptor block can maintain * @inode: inode of metadata file using this allocator + * + * Return: Number of groups that a group descriptor block can maintain. */ static inline unsigned long nilfs_palloc_groups_per_desc_block(const struct inode *inode) @@ -32,6 +34,8 @@ nilfs_palloc_groups_per_desc_block(const struct inode *inode) /** * nilfs_palloc_groups_count - get maximum number of groups * @inode: inode of metadata file using this allocator + * + * Return: Maximum number of groups. */ static inline unsigned long nilfs_palloc_groups_count(const struct inode *inode) @@ -43,6 +47,8 @@ nilfs_palloc_groups_count(const struct inode *inode) * nilfs_palloc_init_blockgroup - initialize private variables for allocator * @inode: inode of metadata file using this allocator * @entry_size: size of the persistent object + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned int entry_size) { @@ -78,6 +84,9 @@ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned int entry_size) * @inode: inode of metadata file using this allocator * @nr: serial number of the entry (e.g. inode number) * @offset: pointer to store offset number in the group + * + * Return: Number of the group that contains the entry with the index + * specified by @nr. */ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, unsigned long *offset) @@ -93,8 +102,8 @@ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, * @inode: inode of metadata file using this allocator * @group: group number * - * nilfs_palloc_desc_blkoff() returns block offset of the descriptor - * block which contains a descriptor of the specified group. + * Return: Index number in the metadata file of the descriptor block of + * the group specified by @group. */ static unsigned long nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) @@ -111,6 +120,9 @@ nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) * * nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap * block used to allocate/deallocate entries in the specified group. + * + * Return: Index number in the metadata file of the bitmap block of + * the group specified by @group. */ static unsigned long nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) @@ -125,6 +137,8 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group * @desc: pointer to descriptor structure for the group * @lock: spin lock protecting @desc + * + * Return: Number of free entries written in the group descriptor @desc. */ static unsigned long nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc, @@ -143,6 +157,9 @@ nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc, * @desc: pointer to descriptor structure for the group * @lock: spin lock protecting @desc * @n: delta to be added + * + * Return: Number of free entries after adjusting the group descriptor + * @desc. */ static u32 nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc, @@ -161,6 +178,9 @@ nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc, * nilfs_palloc_entry_blkoff - get block offset of an entry block * @inode: inode of metadata file using this allocator * @nr: serial number of the entry (e.g. inode number) + * + * Return: Index number in the metadata file of the block containing + * the entry specified by @nr. */ static unsigned long nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) @@ -177,12 +197,14 @@ nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) * nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block * @inode: inode of metadata file * @bh: buffer head of the buffer to be initialized - * @kaddr: kernel address mapped for the page including the buffer + * @from: kernel address mapped for a chunk of the block + * + * This function does not yet support the case where block size > PAGE_SIZE. */ static void nilfs_palloc_desc_block_init(struct inode *inode, - struct buffer_head *bh, void *kaddr) + struct buffer_head *bh, void *from) { - struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh); + struct nilfs_palloc_group_desc *desc = from; unsigned long n = nilfs_palloc_groups_per_desc_block(inode); __le32 nfrees; @@ -205,7 +227,8 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, int ret; spin_lock(lock); - if (prev->bh && blkoff == prev->blkoff) { + if (prev->bh && blkoff == prev->blkoff && + likely(buffer_uptodate(prev->bh))) { get_bh(prev->bh); *bhp = prev->bh; spin_unlock(lock); @@ -235,6 +258,12 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, * @blkoff: block offset * @prev: nilfs_bh_assoc struct of the last used buffer * @lock: spin lock protecting @prev + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Non-existent block. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff, struct nilfs_bh_assoc *prev, @@ -255,6 +284,8 @@ static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff, * @group: group number * @create: create flag * @bhp: pointer to store the resultant buffer head + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_get_desc_block(struct inode *inode, unsigned long group, @@ -274,6 +305,8 @@ static int nilfs_palloc_get_desc_block(struct inode *inode, * @group: group number * @create: create flag * @bhp: pointer to store the resultant buffer head + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_get_bitmap_block(struct inode *inode, unsigned long group, @@ -291,6 +324,8 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode, * nilfs_palloc_delete_bitmap_block - delete a bitmap block * @inode: inode of metadata file using this allocator * @group: group number + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_delete_bitmap_block(struct inode *inode, unsigned long group) @@ -309,6 +344,8 @@ static int nilfs_palloc_delete_bitmap_block(struct inode *inode, * @nr: serial number of the entry (e.g. inode number) * @create: create flag * @bhp: pointer to store the resultant buffer head + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, int create, struct buffer_head **bhp) @@ -325,6 +362,8 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, * nilfs_palloc_delete_entry_block - delete an entry block * @inode: inode of metadata file using this allocator * @nr: serial number of the entry + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr) { @@ -336,38 +375,55 @@ static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr) } /** - * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor + * nilfs_palloc_group_desc_offset - calculate the byte offset of a group + * descriptor in the folio containing it * @inode: inode of metadata file using this allocator * @group: group number - * @bh: buffer head of the buffer storing the group descriptor block - * @kaddr: kernel address mapped for the page including the buffer + * @bh: buffer head of the group descriptor block + * + * Return: Byte offset in the folio of the group descriptor for @group. */ -static struct nilfs_palloc_group_desc * -nilfs_palloc_block_get_group_desc(const struct inode *inode, - unsigned long group, - const struct buffer_head *bh, void *kaddr) +static size_t nilfs_palloc_group_desc_offset(const struct inode *inode, + unsigned long group, + const struct buffer_head *bh) { - return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) + - group % nilfs_palloc_groups_per_desc_block(inode); + return offset_in_folio(bh->b_folio, bh->b_data) + + sizeof(struct nilfs_palloc_group_desc) * + (group % nilfs_palloc_groups_per_desc_block(inode)); +} + +/** + * nilfs_palloc_bitmap_offset - calculate the byte offset of a bitmap block + * in the folio containing it + * @bh: buffer head of the bitmap block + * + * Return: Byte offset in the folio of the bitmap block for @bh. + */ +static size_t nilfs_palloc_bitmap_offset(const struct buffer_head *bh) +{ + return offset_in_folio(bh->b_folio, bh->b_data); } /** - * nilfs_palloc_block_get_entry - get kernel address of an entry + * nilfs_palloc_entry_offset - calculate the byte offset of an entry in the + * folio containing it * @inode: inode of metadata file using this allocator - * @nr: serial number of the entry (e.g. inode number) - * @bh: buffer head of the buffer storing the entry block - * @kaddr: kernel address mapped for the page including the buffer + * @nr: serial number of the entry (e.g. inode number) + * @bh: buffer head of the entry block + * + * Return: Byte offset in the folio of the entry @nr. */ -void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, - const struct buffer_head *bh, void *kaddr) +size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr, + const struct buffer_head *bh) { - unsigned long entry_offset, group_offset; + unsigned long entry_index_in_group, entry_index_in_block; - nilfs_palloc_group(inode, nr, &group_offset); - entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block; + nilfs_palloc_group(inode, nr, &entry_index_in_group); + entry_index_in_block = entry_index_in_group % + NILFS_MDT(inode)->mi_entries_per_block; - return kaddr + bh_offset(bh) + - entry_offset * NILFS_MDT(inode)->mi_entry_size; + return offset_in_folio(bh->b_folio, bh->b_data) + + entry_index_in_block * NILFS_MDT(inode)->mi_entry_size; } /** @@ -376,11 +432,15 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, * @target: offset number of an entry in the group (start point) * @bsize: size in bits * @lock: spin lock protecting @bitmap + * @wrap: whether to wrap around + * + * Return: Offset number within the group of the found free entry, or + * %-ENOSPC if not found. */ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, unsigned long target, unsigned int bsize, - spinlock_t *lock) + spinlock_t *lock, bool wrap) { int pos, end = bsize; @@ -396,6 +456,8 @@ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, end = target; } + if (!wrap) + return -ENOSPC; /* wrap around */ for (pos = 0; pos < end; pos++) { @@ -415,6 +477,9 @@ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, * @inode: inode of metadata file using this allocator * @curr: current group number * @max: maximum number of groups + * + * Return: Number of remaining descriptors (= groups) managed by the descriptor + * block. */ static unsigned long nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, @@ -430,6 +495,8 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, * nilfs_palloc_count_desc_blocks - count descriptor blocks number * @inode: inode of metadata file using this allocator * @desc_blocks: descriptor blocks number [out] + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_count_desc_blocks(struct inode *inode, unsigned long *desc_blocks) @@ -450,6 +517,8 @@ static int nilfs_palloc_count_desc_blocks(struct inode *inode, * MDT file growing * @inode: inode of metadata file using this allocator * @desc_blocks: known current descriptor blocks count + * + * Return: true if a group can be added in the metadata file, false if not. */ static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, unsigned long desc_blocks) @@ -464,6 +533,12 @@ static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, * @inode: inode of metadata file using this allocator * @nused: current number of used entries * @nmaxp: max number of entries [out] + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ERANGE - Number of entries in use is out of range. */ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) { @@ -494,14 +569,22 @@ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the allocation + * @wrap: whether to wrap around + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - Entries exhausted (No entries available for allocation). + * * %-EROFS - Read only filesystem */ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, - struct nilfs_palloc_req *req) + struct nilfs_palloc_req *req, bool wrap) { struct buffer_head *desc_bh, *bitmap_bh; struct nilfs_palloc_group_desc *desc; unsigned char *bitmap; - void *desc_kaddr, *bitmap_kaddr; + size_t doff, boff; unsigned long group, maxgroup, ngroups; unsigned long group_offset, maxgroup_offset; unsigned long n, entries_per_group; @@ -515,7 +598,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, entries_per_group = nilfs_palloc_entries_per_group(inode); for (i = 0; i < ngroups; i += n) { - if (group >= ngroups) { + if (group >= ngroups && wrap) { /* wrap around */ group = 0; maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr, @@ -524,54 +607,64 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh); if (ret < 0) return ret; - desc_kaddr = kmap(desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc( - inode, group, desc_bh, desc_kaddr); + + doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh); + desc = kmap_local_folio(desc_bh->b_folio, doff); n = nilfs_palloc_rest_groups_in_desc_block(inode, group, maxgroup); - for (j = 0; j < n; j++, desc++, group++) { + for (j = 0; j < n; j++, group++, group_offset = 0) { lock = nilfs_mdt_bgl_lock(inode, group); - if (nilfs_palloc_group_desc_nfrees(desc, lock) > 0) { - ret = nilfs_palloc_get_bitmap_block( - inode, group, 1, &bitmap_bh); - if (ret < 0) - goto out_desc; - bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(bitmap_bh); - pos = nilfs_palloc_find_available_slot( - bitmap, group_offset, - entries_per_group, lock); - if (pos >= 0) { - /* found a free entry */ - nilfs_palloc_group_desc_add_entries( - desc, lock, -1); - req->pr_entry_nr = - entries_per_group * group + pos; - kunmap(desc_bh->b_page); - kunmap(bitmap_bh->b_page); - - req->pr_desc_bh = desc_bh; - req->pr_bitmap_bh = bitmap_bh; - return 0; - } - kunmap(bitmap_bh->b_page); - brelse(bitmap_bh); + if (nilfs_palloc_group_desc_nfrees(&desc[j], lock) == 0) + continue; + + kunmap_local(desc); + ret = nilfs_palloc_get_bitmap_block(inode, group, 1, + &bitmap_bh); + if (unlikely(ret < 0)) { + brelse(desc_bh); + return ret; } - group_offset = 0; + /* + * Re-kmap the folio containing the first (and + * subsequent) group descriptors. + */ + desc = kmap_local_folio(desc_bh->b_folio, doff); + + boff = nilfs_palloc_bitmap_offset(bitmap_bh); + bitmap = kmap_local_folio(bitmap_bh->b_folio, boff); + pos = nilfs_palloc_find_available_slot( + bitmap, group_offset, entries_per_group, lock, + wrap); + /* + * Since the search for a free slot in the second and + * subsequent bitmap blocks always starts from the + * beginning, the wrap flag only has an effect on the + * first search. + */ + kunmap_local(bitmap); + if (pos >= 0) + goto found; + + brelse(bitmap_bh); } - kunmap(desc_bh->b_page); + kunmap_local(desc); brelse(desc_bh); } /* no entries left */ return -ENOSPC; - out_desc: - kunmap(desc_bh->b_page); - brelse(desc_bh); - return ret; +found: + /* found a free entry */ + nilfs_palloc_group_desc_add_entries(&desc[j], lock, -1); + req->pr_entry_nr = entries_per_group * group + pos; + kunmap_local(desc); + + req->pr_desc_bh = desc_bh; + req->pr_bitmap_bh = bitmap_bh; + return 0; } /** @@ -598,18 +691,18 @@ void nilfs_palloc_commit_alloc_entry(struct inode *inode, void nilfs_palloc_commit_free_entry(struct inode *inode, struct nilfs_palloc_req *req) { - struct nilfs_palloc_group_desc *desc; unsigned long group, group_offset; + size_t doff, boff; + struct nilfs_palloc_group_desc *desc; unsigned char *bitmap; - void *desc_kaddr, *bitmap_kaddr; spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); - desc_kaddr = kmap(req->pr_desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc(inode, group, - req->pr_desc_bh, desc_kaddr); - bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); + doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh); + desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff); + + boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh); + bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff); lock = nilfs_mdt_bgl_lock(inode, group); if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) @@ -620,8 +713,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, else nilfs_palloc_group_desc_add_entries(desc, lock, 1); - kunmap(req->pr_bitmap_bh->b_page); - kunmap(req->pr_desc_bh->b_page); + kunmap_local(bitmap); + kunmap_local(desc); mark_buffer_dirty(req->pr_desc_bh); mark_buffer_dirty(req->pr_bitmap_bh); @@ -640,17 +733,17 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, struct nilfs_palloc_req *req) { struct nilfs_palloc_group_desc *desc; - void *desc_kaddr, *bitmap_kaddr; + size_t doff, boff; unsigned char *bitmap; unsigned long group, group_offset; spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); - desc_kaddr = kmap(req->pr_desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc(inode, group, - req->pr_desc_bh, desc_kaddr); - bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); + doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh); + desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff); + + boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh); + bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff); lock = nilfs_mdt_bgl_lock(inode, group); if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) @@ -661,8 +754,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, else nilfs_palloc_group_desc_add_entries(desc, lock, 1); - kunmap(req->pr_bitmap_bh->b_page); - kunmap(req->pr_desc_bh->b_page); + kunmap_local(bitmap); + kunmap_local(desc); brelse(req->pr_bitmap_bh); brelse(req->pr_desc_bh); @@ -676,6 +769,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, * nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the removal + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_palloc_prepare_free_entry(struct inode *inode, struct nilfs_palloc_req *req) @@ -720,13 +815,15 @@ void nilfs_palloc_abort_free_entry(struct inode *inode, * @inode: inode of metadata file using this allocator * @entry_nrs: array of entry numbers to be deallocated * @nitems: number of entries stored in @entry_nrs + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) { struct buffer_head *desc_bh, *bitmap_bh; struct nilfs_palloc_group_desc *desc; unsigned char *bitmap; - void *desc_kaddr, *bitmap_kaddr; + size_t doff, boff; unsigned long group, group_offset; __u64 group_min_nr, last_nrs[8]; const unsigned long epg = nilfs_palloc_entries_per_group(inode); @@ -754,8 +851,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) /* Get the first entry number of the group */ group_min_nr = (__u64)group * epg; - bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(bitmap_bh); + boff = nilfs_palloc_bitmap_offset(bitmap_bh); + bitmap = kmap_local_folio(bitmap_bh->b_folio, boff); lock = nilfs_mdt_bgl_lock(inode, group); j = i; @@ -800,7 +897,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) entry_start = rounddown(group_offset, epb); } while (true); - kunmap(bitmap_bh->b_page); + kunmap_local(bitmap); mark_buffer_dirty(bitmap_bh); brelse(bitmap_bh); @@ -814,11 +911,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) inode->i_ino); } - desc_kaddr = kmap_atomic(desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc( - inode, group, desc_bh, desc_kaddr); + doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh); + desc = kmap_local_folio(desc_bh->b_folio, doff); nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n); - kunmap_atomic(desc_kaddr); + kunmap_local(desc); mark_buffer_dirty(desc_bh); nilfs_mdt_mark_dirty(inode); brelse(desc_bh); diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index b667e869ac07..046d876ea3e0 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -21,6 +21,8 @@ * * The number of entries per group is defined by the number of bits * that a bitmap block can maintain. + * + * Return: Number of entries per group. */ static inline unsigned long nilfs_palloc_entries_per_group(const struct inode *inode) @@ -31,13 +33,13 @@ nilfs_palloc_entries_per_group(const struct inode *inode) int nilfs_palloc_init_blockgroup(struct inode *, unsigned int); int nilfs_palloc_get_entry_block(struct inode *, __u64, int, struct buffer_head **); -void *nilfs_palloc_block_get_entry(const struct inode *, __u64, - const struct buffer_head *, void *); +size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr, + const struct buffer_head *bh); int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); /** - * nilfs_palloc_req - persistent allocator request and reply + * struct nilfs_palloc_req - persistent allocator request and reply * @pr_entry_nr: entry number (vblocknr or inode number) * @pr_desc_bh: buffer head of the buffer containing block group descriptors * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap @@ -50,8 +52,8 @@ struct nilfs_palloc_req { struct buffer_head *pr_entry_bh; }; -int nilfs_palloc_prepare_alloc_entry(struct inode *, - struct nilfs_palloc_req *); +int nilfs_palloc_prepare_alloc_entry(struct inode *inode, + struct nilfs_palloc_req *req, bool wrap); void nilfs_palloc_commit_alloc_entry(struct inode *, struct nilfs_palloc_req *); void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *); diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 798a2c1b38c6..ccc1a7aa52d2 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -47,17 +47,14 @@ static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, * @ptrp: place to store the value associated to @key * * Description: nilfs_bmap_lookup_at_level() finds a record whose key - * matches @key in the block at @level of the bmap. - * - * Return Value: On success, 0 is returned and the record associated with @key - * is stored in the place pointed by @ptrp. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - A record associated with @key does not exist. + * matches @key in the block at @level of the bmap. The record associated + * with @key is stored in the place pointed to by @ptrp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - A record associated with @key does not exist. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, __u64 *ptrp) @@ -67,20 +64,28 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); - if (ret < 0) { - ret = nilfs_bmap_convert_error(bmap, __func__, ret); + if (ret < 0) goto out; - } + if (NILFS_BMAP_USE_VBN(bmap)) { ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, &blocknr); if (!ret) *ptrp = blocknr; + else if (ret == -ENOENT) { + /* + * If there was no valid entry in DAT for the block + * address obtained by b_ops->bop_lookup, then pass + * internal code -EINVAL to nilfs_bmap_convert_error + * to treat it as metadata corruption. + */ + ret = -EINVAL; + } } out: up_read(&bmap->b_sem); - return ret; + return nilfs_bmap_convert_error(bmap, __func__, ret); } int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp, @@ -130,14 +135,11 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) * Description: nilfs_bmap_insert() inserts the new key-record pair specified * by @key and @rec into @bmap. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EEXIST - A record associated with @key already exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EEXIST - A record associated with @key already exists. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec) { @@ -185,14 +187,11 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) * Description: nilfs_bmap_seek_key() seeks a valid key on @bmap * starting from @start, and stores it to @keyp if found. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - No valid entry was found + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No valid entry was found. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp) { @@ -228,14 +227,11 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp) * Description: nilfs_bmap_delete() deletes the key-record pair specified by * @key from @bmap. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - A record associated with @key does not exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - A record associated with @key does not exist. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key) { @@ -282,12 +278,10 @@ static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, __u64 key) * Description: nilfs_bmap_truncate() removes key-record pairs whose keys are * greater than or equal to @key from @bmap. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key) { @@ -322,12 +316,10 @@ void nilfs_bmap_clear(struct nilfs_bmap *bmap) * Description: nilfs_bmap_propagate() marks the buffers that directly or * indirectly refer to the block specified by @bh dirty. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) { @@ -341,7 +333,7 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) } /** - * nilfs_bmap_lookup_dirty_buffers - + * nilfs_bmap_lookup_dirty_buffers - collect dirty block buffers * @bmap: bmap * @listp: pointer to buffer head list */ @@ -354,22 +346,22 @@ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap, /** * nilfs_bmap_assign - assign a new block number to a block - * @bmap: bmap - * @bh: pointer to buffer head + * @bmap: bmap + * @bh: place to store a pointer to the buffer head to which a block + * address is assigned (in/out) * @blocknr: block number - * @binfo: block information + * @binfo: block information * * Description: nilfs_bmap_assign() assigns the block number @blocknr to the - * buffer specified by @bh. - * - * Return Value: On success, 0 is returned and the buffer head of a newly - * create buffer and the block information associated with the buffer are - * stored in the place pointed by @bh and @binfo, respectively. On error, one - * of the following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * buffer specified by @bh. The block information is stored in the memory + * pointed to by @binfo, and the buffer head may be replaced as a block + * address is assigned, in which case a pointer to the new buffer head is + * stored in the memory pointed to by @bh. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_assign(struct nilfs_bmap *bmap, struct buffer_head **bh, @@ -394,12 +386,10 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap, * Description: nilfs_bmap_mark() marks the block specified by @key and @level * as dirty. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) { @@ -422,7 +412,7 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) * Description: nilfs_test_and_clear() is the atomic operation to test and * clear the dirty state of @bmap. * - * Return Value: 1 is returned if @bmap is dirty, or 0 if clear. + * Return: 1 if @bmap is dirty, or 0 if clear. */ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) { @@ -442,15 +432,9 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { - struct buffer_head *pbh; - __u64 key; - - key = page_index(bh->b_page) << (PAGE_SHIFT - - bmap->b_inode->i_blkbits); - for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page) - key++; + loff_t pos = folio_pos(bh->b_folio) + bh_offset(bh); - return key; + return pos >> bmap->b_inode->i_blkbits; } __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) @@ -488,10 +472,10 @@ static struct lock_class_key nilfs_bmap_mdt_lock_key; * * Description: nilfs_bmap_read() initializes the bmap @bmap. * - * Return Value: On success, 0 is returned. On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (corrupted bmap). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) { @@ -540,13 +524,10 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) */ void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) { - down_write(&bmap->b_sem); memcpy(raw_inode->i_bmap, bmap->b_u.u_data, NILFS_INODE_BMAP_SIZE * sizeof(__le64)); if (bmap->b_inode->i_ino == NILFS_DAT_INO) bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; - - up_write(&bmap->b_sem); } void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 608168a5cb88..4656df392722 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -44,6 +44,19 @@ struct nilfs_bmap_stats { /** * struct nilfs_bmap_operations - bmap operation table + * @bop_lookup: single block search operation + * @bop_lookup_contig: consecutive block search operation + * @bop_insert: block insertion operation + * @bop_delete: block delete operation + * @bop_clear: block mapping resource release operation + * @bop_propagate: operation to propagate dirty state towards the + * mapping root + * @bop_lookup_dirty_buffers: operation to collect dirty block buffers + * @bop_assign: disk block address assignment operation + * @bop_mark: operation to mark in-use blocks as dirty for + * relocation by GC + * @bop_seek_key: find valid block key operation + * @bop_last_key: find last valid block key operation */ struct nilfs_bmap_operations { int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); @@ -66,7 +79,7 @@ struct nilfs_bmap_operations { int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *); int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); - /* The following functions are internal use only. */ + /* private: internal use only */ int (*bop_check_insert)(const struct nilfs_bmap *, __u64); int (*bop_check_delete)(struct nilfs_bmap *, __u64); int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); @@ -74,9 +87,8 @@ struct nilfs_bmap_operations { #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) -#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) -#define NILFS_BMAP_NEW_PTR_INIT \ - (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1)) +#define NILFS_BMAP_KEY_BIT BITS_PER_LONG +#define NILFS_BMAP_NEW_PTR_INIT (1UL << (BITS_PER_LONG - 1)) static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) { diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index e74fda212620..568367129092 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -35,6 +35,7 @@ void nilfs_init_btnc_inode(struct inode *btnc_inode) ii->i_flags = 0; memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); + btnc_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; } void nilfs_btnode_cache_clear(struct address_space *btnc) @@ -51,22 +52,36 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); if (unlikely(!bh)) - return NULL; + return ERR_PTR(-ENOMEM); if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || buffer_dirty(bh))) { - brelse(bh); - BUG(); + /* + * The block buffer at the specified new address was already + * in use. This can happen if it is a virtual block number + * and has been reallocated due to corruption of the bitmap + * used to manage its allocation state (if not, the buffer + * clearing of an abandoned b-tree node is missing somewhere). + */ + nilfs_error(inode->i_sb, + "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)", + (unsigned long long)blocknr, inode->i_ino); + goto failed; } memset(bh->b_data, 0, i_blocksize(inode)); - bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = blocknr; set_buffer_mapped(bh); set_buffer_uptodate(bh); - unlock_page(bh->b_page); - put_page(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); return bh; + +failed: + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); + brelse(bh); + return ERR_PTR(-EIO); } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, @@ -75,7 +90,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, { struct buffer_head *bh; struct inode *inode = btnc->host; - struct page *page; + struct folio *folio; int err; bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); @@ -83,7 +98,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, return -ENOMEM; err = -EEXIST; /* internal code */ - page = bh->b_page; + folio = bh->b_folio; if (buffer_uptodate(bh) || buffer_dirty(bh)) goto found; @@ -118,7 +133,6 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, goto found; } set_buffer_mapped(bh); - bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); @@ -130,8 +144,8 @@ found: *pbh = bh; out_locked: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return err; } @@ -145,30 +159,52 @@ out_locked: void nilfs_btnode_delete(struct buffer_head *bh) { struct address_space *mapping; - struct page *page = bh->b_page; - pgoff_t index = page_index(page); + struct folio *folio = bh->b_folio; + pgoff_t index = folio->index; int still_dirty; - get_page(page); - lock_page(page); - wait_on_page_writeback(page); + folio_get(folio); + folio_lock(folio); + folio_wait_writeback(folio); nilfs_forget_buffer(bh); - still_dirty = PageDirty(page); - mapping = page->mapping; - unlock_page(page); - put_page(page); + still_dirty = folio_test_dirty(folio); + mapping = folio->mapping; + folio_unlock(folio); + folio_put(folio); if (!still_dirty && mapping) invalidate_inode_pages2_range(mapping, index, index); } /** - * nilfs_btnode_prepare_change_key - * prepare to move contents of the block for old key to one of new key. - * the old buffer will not be removed, but might be reused for new buffer. - * it might return -ENOMEM because of memory allocation errors, - * and might return -EIO because of disk read errors. + * nilfs_btnode_prepare_change_key - prepare to change the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_prepare_change_key() prepares to move the contents of the + * b-tree node block of the old key given in the "oldkey" member of @ctxt to + * the position of the new key given in the "newkey" member of @ctxt in the + * page cache @btnc. Here, the key of the block is an index in units of + * blocks, and if the page and block sizes match, it matches the page index + * in the page cache. + * + * If the page size and block size match, this function attempts to move the + * entire folio, and in preparation for this, inserts the original folio into + * the new index of the cache. If this insertion fails or if the page size + * and block size are different, it falls back to a copy preparation using + * nilfs_btnode_create_block(), inserts a new block at the position + * corresponding to "newkey", and stores the buffer head pointer in the + * "newbh" member of @ctxt. + * + * Note that the current implementation does not support folio sizes larger + * than the page size. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -185,23 +221,23 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, ctxt->newbh = NULL; if (inode->i_blkbits == PAGE_SHIFT) { - struct page *opage = obh->b_page; - lock_page(opage); + struct folio *ofolio = obh->b_folio; + folio_lock(ofolio); retry: - /* BUG_ON(oldkey != obh->b_page->index); */ - if (unlikely(oldkey != opage->index)) - NILFS_PAGE_BUG(opage, + /* BUG_ON(oldkey != obh->b_folio->index); */ + if (unlikely(oldkey != ofolio->index)) + NILFS_FOLIO_BUG(ofolio, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); xa_lock_irq(&btnc->i_pages); - err = __xa_insert(&btnc->i_pages, newkey, opage, GFP_NOFS); + err = __xa_insert(&btnc->i_pages, newkey, ofolio, GFP_NOFS); xa_unlock_irq(&btnc->i_pages); /* - * Note: page->index will not change to newkey until + * Note: folio->index will not change to newkey until * nilfs_btnode_commit_change_key() will be called. - * To protect the page in intermediate state, the page lock + * To protect the folio in intermediate state, the folio lock * is held. */ if (!err) @@ -213,40 +249,53 @@ retry: if (!err) goto retry; /* fallback to copy mode */ - unlock_page(opage); + folio_unlock(ofolio); } nbh = nilfs_btnode_create_block(btnc, newkey); - if (!nbh) - return -ENOMEM; + if (IS_ERR(nbh)) + return PTR_ERR(nbh); BUG_ON(nbh == obh); ctxt->newbh = nbh; return 0; failed_unlock: - unlock_page(obh->b_page); + folio_unlock(obh->b_folio); return err; } /** - * nilfs_btnode_commit_change_key - * commit the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_commit_change_key - commit the change of the search key of + * a b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_commit_change_key() executes the key change based on the + * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid + * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move), + * this function removes the folio from the old index and completes the move. + * Otherwise, it copies the block data and inherited flag states of "oldbh" + * to "newbh" and clears the "oldbh" from the cache. In either case, the + * relocated buffer is marked as dirty. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_commit_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) { struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh; __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; - struct page *opage; + struct folio *ofolio; if (oldkey == newkey) return; if (nbh == NULL) { /* blocksize == pagesize */ - opage = obh->b_page; - if (unlikely(oldkey != opage->index)) - NILFS_PAGE_BUG(opage, + ofolio = obh->b_folio; + if (unlikely(oldkey != ofolio->index)) + NILFS_FOLIO_BUG(ofolio, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); @@ -257,8 +306,8 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, __xa_set_mark(&btnc->i_pages, newkey, PAGECACHE_TAG_DIRTY); xa_unlock_irq(&btnc->i_pages); - opage->index = obh->b_blocknr = newkey; - unlock_page(opage); + ofolio->index = obh->b_blocknr = newkey; + folio_unlock(ofolio); } else { nilfs_copy_buffer(nbh, obh); mark_buffer_dirty(nbh); @@ -270,8 +319,19 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, } /** - * nilfs_btnode_abort_change_key - * abort the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_abort_change_key - abort the change of the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_abort_change_key() cancels the key change associated with the + * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs + * any necessary cleanup. If no valid block buffer is prepared in "newbh" of + * @ctxt, this function removes the folio from the destination index and aborts + * the move. Otherwise, it clears "newbh" from the cache. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_abort_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -284,7 +344,15 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc, if (nbh == NULL) { /* blocksize == pagesize */ xa_erase_irq(&btnc->i_pages, newkey); - unlock_page(ctxt->bh->b_page); - } else - brelse(nbh); + folio_unlock(ctxt->bh->b_folio); + } else { + /* + * When canceling a buffer that a prepare operation has + * allocated to copy a node block to another location, use + * nilfs_btnode_delete() to initialize and release the buffer + * so that the buffer flags will not be in an inconsistent + * state when it is reallocated. + */ + nilfs_btnode_delete(nbh); + } } diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 40ce92a332fe..dd0c8e560ef6 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -63,8 +63,8 @@ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); - if (!bh) - return -ENOMEM; + if (IS_ERR(bh)) + return PTR_ERR(bh); set_buffer_nilfs_volatile(bh); *bhp = bh; @@ -334,7 +334,7 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, * @inode: host inode of btree * @blocknr: block number * - * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. + * Return: 0 if normal, 1 if the node is broken. */ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, size_t size, struct inode *inode, @@ -350,7 +350,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || (flags & NILFS_BTREE_NODE_ROOT) || - nchildren < 0 || + nchildren <= 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { nilfs_crit(inode->i_sb, "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", @@ -366,7 +366,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, * @node: btree root node to be examined * @inode: host inode of btree * - * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. + * Return: 0 if normal, 1 if the root node is broken. */ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, struct inode *inode) @@ -381,7 +381,8 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || - nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { + nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX || + (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) { nilfs_crit(inode->i_sb, "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, level, flags, nchildren); @@ -398,7 +399,7 @@ int nilfs_btree_broken_node_block(struct buffer_head *bh) if (buffer_nilfs_checked(bh)) return 0; - inode = bh->b_page->mapping->host; + inode = bh->b_folio->mapping->host; ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, bh->b_size, inode, bh->b_blocknr); if (likely(!ret)) @@ -651,8 +652,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, * @minlevel: start level * @nextkey: place to store the next valid key * - * Return Value: If a next key was found, 0 is returned. Otherwise, - * -ENOENT is returned. + * Return: 0 if the next key was found, %-ENOENT if not found. */ static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, @@ -724,7 +724,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, dat = nilfs_bmap_get_dat(btree); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr = blocknr; } cnt = 1; @@ -743,7 +743,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt || ++cnt == maxblocks) @@ -781,6 +781,11 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, out: nilfs_btree_free_path(path); return ret; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + goto out; } static void nilfs_btree_promote_key(struct nilfs_bmap *btree, @@ -1653,13 +1658,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) int nchildren, ret; root = nilfs_btree_get_root(btree); + nchildren = nilfs_btree_node_get_nchildren(root); + if (unlikely(nchildren == 0)) + return 0; + switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; break; case 3: - nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, @@ -1668,12 +1676,12 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; + nchildren = nilfs_btree_node_get_nchildren(node); break; default: return 0; } - nchildren = nilfs_btree_node_get_nchildren(node); maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? nilfs_btree_node_get_key(node, nchildren - 2) : 0; @@ -1852,13 +1860,22 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, } /** - * nilfs_btree_convert_and_insert - - * @bmap: - * @key: - * @ptr: - * @keys: - * @ptrs: - * @n: + * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree + * @btree: NILFS B-tree structure + * @key: Key of the new entry to be inserted + * @ptr: Pointer (block number) associated with the key to be inserted + * @keys: Array of keys to be inserted in addition to @key + * @ptrs: Array of pointers associated with @keys + * @n: Number of keys and pointers in @keys and @ptrs + * + * This function is used to insert a new entry specified by @key and @ptr, + * along with additional entries specified by @keys and @ptrs arrays, into a + * NILFS B-tree. + * It prepares the necessary changes by allocating the required blocks and any + * necessary intermediate nodes. It converts configurations from other forms of + * block mapping (the one that currently exists is direct mapping) to a B-tree. + * + * Return: 0 on success or a negative error code on failure. */ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, @@ -2085,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { - if (unlikely(ret == -ENOENT)) + if (unlikely(ret == -ENOENT)) { nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); + ret = -EINVAL; + } goto out; } @@ -2150,7 +2169,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; struct address_space *btcache = btnc_inode->i_mapping; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; - struct pagevec pvec; + struct folio_batch fbatch; struct buffer_head *bh, *head; pgoff_t index = 0; int level, i; @@ -2160,19 +2179,19 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, level++) INIT_LIST_HEAD(&lists[level]); - pagevec_init(&pvec); + folio_batch_init(&fbatch); - while (pagevec_lookup_tag(&pvec, btcache, &index, - PAGECACHE_TAG_DIRTY)) { - for (i = 0; i < pagevec_count(&pvec); i++) { - bh = head = page_buffers(pvec.pages[i]); + while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1, + PAGECACHE_TAG_DIRTY, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + bh = head = folio_buffers(fbatch.folios[i]); do { if (buffer_dirty(bh)) nilfs_btree_add_dirty_buffer(btree, lists, bh); } while ((bh = bh->b_this_page) != head); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); } @@ -2219,6 +2238,7 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, /* on-disk format */ binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = level; + memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 92868e1a48ca..2a220f716c91 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -24,6 +24,7 @@ * @bp_index: index of child node * @bp_oldreq: ptr end request for old ptr * @bp_newreq: ptr alloc request for new ptr + * @bp_ctxt: context information for changing the key of a b-tree node block * @bp_op: rebalance operation */ struct nilfs_btree_path { diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 9ebefb3acb0e..4bbdc832d7f2 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -28,7 +28,7 @@ nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno) { __u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1; - do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); + tcno = div64_ul(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); return (unsigned long)tcno; } @@ -68,54 +68,41 @@ static inline int nilfs_cpfile_is_in_first(const struct inode *cpfile, static unsigned int nilfs_cpfile_block_add_valid_checkpoints(const struct inode *cpfile, struct buffer_head *bh, - void *kaddr, unsigned int n) { - struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + struct nilfs_checkpoint *cp; unsigned int count; + cp = kmap_local_folio(bh->b_folio, + offset_in_folio(bh->b_folio, bh->b_data)); count = le32_to_cpu(cp->cp_checkpoints_count) + n; cp->cp_checkpoints_count = cpu_to_le32(count); + kunmap_local(cp); return count; } static unsigned int nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile, struct buffer_head *bh, - void *kaddr, unsigned int n) { - struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + struct nilfs_checkpoint *cp; unsigned int count; + cp = kmap_local_folio(bh->b_folio, + offset_in_folio(bh->b_folio, bh->b_data)); WARN_ON(le32_to_cpu(cp->cp_checkpoints_count) < n); count = le32_to_cpu(cp->cp_checkpoints_count) - n; cp->cp_checkpoints_count = cpu_to_le32(count); + kunmap_local(cp); return count; } -static inline struct nilfs_cpfile_header * -nilfs_cpfile_block_get_header(const struct inode *cpfile, - struct buffer_head *bh, - void *kaddr) -{ - return kaddr + bh_offset(bh); -} - -static struct nilfs_checkpoint * -nilfs_cpfile_block_get_checkpoint(const struct inode *cpfile, __u64 cno, - struct buffer_head *bh, - void *kaddr) -{ - return kaddr + bh_offset(bh) + nilfs_cpfile_get_offset(cpfile, cno) * - NILFS_MDT(cpfile)->mi_entry_size; -} - static void nilfs_cpfile_block_init(struct inode *cpfile, struct buffer_head *bh, - void *kaddr) + void *from) { - struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + struct nilfs_checkpoint *cp = from; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; int n = nilfs_cpfile_checkpoints_per_block(cpfile); @@ -125,10 +112,65 @@ static void nilfs_cpfile_block_init(struct inode *cpfile, } } -static inline int nilfs_cpfile_get_header_block(struct inode *cpfile, - struct buffer_head **bhp) +/** + * nilfs_cpfile_checkpoint_offset - calculate the byte offset of a checkpoint + * entry in the folio containing it + * @cpfile: checkpoint file inode + * @cno: checkpoint number + * @bh: buffer head of block containing checkpoint indexed by @cno + * + * Return: Byte offset in the folio of the checkpoint specified by @cno. + */ +static size_t nilfs_cpfile_checkpoint_offset(const struct inode *cpfile, + __u64 cno, + struct buffer_head *bh) +{ + return offset_in_folio(bh->b_folio, bh->b_data) + + nilfs_cpfile_get_offset(cpfile, cno) * + NILFS_MDT(cpfile)->mi_entry_size; +} + +/** + * nilfs_cpfile_cp_snapshot_list_offset - calculate the byte offset of a + * checkpoint snapshot list in the folio + * containing it + * @cpfile: checkpoint file inode + * @cno: checkpoint number + * @bh: buffer head of block containing checkpoint indexed by @cno + * + * Return: Byte offset in the folio of the checkpoint snapshot list specified + * by @cno. + */ +static size_t nilfs_cpfile_cp_snapshot_list_offset(const struct inode *cpfile, + __u64 cno, + struct buffer_head *bh) +{ + return nilfs_cpfile_checkpoint_offset(cpfile, cno, bh) + + offsetof(struct nilfs_checkpoint, cp_snapshot_list); +} + +/** + * nilfs_cpfile_ch_snapshot_list_offset - calculate the byte offset of the + * snapshot list in the header + * + * Return: Byte offset in the folio of the checkpoint snapshot list + */ +static size_t nilfs_cpfile_ch_snapshot_list_offset(void) +{ + return offsetof(struct nilfs_cpfile_header, ch_snapshot_list); +} + +static int nilfs_cpfile_get_header_block(struct inode *cpfile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(cpfile->i_sb, + "missing header block in checkpoint metadata"); + err = -EIO; + } + return err; } static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, @@ -149,14 +191,11 @@ static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, * @cnop: place to store the next checkpoint number * @bhp: place to store a pointer to buffer_head struct * - * Return Value: On success, it returns 0. On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EIO - I/O error - * - * %-ENOENT - no block exists in the range. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - no block exists in the range. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_cpfile_find_checkpoint_block(struct inode *cpfile, __u64 start_cno, __u64 end_cno, @@ -187,106 +226,215 @@ static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile, } /** - * nilfs_cpfile_get_checkpoint - get a checkpoint - * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * @create: create flag - * @cpp: pointer to a checkpoint - * @bhp: pointer to a buffer head + * nilfs_cpfile_read_checkpoint - read a checkpoint entry in cpfile + * @cpfile: checkpoint file inode + * @cno: number of checkpoint entry to read + * @root: nilfs root object + * @ifile: ifile's inode to read and attach to @root * - * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint - * specified by @cno. A new checkpoint will be created if @cno is the current - * checkpoint number and @create is nonzero. + * This function imports checkpoint information from the checkpoint file and + * stores it to the inode file given by @ifile and the nilfs root object + * given by @root. * - * Return Value: On success, 0 is returned, and the checkpoint and the - * buffer head of the buffer on which the checkpoint is located are stored in - * the place pointed by @cpp and @bhp, respectively. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid checkpoint. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). + */ +int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, struct inode *ifile) +{ + struct buffer_head *cp_bh; + struct nilfs_checkpoint *cp; + size_t offset; + int ret; + + if (cno < 1 || cno > nilfs_mdt_cno(cpfile)) + return -EINVAL; + + down_read(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); + if (unlikely(ret < 0)) { + if (ret == -ENOENT) + ret = -EINVAL; + goto out_sem; + } + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); + if (nilfs_checkpoint_invalid(cp)) { + ret = -EINVAL; + goto put_cp; + } + + ret = nilfs_read_inode_common(ifile, &cp->cp_ifile_inode); + if (unlikely(ret)) { + /* + * Since this inode is on a checkpoint entry, treat errors + * as metadata corruption. + */ + nilfs_err(cpfile->i_sb, + "ifile inode (checkpoint number=%llu) corrupted", + (unsigned long long)cno); + ret = -EIO; + goto put_cp; + } + + /* Configure the nilfs root object */ + atomic64_set(&root->inodes_count, le64_to_cpu(cp->cp_inodes_count)); + atomic64_set(&root->blocks_count, le64_to_cpu(cp->cp_blocks_count)); + root->ifile = ifile; + +put_cp: + kunmap_local(cp); + brelse(cp_bh); +out_sem: + up_read(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} + +/** + * nilfs_cpfile_create_checkpoint - create a checkpoint entry on cpfile + * @cpfile: checkpoint file inode + * @cno: number of checkpoint to set up * - * %-ENOENT - No such checkpoint. + * This function creates a checkpoint with the number specified by @cno on + * cpfile. If the specified checkpoint entry already exists due to a past + * failure, it will be reused without returning an error. + * In either case, the buffer of the block containing the checkpoint entry + * and the cpfile inode are made dirty for inclusion in the write log. * - * %-EINVAL - invalid checkpoint. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). + * * %-EROFS - Read only filesystem */ -int nilfs_cpfile_get_checkpoint(struct inode *cpfile, - __u64 cno, - int create, - struct nilfs_checkpoint **cpp, - struct buffer_head **bhp) +int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno) { struct buffer_head *header_bh, *cp_bh; struct nilfs_cpfile_header *header; struct nilfs_checkpoint *cp; - void *kaddr; + size_t offset; int ret; - if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) || - (cno < nilfs_mdt_cno(cpfile) && create))) - return -EINVAL; + if (WARN_ON_ONCE(cno < 1)) + return -EIO; down_write(&NILFS_MDT(cpfile)->mi_sem); - ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (ret < 0) + if (unlikely(ret < 0)) goto out_sem; - ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create, &cp_bh); - if (ret < 0) + + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 1, &cp_bh); + if (unlikely(ret < 0)) goto out_header; - kaddr = kmap(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) { - if (!create) { - kunmap(cp_bh->b_page); - brelse(cp_bh); - ret = -ENOENT; - goto out_header; - } /* a newly-created checkpoint */ nilfs_checkpoint_clear_invalid(cp); + kunmap_local(cp); if (!nilfs_cpfile_is_in_first(cpfile, cno)) nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, - kaddr, 1); - mark_buffer_dirty(cp_bh); + 1); - kaddr = kmap_atomic(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, - kaddr); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_ncheckpoints, 1); - kunmap_atomic(kaddr); + kunmap_local(header); mark_buffer_dirty(header_bh); - nilfs_mdt_mark_dirty(cpfile); + } else { + kunmap_local(cp); } - if (cpp != NULL) - *cpp = cp; - *bhp = cp_bh; + /* Force the buffer and the inode to become dirty */ + mark_buffer_dirty(cp_bh); + brelse(cp_bh); + nilfs_mdt_mark_dirty(cpfile); - out_header: +out_header: brelse(header_bh); - out_sem: +out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); return ret; } /** - * nilfs_cpfile_put_checkpoint - put a checkpoint - * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * @bh: buffer head + * nilfs_cpfile_finalize_checkpoint - fill in a checkpoint entry in cpfile + * @cpfile: checkpoint file inode + * @cno: checkpoint number + * @root: nilfs root object + * @blkinc: number of blocks added by this checkpoint + * @ctime: checkpoint creation time + * @minor: minor checkpoint flag * - * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint - * specified by @cno. @bh must be the buffer head which has been returned by - * a previous call to nilfs_cpfile_get_checkpoint() with @cno. + * This function completes the checkpoint entry numbered by @cno in the + * cpfile with the data given by the arguments @root, @blkinc, @ctime, and + * @minor. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). */ -void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno, - struct buffer_head *bh) +int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, __u64 blkinc, + time64_t ctime, bool minor) { - kunmap(bh->b_page); - brelse(bh); + struct buffer_head *cp_bh; + struct nilfs_checkpoint *cp; + size_t offset; + int ret; + + if (WARN_ON_ONCE(cno < 1)) + return -EIO; + + down_write(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); + if (unlikely(ret < 0)) { + if (ret == -ENOENT) + goto error; + goto out_sem; + } + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); + if (unlikely(nilfs_checkpoint_invalid(cp))) { + kunmap_local(cp); + brelse(cp_bh); + goto error; + } + + cp->cp_snapshot_list.ssl_next = 0; + cp->cp_snapshot_list.ssl_prev = 0; + cp->cp_inodes_count = cpu_to_le64(atomic64_read(&root->inodes_count)); + cp->cp_blocks_count = cpu_to_le64(atomic64_read(&root->blocks_count)); + cp->cp_nblk_inc = cpu_to_le64(blkinc); + cp->cp_create = cpu_to_le64(ctime); + cp->cp_cno = cpu_to_le64(cno); + + if (minor) + nilfs_checkpoint_set_minor(cp); + else + nilfs_checkpoint_clear_minor(cp); + + nilfs_write_inode_common(root->ifile, &cp->cp_ifile_inode); + nilfs_bmap_write(NILFS_I(root->ifile)->i_bmap, &cp->cp_ifile_inode); + + kunmap_local(cp); + brelse(cp_bh); +out_sem: + up_write(&NILFS_MDT(cpfile)->mi_sem); + return ret; + +error: + nilfs_error(cpfile->i_sb, + "checkpoint finalization failed due to metadata corruption."); + ret = -EIO; + goto out_sem; } /** @@ -299,14 +447,11 @@ void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno, * the period from @start to @end, excluding @end itself. The checkpoints * which have been already deleted are ignored. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - invalid checkpoints. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid checkpoints. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, __u64 start, @@ -317,6 +462,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, struct nilfs_checkpoint *cp; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; __u64 cno; + size_t offset; void *kaddr; unsigned long tnicps; int ret, ncps, nicps, nss, count, i; @@ -347,9 +493,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; } - kaddr = kmap_atomic(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint( - cpfile, cno, cp_bh, kaddr); + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kaddr = kmap_local_folio(cp_bh->b_folio, offset); nicps = 0; for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { if (nilfs_checkpoint_snapshot(cp)) { @@ -359,43 +504,42 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, nicps++; } } - if (nicps > 0) { - tnicps += nicps; - mark_buffer_dirty(cp_bh); - nilfs_mdt_mark_dirty(cpfile); - if (!nilfs_cpfile_is_in_first(cpfile, cno)) { - count = - nilfs_cpfile_block_sub_valid_checkpoints( - cpfile, cp_bh, kaddr, nicps); - if (count == 0) { - /* make hole */ - kunmap_atomic(kaddr); - brelse(cp_bh); - ret = - nilfs_cpfile_delete_checkpoint_block( - cpfile, cno); - if (ret == 0) - continue; - nilfs_err(cpfile->i_sb, - "error %d deleting checkpoint block", - ret); - break; - } - } + kunmap_local(kaddr); + + if (nicps <= 0) { + brelse(cp_bh); + continue; } - kunmap_atomic(kaddr); + tnicps += nicps; + mark_buffer_dirty(cp_bh); + nilfs_mdt_mark_dirty(cpfile); + if (nilfs_cpfile_is_in_first(cpfile, cno)) { + brelse(cp_bh); + continue; + } + + count = nilfs_cpfile_block_sub_valid_checkpoints(cpfile, cp_bh, + nicps); brelse(cp_bh); + if (count) + continue; + + /* Delete the block if there are no more valid checkpoints */ + ret = nilfs_cpfile_delete_checkpoint_block(cpfile, cno); + if (unlikely(ret)) { + nilfs_err(cpfile->i_sb, + "error %d deleting checkpoint block", ret); + break; + } } if (tnicps > 0) { - kaddr = kmap_atomic(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, - kaddr); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); - kunmap_atomic(kaddr); + kunmap_local(header); } brelse(header_bh); @@ -429,6 +573,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, struct buffer_head *bh; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; __u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop; + size_t offset; void *kaddr; int n, ret; int ncps, i; @@ -447,8 +592,8 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, } ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno); - kaddr = kmap_atomic(bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, bh); + cp = kaddr = kmap_local_folio(bh->b_folio, offset); for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { if (!nilfs_checkpoint_invalid(cp)) { nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, @@ -457,7 +602,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, n++; } } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(bh); } @@ -482,7 +627,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, struct nilfs_cpinfo *ci = buf; __u64 curr = *cnop, next; unsigned long curr_blkoff, next_blkoff; - void *kaddr; + size_t offset; int n = 0, ret; down_read(&NILFS_MDT(cpfile)->mi_sem); @@ -491,10 +636,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); + header = kmap_local_folio(bh->b_folio, 0); curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); - kunmap_atomic(kaddr); + kunmap_local(header); brelse(bh); if (curr == 0) { ret = 0; @@ -512,9 +656,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = 0; /* No snapshots (started from a hole block) */ goto out; } - kaddr = kmap_atomic(bh->b_page); + offset = nilfs_cpfile_checkpoint_offset(cpfile, curr, bh); + cp = kmap_local_folio(bh->b_folio, offset); while (n < nci) { - cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); curr = ~(__u64)0; /* Terminator */ if (unlikely(nilfs_checkpoint_invalid(cp) || !nilfs_checkpoint_snapshot(cp))) @@ -526,9 +670,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, if (next == 0) break; /* reach end of the snapshot list */ + kunmap_local(cp); next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); if (curr_blkoff != next_blkoff) { - kunmap_atomic(kaddr); brelse(bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &bh); @@ -536,12 +680,13 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, WARN_ON(ret == -ENOENT); goto out; } - kaddr = kmap_atomic(bh->b_page); } + offset = nilfs_cpfile_checkpoint_offset(cpfile, next, bh); + cp = kmap_local_folio(bh->b_folio, offset); curr = next; curr_blkoff = next_blkoff; } - kunmap_atomic(kaddr); + kunmap_local(cp); brelse(bh); *cnop = curr; ret = n; @@ -552,11 +697,29 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, } /** - * nilfs_cpfile_get_cpinfo - - * @cpfile: - * @cno: - * @ci: - * @nci: + * nilfs_cpfile_get_cpinfo - get information on checkpoints + * @cpfile: checkpoint file inode + * @cnop: place to pass a starting checkpoint number and receive a + * checkpoint number to continue the search + * @mode: mode of checkpoints that the caller wants to retrieve + * @buf: buffer for storing checkpoints' information + * @cisz: byte size of one checkpoint info item in array + * @nci: number of checkpoint info items to retrieve + * + * nilfs_cpfile_get_cpinfo() searches for checkpoints in @mode state + * starting from the checkpoint number stored in @cnop, and stores + * information about found checkpoints in @buf. + * The buffer pointed to by @buf must be large enough to store information + * for @nci checkpoints. If at least one checkpoint information is + * successfully retrieved, @cnop is updated to point to the checkpoint + * number to continue searching. + * + * Return: Count of checkpoint info items stored in the output buffer on + * success, or one of the following negative error codes on failure: + * * %-EINVAL - Invalid checkpoint mode. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Invalid checkpoint number specified. */ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, @@ -573,9 +736,16 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, } /** - * nilfs_cpfile_delete_checkpoint - - * @cpfile: - * @cno: + * nilfs_cpfile_delete_checkpoint - delete a checkpoint + * @cpfile: checkpoint file inode + * @cno: checkpoint number to delete + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Checkpoint in use (snapshot specified). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No valid checkpoint found. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) { @@ -594,26 +764,6 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1); } -static struct nilfs_snapshot_list * -nilfs_cpfile_block_get_snapshot_list(const struct inode *cpfile, - __u64 cno, - struct buffer_head *bh, - void *kaddr) -{ - struct nilfs_cpfile_header *header; - struct nilfs_checkpoint *cp; - struct nilfs_snapshot_list *list; - - if (cno != 0) { - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); - list = &cp->cp_snapshot_list; - } else { - header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); - list = &header->ch_snapshot_list; - } - return list; -} - static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) { struct buffer_head *header_bh, *curr_bh, *prev_bh, *cp_bh; @@ -622,94 +772,103 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) struct nilfs_snapshot_list *list; __u64 curr, prev; unsigned long curr_blkoff, prev_blkoff; - void *kaddr; + size_t offset, curr_list_offset, prev_list_offset; int ret; if (cno == 0) return -ENOENT; /* checkpoint number 0 is invalid */ down_write(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); + if (unlikely(ret < 0)) + goto out_sem; + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) - goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + goto out_header; + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr); + kunmap_local(cp); goto out_cp; } if (nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr); + kunmap_local(cp); goto out_cp; } - kunmap_atomic(kaddr); + kunmap_local(cp); - ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (ret < 0) - goto out_cp; - kaddr = kmap_atomic(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + /* + * Find the last snapshot before the checkpoint being changed to + * snapshot mode by going backwards through the snapshot list. + * Set "prev" to its checkpoint number, or 0 if not found. + */ + header = kmap_local_folio(header_bh->b_folio, 0); list = &header->ch_snapshot_list; curr_bh = header_bh; get_bh(curr_bh); curr = 0; curr_blkoff = 0; + curr_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); prev = le64_to_cpu(list->ssl_prev); while (prev > cno) { prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); curr = prev; + kunmap_local(list); if (curr_blkoff != prev_blkoff) { - kunmap_atomic(kaddr); brelse(curr_bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &curr_bh); - if (ret < 0) - goto out_header; - kaddr = kmap_atomic(curr_bh->b_page); + if (unlikely(ret < 0)) + goto out_cp; } + curr_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, curr, curr_bh); + list = kmap_local_folio(curr_bh->b_folio, curr_list_offset); curr_blkoff = prev_blkoff; - cp = nilfs_cpfile_block_get_checkpoint( - cpfile, curr, curr_bh, kaddr); - list = &cp->cp_snapshot_list; prev = le64_to_cpu(list->ssl_prev); } - kunmap_atomic(kaddr); + kunmap_local(list); if (prev != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, &prev_bh); if (ret < 0) goto out_curr; + + prev_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, prev, prev_bh); } else { prev_bh = header_bh; get_bh(prev_bh); + prev_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); } - kaddr = kmap_atomic(curr_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, curr, curr_bh, kaddr); + /* Update the list entry for the next snapshot */ + list = kmap_local_folio(curr_bh->b_folio, curr_list_offset); list->ssl_prev = cpu_to_le64(cno); - kunmap_atomic(kaddr); + kunmap_local(list); - kaddr = kmap_atomic(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + /* Update the checkpoint being changed to a snapshot */ + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); nilfs_checkpoint_set_snapshot(cp); - kunmap_atomic(kaddr); + kunmap_local(cp); - kaddr = kmap_atomic(prev_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, prev, prev_bh, kaddr); + /* Update the list entry for the previous snapshot */ + list = kmap_local_folio(prev_bh->b_folio, prev_list_offset); list->ssl_next = cpu_to_le64(cno); - kunmap_atomic(kaddr); + kunmap_local(list); - kaddr = kmap_atomic(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + /* Update the statistics in the header */ + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_nsnapshots, 1); - kunmap_atomic(kaddr); + kunmap_local(header); mark_buffer_dirty(prev_bh); mark_buffer_dirty(curr_bh); @@ -722,12 +881,12 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) out_curr: brelse(curr_bh); - out_header: - brelse(header_bh); - out_cp: brelse(cp_bh); + out_header: + brelse(header_bh); + out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); return ret; @@ -740,79 +899,87 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) struct nilfs_checkpoint *cp; struct nilfs_snapshot_list *list; __u64 next, prev; - void *kaddr; + size_t offset, next_list_offset, prev_list_offset; int ret; if (cno == 0) return -ENOENT; /* checkpoint number 0 is invalid */ down_write(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); + if (unlikely(ret < 0)) + goto out_sem; + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) - goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + goto out_header; + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr); + kunmap_local(cp); goto out_cp; } if (!nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr); + kunmap_local(cp); goto out_cp; } list = &cp->cp_snapshot_list; next = le64_to_cpu(list->ssl_next); prev = le64_to_cpu(list->ssl_prev); - kunmap_atomic(kaddr); + kunmap_local(cp); - ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (ret < 0) - goto out_cp; if (next != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &next_bh); if (ret < 0) - goto out_header; + goto out_cp; + + next_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, next, next_bh); } else { next_bh = header_bh; get_bh(next_bh); + next_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); } if (prev != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, &prev_bh); if (ret < 0) goto out_next; + + prev_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, prev, prev_bh); } else { prev_bh = header_bh; get_bh(prev_bh); + prev_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); } - kaddr = kmap_atomic(next_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, next, next_bh, kaddr); + /* Update the list entry for the next snapshot */ + list = kmap_local_folio(next_bh->b_folio, next_list_offset); list->ssl_prev = cpu_to_le64(prev); - kunmap_atomic(kaddr); + kunmap_local(list); - kaddr = kmap_atomic(prev_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, prev, prev_bh, kaddr); + /* Update the list entry for the previous snapshot */ + list = kmap_local_folio(prev_bh->b_folio, prev_list_offset); list->ssl_next = cpu_to_le64(next); - kunmap_atomic(kaddr); + kunmap_local(list); - kaddr = kmap_atomic(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + /* Update the snapshot being changed back to a plain checkpoint */ + cp = kmap_local_folio(cp_bh->b_folio, offset); cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); nilfs_checkpoint_clear_snapshot(cp); - kunmap_atomic(kaddr); + kunmap_local(cp); - kaddr = kmap_atomic(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + /* Update the statistics in the header */ + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_nsnapshots, -1); - kunmap_atomic(kaddr); + kunmap_local(header); mark_buffer_dirty(next_bh); mark_buffer_dirty(prev_bh); @@ -825,39 +992,33 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) out_next: brelse(next_bh); - out_header: - brelse(header_bh); - out_cp: brelse(cp_bh); + out_header: + brelse(header_bh); + out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); return ret; } /** - * nilfs_cpfile_is_snapshot - + * nilfs_cpfile_is_snapshot - determine if checkpoint is a snapshot * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * - * Description: - * - * Return Value: On success, 1 is returned if the checkpoint specified by - * @cno is a snapshot, or 0 if not. On error, one of the following negative - * error codes is returned. + * @cno: checkpoint number * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - No such checkpoint. + * Return: 1 if the checkpoint specified by @cno is a snapshot, 0 if not, or + * one of the following negative error codes on failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No such checkpoint. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) { struct buffer_head *bh; struct nilfs_checkpoint *cp; - void *kaddr; + size_t offset; int ret; /* @@ -871,13 +1032,14 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, bh); + cp = kmap_local_folio(bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) ret = -ENOENT; else ret = nilfs_checkpoint_snapshot(cp); - kunmap_atomic(kaddr); + kunmap_local(cp); brelse(bh); out: @@ -894,14 +1056,11 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) * Description: nilfs_change_cpmode() changes the mode of the checkpoint * specified by @cno. The mode @mode is NILFS_CHECKPOINT or NILFS_SNAPSHOT. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - No such checkpoint. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No such checkpoint. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) { @@ -933,20 +1092,17 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) * @cpstat: pointer to a structure of checkpoint statistics * * Description: nilfs_cpfile_get_stat() returns information about checkpoints. + * The checkpoint statistics are stored in the location pointed to by @cpstat. * - * Return Value: On success, 0 is returned, and checkpoints information is - * stored in the place pointed by @cpstat. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) { struct buffer_head *bh; struct nilfs_cpfile_header *header; - void *kaddr; int ret; down_read(&NILFS_MDT(cpfile)->mi_sem); @@ -954,12 +1110,11 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); + header = kmap_local_folio(bh->b_folio, 0); cpstat->cs_cno = nilfs_mdt_cno(cpfile); cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); - kunmap_atomic(kaddr); + kunmap_local(header); brelse(bh); out_sem: @@ -973,6 +1128,8 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) * @cpsize: size of a checkpoint entry * @raw_inode: on-disk cpfile inode * @inodep: buffer to store the inode + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, struct nilfs_inode *raw_inode, struct inode **inodep) @@ -991,7 +1148,7 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); if (unlikely(!cpfile)) return -ENOMEM; - if (!(cpfile->i_state & I_NEW)) + if (!(inode_state_read_once(cpfile) & I_NEW)) goto out; err = nilfs_mdt_init(cpfile, NILFS_MDT_GFP, 0); diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index edabb2dc5756..f5b1d59289eb 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -16,10 +16,12 @@ #include <linux/nilfs2_ondisk.h> /* nilfs_inode, nilfs_checkpoint */ -int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int, - struct nilfs_checkpoint **, - struct buffer_head **); -void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *); +int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, struct inode *ifile); +int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno); +int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, __u64 blkinc, + time64_t ctime, bool minor); int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64); int nilfs_cpfile_delete_checkpoint(struct inode *, __u64); int nilfs_cpfile_change_cpmode(struct inode *, __u64, int); diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 9930fa901039..674380837ab9 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -40,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) static int nilfs_dat_prepare_entry(struct inode *dat, struct nilfs_palloc_req *req, int create) { - return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, - create, &req->pr_entry_bh); + int ret; + + ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, + create, &req->pr_entry_bh); + if (unlikely(ret == -ENOENT)) { + nilfs_err(dat->i_sb, + "DAT doesn't have a block to manage vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + /* + * Return internal code -EINVAL to notify bmap layer of + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } static void nilfs_dat_commit_entry(struct inode *dat, @@ -62,7 +75,7 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) { int ret; - ret = nilfs_palloc_prepare_alloc_entry(dat, req); + ret = nilfs_palloc_prepare_alloc_entry(dat, req, true); if (ret < 0) return ret; @@ -76,15 +89,15 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MAX); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr); + kunmap_local(entry); nilfs_palloc_commit_alloc_entry(dat, req); nilfs_dat_commit_entry(dat, req); @@ -100,15 +113,15 @@ static void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MIN); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr); + kunmap_local(entry); nilfs_dat_commit_entry(dat, req); @@ -123,25 +136,21 @@ static void nilfs_dat_commit_free(struct inode *dat, int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) { - int ret; - - ret = nilfs_dat_prepare_entry(dat, req, 0); - WARN_ON(ret == -ENOENT); - return ret; + return nilfs_dat_prepare_entry(dat, req, 0); } void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, sector_t blocknr) { struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr); + kunmap_local(entry); nilfs_dat_commit_entry(dat, req); } @@ -149,21 +158,21 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; + __u64 start; sector_t blocknr; - void *kaddr; + size_t offset; int ret; ret = nilfs_dat_prepare_entry(dat, req, 0); - if (ret < 0) { - WARN_ON(ret == -ENOENT); + if (ret < 0) return ret; - } - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); + start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr); + kunmap_local(entry); if (blocknr == 0) { ret = nilfs_palloc_prepare_free_entry(dat, req); @@ -172,6 +181,15 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) return ret; } } + if (unlikely(start > nilfs_mdt_cno(dat))) { + nilfs_err(dat->i_sb, + "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)", + (unsigned long long)req->pr_entry_nr, + (unsigned long long)start, + (unsigned long long)nilfs_mdt_cno(dat)); + nilfs_dat_abort_entry(dat, req); + return -EINVAL; + } return 0; } @@ -182,11 +200,11 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, struct nilfs_dat_entry *entry; __u64 start, end; sector_t blocknr; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); end = start = le64_to_cpu(entry->de_start); if (!dead) { end = nilfs_mdt_cno(dat); @@ -194,7 +212,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, } entry->de_end = cpu_to_le64(end); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr); + kunmap_local(entry); if (blocknr == 0) nilfs_dat_commit_free(dat, req); @@ -207,14 +225,14 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) struct nilfs_dat_entry *entry; __u64 start; sector_t blocknr; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr); + kunmap_local(entry); if (start == nilfs_mdt_cno(dat) && blocknr == 0) nilfs_palloc_abort_free_entry(dat, req); @@ -253,18 +271,16 @@ void nilfs_dat_abort_update(struct inode *dat, } /** - * nilfs_dat_mark_dirty - - * @dat: DAT file inode + * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified + * virtual block address entry as dirty + * @dat: DAT file inode * @vblocknr: virtual block number * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid DAT entry (internal code). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) { @@ -287,14 +303,11 @@ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) * Description: nilfs_dat_freev() frees the virtual block numbers specified by * @vblocknrs and @nitems. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The virtual block number have not been allocated. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - The virtual block number have not been allocated. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) { @@ -310,18 +323,16 @@ int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) * Description: nilfs_dat_move() changes the block number associated with * @vblocknr to @blocknr. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) { struct buffer_head *entry_bh; struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; int ret; ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); @@ -344,21 +355,21 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) } } - kaddr = kmap_atomic(entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh); + entry = kmap_local_folio(entry_bh->b_folio, offset); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { nilfs_crit(dat->i_sb, "%s: invalid vblocknr = %llu, [%llu, %llu)", __func__, (unsigned long long)vblocknr, (unsigned long long)le64_to_cpu(entry->de_start), (unsigned long long)le64_to_cpu(entry->de_end)); - kunmap_atomic(kaddr); + kunmap_local(entry); brelse(entry_bh); return -EINVAL; } WARN_ON(blocknr == 0); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr); + kunmap_local(entry); mark_buffer_dirty(entry_bh); nilfs_mdt_mark_dirty(dat); @@ -375,24 +386,21 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) * @blocknrp: pointer to a block number * * Description: nilfs_dat_translate() maps the virtual block number @vblocknr - * to the corresponding block number. - * - * Return Value: On success, 0 is returned and the block number associated - * with @vblocknr is stored in the place pointed by @blocknrp. On error, one - * of the following negative error codes is returned. - * - * %-EIO - I/O error. + * to the corresponding block number. The block number associated with + * @vblocknr is stored in the place pointed to by @blocknrp. * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - A block number associated with @vblocknr does not exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - A block number associated with @vblocknr does not exist. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) { struct buffer_head *entry_bh, *bh; struct nilfs_dat_entry *entry; sector_t blocknr; - void *kaddr; + size_t offset; int ret; ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); @@ -408,8 +416,8 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) } } - kaddr = kmap_atomic(entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh); + entry = kmap_local_folio(entry_bh->b_folio, offset); blocknr = le64_to_cpu(entry->de_blocknr); if (blocknr == 0) { ret = -ENOENT; @@ -418,7 +426,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) *blocknrp = blocknr; out: - kunmap_atomic(kaddr); + kunmap_local(entry); brelse(entry_bh); return ret; } @@ -427,11 +435,12 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, size_t nvi) { struct buffer_head *entry_bh; - struct nilfs_dat_entry *entry; + struct nilfs_dat_entry *entry, *first_entry; struct nilfs_vinfo *vinfo = buf; __u64 first, last; - void *kaddr; + size_t offset; unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; + unsigned int entry_size = NILFS_MDT(dat)->mi_entry_size; int i, j, n, ret; for (i = 0; i < nvi; i += n) { @@ -439,23 +448,28 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, 0, &entry_bh); if (ret < 0) return ret; - kaddr = kmap_atomic(entry_bh->b_page); - /* last virtual block number in this block */ + first = vinfo->vi_vblocknr; - do_div(first, entries_per_block); + first = div64_ul(first, entries_per_block); first *= entries_per_block; + /* first virtual block number in this block */ + last = first + entries_per_block - 1; + /* last virtual block number in this block */ + + offset = nilfs_palloc_entry_offset(dat, first, entry_bh); + first_entry = kmap_local_folio(entry_bh->b_folio, offset); for (j = i, n = 0; j < nvi && vinfo->vi_vblocknr >= first && vinfo->vi_vblocknr <= last; j++, n++, vinfo = (void *)vinfo + visz) { - entry = nilfs_palloc_block_get_entry( - dat, vinfo->vi_vblocknr, entry_bh, kaddr); + entry = (void *)first_entry + + (vinfo->vi_vblocknr - first) * entry_size; vinfo->vi_start = le64_to_cpu(entry->de_start); vinfo->vi_end = le64_to_cpu(entry->de_end); vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); } - kunmap_atomic(kaddr); + kunmap_local(first_entry); brelse(entry_bh); } @@ -468,6 +482,8 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, * @entry_size: size of a dat entry * @raw_inode: on-disk dat inode * @inodep: buffer to store the inode + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_dat_read(struct super_block *sb, size_t entry_size, struct nilfs_inode *raw_inode, struct inode **inodep) @@ -490,7 +506,7 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); if (unlikely(!dat)) return -ENOMEM; - if (!(dat->i_state & I_NEW)) + if (!(inode_state_read_once(dat) & I_NEW)) goto out; err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di)); diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index decd6471300b..6ca3d74be1e1 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -64,19 +64,13 @@ static inline unsigned int nilfs_chunk_size(struct inode *inode) return inode->i_sb->s_blocksize; } -static inline void nilfs_put_page(struct page *page) -{ - kunmap(page); - put_page(page); -} - /* * Return the offset into page `page_nr' of the last valid * byte in that page, plus one. */ static unsigned int nilfs_last_byte(struct inode *inode, unsigned long page_nr) { - unsigned int last_byte = inode->i_size; + u64 last_byte = inode->i_size; last_byte -= page_nr << PAGE_SHIFT; if (last_byte > PAGE_SIZE) @@ -84,48 +78,46 @@ static unsigned int nilfs_last_byte(struct inode *inode, unsigned long page_nr) return last_byte; } -static int nilfs_prepare_chunk(struct page *page, unsigned int from, +static int nilfs_prepare_chunk(struct folio *folio, unsigned int from, unsigned int to) { - loff_t pos = page_offset(page) + from; + loff_t pos = folio_pos(folio) + from; - return __block_write_begin(page, pos, to - from, nilfs_get_block); + return __block_write_begin(folio, pos, to - from, nilfs_get_block); } -static void nilfs_commit_chunk(struct page *page, - struct address_space *mapping, - unsigned int from, unsigned int to) +static void nilfs_commit_chunk(struct folio *folio, + struct address_space *mapping, size_t from, size_t to) { struct inode *dir = mapping->host; - loff_t pos = page_offset(page) + from; - unsigned int len = to - from; - unsigned int nr_dirty, copied; + loff_t pos = folio_pos(folio) + from; + size_t copied, len = to - from; + unsigned int nr_dirty; int err; - nr_dirty = nilfs_page_count_clean_buffers(page, from, to); - copied = block_write_end(NULL, mapping, pos, len, len, page, NULL); + nr_dirty = nilfs_page_count_clean_buffers(folio, from, to); + copied = block_write_end(pos, len, len, folio); if (pos + copied > dir->i_size) i_size_write(dir, pos + copied); if (IS_DIRSYNC(dir)) nilfs_set_transaction_flag(NILFS_TI_SYNC); err = nilfs_set_file_dirty(dir, nr_dirty); WARN_ON(err); /* do not happen */ - unlock_page(page); + folio_unlock(folio); } -static bool nilfs_check_page(struct page *page) +static bool nilfs_check_folio(struct folio *folio, char *kaddr) { - struct inode *dir = page->mapping->host; + struct inode *dir = folio->mapping->host; struct super_block *sb = dir->i_sb; unsigned int chunk_size = nilfs_chunk_size(dir); - char *kaddr = page_address(page); - unsigned int offs, rec_len; - unsigned int limit = PAGE_SIZE; + size_t offs, rec_len; + size_t limit = folio_size(folio); struct nilfs_dir_entry *p; char *error; - if ((dir->i_size >> PAGE_SHIFT) == page->index) { - limit = dir->i_size & ~PAGE_MASK; + if (dir->i_size < folio_pos(folio) + limit) { + limit = dir->i_size - folio_pos(folio); if (limit & (chunk_size - 1)) goto Ebadsize; if (!limit) @@ -143,11 +135,14 @@ static bool nilfs_check_page(struct page *page) goto Enamelen; if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) goto Espan; + if (unlikely(p->inode && + NILFS_PRIVATE_INODE(le64_to_cpu(p->inode)))) + goto Einumber; } if (offs != limit) goto Eend; out: - SetPageChecked(page); + folio_set_checked(folio); return true; /* Too bad, we had an error */ @@ -168,10 +163,13 @@ Enamelen: goto bad_entry; Espan: error = "directory entry across blocks"; + goto bad_entry; +Einumber: + error = "disallowed inode number"; bad_entry: nilfs_error(sb, - "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error, (page->index << PAGE_SHIFT) + offs, + "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d", + dir->i_ino, error, (folio->index << PAGE_SHIFT) + offs, (unsigned long)le64_to_cpu(p->inode), rec_len, p->name_len); goto fail; @@ -179,29 +177,33 @@ Eend: p = (struct nilfs_dir_entry *)(kaddr + offs); nilfs_error(sb, "entry in directory #%lu spans the page boundary offset=%lu, inode=%lu", - dir->i_ino, (page->index << PAGE_SHIFT) + offs, + dir->i_ino, (folio->index << PAGE_SHIFT) + offs, (unsigned long)le64_to_cpu(p->inode)); fail: - SetPageError(page); return false; } -static struct page *nilfs_get_page(struct inode *dir, unsigned long n) +static void *nilfs_get_folio(struct inode *dir, unsigned long n, + struct folio **foliop) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_mapping_page(mapping, n, NULL); + struct folio *folio = read_mapping_folio(mapping, n, NULL); + void *kaddr; - if (!IS_ERR(page)) { - kmap(page); - if (unlikely(!PageChecked(page))) { - if (!nilfs_check_page(page)) - goto fail; - } + if (IS_ERR(folio)) + return folio; + + kaddr = kmap_local_folio(folio, 0); + if (unlikely(!folio_test_checked(folio))) { + if (!nilfs_check_folio(folio, kaddr)) + goto fail; } - return page; + + *foliop = folio; + return kaddr; fail: - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return ERR_PTR(-EIO); } @@ -229,37 +231,6 @@ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) nilfs_rec_len_from_disk(p->rec_len)); } -static unsigned char -nilfs_filetype_table[NILFS_FT_MAX] = { - [NILFS_FT_UNKNOWN] = DT_UNKNOWN, - [NILFS_FT_REG_FILE] = DT_REG, - [NILFS_FT_DIR] = DT_DIR, - [NILFS_FT_CHRDEV] = DT_CHR, - [NILFS_FT_BLKDEV] = DT_BLK, - [NILFS_FT_FIFO] = DT_FIFO, - [NILFS_FT_SOCK] = DT_SOCK, - [NILFS_FT_SYMLINK] = DT_LNK, -}; - -#define S_SHIFT 12 -static unsigned char -nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK, -}; - -static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) -{ - umode_t mode = inode->i_mode; - - de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - static int nilfs_readdir(struct file *file, struct dir_context *ctx) { loff_t pos = ctx->pos; @@ -275,99 +246,93 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) for ( ; n < npages; n++, offset = 0) { char *kaddr, *limit; struct nilfs_dir_entry *de; - struct page *page = nilfs_get_page(inode, n); + struct folio *folio; - if (IS_ERR(page)) { + kaddr = nilfs_get_folio(inode, n, &folio); + if (IS_ERR(kaddr)) { nilfs_error(sb, "bad page in #%lu", inode->i_ino); ctx->pos += PAGE_SIZE - offset; return -EIO; } - kaddr = page_address(page); de = (struct nilfs_dir_entry *)(kaddr + offset); limit = kaddr + nilfs_last_byte(inode, n) - NILFS_DIR_REC_LEN(1); for ( ; (char *)de <= limit; de = nilfs_next_entry(de)) { if (de->rec_len == 0) { nilfs_error(sb, "zero-length directory entry"); - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return -EIO; } if (de->inode) { unsigned char t; - if (de->file_type < NILFS_FT_MAX) - t = nilfs_filetype_table[de->file_type]; - else - t = DT_UNKNOWN; + t = fs_ftype_to_dtype(de->file_type); if (!dir_emit(ctx, de->name, de->name_len, le64_to_cpu(de->inode), t)) { - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return 0; } } ctx->pos += nilfs_rec_len_from_disk(de->rec_len); } - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); } return 0; } /* - * nilfs_find_entry() + * nilfs_find_entry() * - * finds an entry in the specified directory with the wanted name. It - * returns the page in which the entry was found, and the entry itself - * (as a parameter - res_dir). Page is returned mapped and unlocked. - * Entry is guaranteed to be valid. + * Finds an entry in the specified directory with the wanted name. It + * returns the folio in which the entry was found, and the entry itself. + * The folio is mapped and unlocked. When the caller is finished with + * the entry, it should call folio_release_kmap(). + * + * On failure, returns an error pointer and the caller should ignore foliop. */ -struct nilfs_dir_entry * -nilfs_find_entry(struct inode *dir, const struct qstr *qstr, - struct page **res_page) +struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, + const struct qstr *qstr, struct folio **foliop) { const unsigned char *name = qstr->name; int namelen = qstr->len; unsigned int reclen = NILFS_DIR_REC_LEN(namelen); unsigned long start, n; unsigned long npages = dir_pages(dir); - struct page *page = NULL; struct nilfs_inode_info *ei = NILFS_I(dir); struct nilfs_dir_entry *de; if (npages == 0) goto out; - /* OFFSET_CACHE */ - *res_page = NULL; - start = ei->i_dir_start_lookup; if (start >= npages) start = 0; n = start; do { - char *kaddr; - - page = nilfs_get_page(dir, n); - if (!IS_ERR(page)) { - kaddr = page_address(page); - de = (struct nilfs_dir_entry *)kaddr; - kaddr += nilfs_last_byte(dir, n) - reclen; - while ((char *) de <= kaddr) { - if (de->rec_len == 0) { - nilfs_error(dir->i_sb, - "zero-length directory entry"); - nilfs_put_page(page); - goto out; - } - if (nilfs_match(namelen, name, de)) - goto found; - de = nilfs_next_entry(de); + char *kaddr = nilfs_get_folio(dir, n, foliop); + + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); + + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(dir, n) - reclen; + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + nilfs_error(dir->i_sb, + "zero-length directory entry"); + folio_release_kmap(*foliop, kaddr); + goto out; } - nilfs_put_page(page); + if (nilfs_match(namelen, name, de)) + goto found; + de = nilfs_next_entry(de); } + folio_release_kmap(*foliop, kaddr); + if (++n >= npages) n = 0; - /* next page is past the blocks we've got */ + /* next folio is past the blocks we've got */ if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) { nilfs_error(dir->i_sb, "dir %lu size %lld exceeds block count %llu", @@ -377,59 +342,83 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr, } } while (n != start); out: - return NULL; + return ERR_PTR(-ENOENT); found: - *res_page = page; ei->i_dir_start_lookup = n; return de; } -struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p) +struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop) { - struct page *page = nilfs_get_page(dir, 0); - struct nilfs_dir_entry *de = NULL; + struct folio *folio; + struct nilfs_dir_entry *de, *next_de; + size_t limit; + char *msg; + + de = nilfs_get_folio(dir, 0, &folio); + if (IS_ERR(de)) + return NULL; + + limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */ + if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino || + !nilfs_match(1, ".", de))) { + msg = "missing '.'"; + goto fail; + } - if (!IS_ERR(page)) { - de = nilfs_next_entry( - (struct nilfs_dir_entry *)page_address(page)); - *p = page; + next_de = nilfs_next_entry(de); + /* + * If "next_de" has not reached the end of the chunk, there is + * at least one more record. Check whether it matches "..". + */ + if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) || + !nilfs_match(2, "..", next_de))) { + msg = "missing '..'"; + goto fail; } - return de; + *foliop = folio; + return next_de; + +fail: + nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg); + folio_release_kmap(folio, de); + return NULL; } -ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino) { - ino_t res = 0; struct nilfs_dir_entry *de; - struct page *page; + struct folio *folio; - de = nilfs_find_entry(dir, qstr, &page); - if (de) { - res = le64_to_cpu(de->inode); - kunmap(page); - put_page(page); - } - return res; + de = nilfs_find_entry(dir, qstr, &folio); + if (IS_ERR(de)) + return PTR_ERR(de); + + *ino = le64_to_cpu(de->inode); + folio_release_kmap(folio, de); + return 0; } -/* Releases the page */ -void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, - struct page *page, struct inode *inode) +int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, + struct folio *folio, struct inode *inode) { - unsigned int from = (char *)de - (char *)page_address(page); - unsigned int to = from + nilfs_rec_len_from_disk(de->rec_len); - struct address_space *mapping = page->mapping; + size_t from = offset_in_folio(folio, de); + size_t to = from + nilfs_rec_len_from_disk(de->rec_len); + struct address_space *mapping = folio->mapping; int err; - lock_page(page); - err = nilfs_prepare_chunk(page, from, to); - BUG_ON(err); + folio_lock(folio); + err = nilfs_prepare_chunk(folio, from, to); + if (unlikely(err)) { + folio_unlock(folio); + return err; + } de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); - nilfs_commit_chunk(page, mapping, from, to); - nilfs_put_page(page); - dir->i_mtime = dir->i_ctime = current_time(dir); + de->file_type = fs_umode_to_ftype(inode->i_mode); + nilfs_commit_chunk(folio, mapping, from, to); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); + return 0; } /* @@ -443,31 +432,28 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) unsigned int chunk_size = nilfs_chunk_size(dir); unsigned int reclen = NILFS_DIR_REC_LEN(namelen); unsigned short rec_len, name_len; - struct page *page = NULL; + struct folio *folio = NULL; struct nilfs_dir_entry *de; unsigned long npages = dir_pages(dir); unsigned long n; - char *kaddr; - unsigned int from, to; + size_t from, to; int err; /* * We take care of directory expansion in the same loop. - * This code plays outside i_size, so it locks the page + * This code plays outside i_size, so it locks the folio * to protect that region. */ for (n = 0; n <= npages; n++) { + char *kaddr = nilfs_get_folio(dir, n, &folio); char *dir_end; - page = nilfs_get_page(dir, n); - err = PTR_ERR(page); - if (IS_ERR(page)) - goto out; - lock_page(page); - kaddr = page_address(page); + if (IS_ERR(kaddr)) + return PTR_ERR(kaddr); + folio_lock(folio); dir_end = kaddr + nilfs_last_byte(dir, n); de = (struct nilfs_dir_entry *)kaddr; - kaddr += PAGE_SIZE - reclen; + kaddr += folio_size(folio) - reclen; while ((char *)de <= kaddr) { if ((char *)de == dir_end) { /* We hit i_size */ @@ -494,16 +480,16 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) goto got_it; de = (struct nilfs_dir_entry *)((char *)de + rec_len); } - unlock_page(page); - nilfs_put_page(page); + folio_unlock(folio); + folio_release_kmap(folio, kaddr); } BUG(); return -EINVAL; got_it: - from = (char *)de - (char *)page_address(page); + from = offset_in_folio(folio, de); to = from + rec_len; - err = nilfs_prepare_chunk(page, from, to); + err = nilfs_prepare_chunk(folio, from, to); if (err) goto out_unlock; if (de->inode) { @@ -517,30 +503,29 @@ got_it: de->name_len = namelen; memcpy(de->name, name, namelen); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); - nilfs_commit_chunk(page, page->mapping, from, to); - dir->i_mtime = dir->i_ctime = current_time(dir); + de->file_type = fs_umode_to_ftype(inode->i_mode); + nilfs_commit_chunk(folio, folio->mapping, from, to); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); nilfs_mark_inode_dirty(dir); /* OFFSET_CACHE */ out_put: - nilfs_put_page(page); -out: + folio_release_kmap(folio, de); return err; out_unlock: - unlock_page(page); + folio_unlock(folio); goto out_put; } /* * nilfs_delete_entry deletes a directory entry by merging it with the - * previous entry. Page is up-to-date. Releases the page. + * previous entry. Folio is up-to-date. */ -int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) +int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; - char *kaddr = page_address(page); - unsigned int from, to; + char *kaddr = (char *)((unsigned long)dir & ~(folio_size(folio) - 1)); + size_t from, to; struct nilfs_dir_entry *de, *pde = NULL; int err; @@ -559,17 +544,19 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) de = nilfs_next_entry(de); } if (pde) - from = (char *)pde - (char *)page_address(page); - lock_page(page); - err = nilfs_prepare_chunk(page, from, to); - BUG_ON(err); + from = (char *)pde - kaddr; + folio_lock(folio); + err = nilfs_prepare_chunk(folio, from, to); + if (unlikely(err)) { + folio_unlock(folio); + goto out; + } if (pde) pde->rec_len = nilfs_rec_len_to_disk(to - from); dir->inode = 0; - nilfs_commit_chunk(page, mapping, from, to); - inode->i_ctime = inode->i_mtime = current_time(inode); + nilfs_commit_chunk(folio, mapping, from, to); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); out: - nilfs_put_page(page); return err; } @@ -579,39 +566,39 @@ out: int nilfs_make_empty(struct inode *inode, struct inode *parent) { struct address_space *mapping = inode->i_mapping; - struct page *page = grab_cache_page(mapping, 0); + struct folio *folio = filemap_grab_folio(mapping, 0); unsigned int chunk_size = nilfs_chunk_size(inode); struct nilfs_dir_entry *de; int err; void *kaddr; - if (!page) - return -ENOMEM; + if (IS_ERR(folio)) + return PTR_ERR(folio); - err = nilfs_prepare_chunk(page, 0, chunk_size); + err = nilfs_prepare_chunk(folio, 0, chunk_size); if (unlikely(err)) { - unlock_page(page); + folio_unlock(folio); goto fail; } - kaddr = kmap_atomic(page); + kaddr = kmap_local_folio(folio, 0); memset(kaddr, 0, chunk_size); de = (struct nilfs_dir_entry *)kaddr; de->name_len = 1; de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1)); memcpy(de->name, ".\0\0", 4); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); de->name_len = 2; de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1)); de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); - nilfs_set_de_type(de, inode); - kunmap_atomic(kaddr); - nilfs_commit_chunk(page, mapping, 0, chunk_size); + de->file_type = fs_umode_to_ftype(inode->i_mode); + kunmap_local(kaddr); + nilfs_commit_chunk(folio, mapping, 0, chunk_size); fail: - put_page(page); + folio_put(folio); return err; } @@ -620,18 +607,17 @@ fail: */ int nilfs_empty_dir(struct inode *inode) { - struct page *page = NULL; + struct folio *folio = NULL; + char *kaddr; unsigned long i, npages = dir_pages(inode); for (i = 0; i < npages; i++) { - char *kaddr; struct nilfs_dir_entry *de; - page = nilfs_get_page(inode, i); - if (IS_ERR(page)) - continue; + kaddr = nilfs_get_folio(inode, i, &folio); + if (IS_ERR(kaddr)) + return 0; - kaddr = page_address(page); de = (struct nilfs_dir_entry *)kaddr; kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1); @@ -657,12 +643,12 @@ int nilfs_empty_dir(struct inode *inode) } de = nilfs_next_entry(de); } - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); } return 1; not_empty: - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return 0; } diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index a35f2795b242..2d8dc6b35b54 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -66,7 +66,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, dat = nilfs_bmap_get_dat(direct); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr = blocknr; } @@ -79,7 +79,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt) @@ -87,6 +87,11 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, } *ptrp = ptr; return cnt; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + return ret; } static __u64 @@ -268,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap, dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); ptr = nilfs_direct_get_ptr(bmap, key); + if (ptr == NILFS_BMAP_INVALID_PTR) + return -EINVAL; + if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; newreq.pr_entry_nr = ptr; @@ -314,6 +322,7 @@ static int nilfs_direct_assign_p(struct nilfs_bmap *direct, binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = 0; + memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index a265d391ffe9..1b8d754db44d 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -45,34 +45,36 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vma->vm_file); struct nilfs_transaction_info ti; + struct buffer_head *bh, *head; int ret = 0; if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) return VM_FAULT_SIGBUS; /* -ENOSPC */ sb_start_pagefault(inode->i_sb); - lock_page(page); - if (page->mapping != inode->i_mapping || - page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { - unlock_page(page); + folio_lock(folio); + if (folio->mapping != inode->i_mapping || + folio_pos(folio) >= i_size_read(inode) || + !folio_test_uptodate(folio)) { + folio_unlock(folio); ret = -EFAULT; /* make the VM retry the fault */ goto out; } /* - * check to see if the page is mapped already (no holes) + * check to see if the folio is mapped already (no holes) */ - if (PageMappedToDisk(page)) + if (folio_test_mappedtodisk(folio)) goto mapped; - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; + head = folio_buffers(folio); + if (head) { int fully_mapped = 1; - bh = head = page_buffers(page); + bh = head; do { if (!buffer_mapped(bh)) { fully_mapped = 0; @@ -81,11 +83,11 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) } while (bh = bh->b_this_page, bh != head); if (fully_mapped) { - SetPageMappedToDisk(page); + folio_set_mappedtodisk(folio); goto mapped; } } - unlock_page(page); + folio_unlock(folio); /* * fill hole blocks @@ -105,10 +107,16 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) nilfs_transaction_commit(inode->i_sb); mapped: - wait_for_stable_page(page); + /* + * Since checksumming including data blocks is performed to determine + * the validity of the log to be written and used for recovery, it is + * necessary to wait for writeback to finish here, regardless of the + * stable write requirement of the backing device. + */ + folio_wait_writeback(folio); out: sb_end_pagefault(inode->i_sb); - return block_page_mkwrite_return(ret); + return vmf_fs_error(ret); } static const struct vm_operations_struct nilfs_file_vm_ops = { @@ -117,10 +125,10 @@ static const struct vm_operations_struct nilfs_file_vm_ops = { .page_mkwrite = nilfs_page_mkwrite, }; -static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int nilfs_file_mmap_prepare(struct vm_area_desc *desc) { - file_accessed(file); - vma->vm_ops = &nilfs_file_vm_ops; + file_accessed(desc->file); + desc->vm_ops = &nilfs_file_vm_ops; return 0; } @@ -136,11 +144,11 @@ const struct file_operations nilfs_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_compat_ioctl, #endif /* CONFIG_COMPAT */ - .mmap = nilfs_file_mmap, + .mmap_prepare = nilfs_file_mmap_prepare, .open = generic_file_open, /* .release = nilfs_release_file, */ .fsync = nilfs_sync_file, - .splice_read = generic_file_splice_read, + .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, }; diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index b0d22ff24b67..561c220799c7 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -46,14 +46,11 @@ * specified by @pbn to the GC pagecache with the key @blkoff. * This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer. * - * Return Value: On success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The block specified with @pbn does not exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - The block specified with @pbn does not exist. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, sector_t pbn, __u64 vbn, @@ -73,10 +70,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, struct the_nilfs *nilfs = inode->i_sb->s_fs_info; err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn); - if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ - brelse(bh); + if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */ goto failed; - } } lock_buffer(bh); @@ -85,10 +80,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, goto out; } - if (!buffer_mapped(bh)) { - bh->b_bdev = inode->i_sb->s_bdev; + if (!buffer_mapped(bh)) set_buffer_mapped(bh); - } bh->b_blocknr = pbn; bh->b_end_io = end_buffer_read_sync; get_bh(bh); @@ -100,8 +93,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, *out_bh = bh; failed: - unlock_page(bh->b_page); - put_page(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); + if (unlikely(err)) + brelse(bh); return err; } @@ -116,12 +111,11 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, * specified by @vbn to the GC pagecache. @pbn can be supplied by the * caller to avoid translation of the disk block address. * - * Return Value: On success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Invalid virtual block address. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) @@ -140,7 +134,7 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) { wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - struct inode *inode = bh->b_page->mapping->host; + struct inode *inode = bh->b_folio->mapping->host; nilfs_err(inode->i_sb, "I/O error reading %s block for GC (ino=%lu, vblocknr=%llu)", @@ -165,7 +159,7 @@ int nilfs_init_gcinode(struct inode *inode) inode->i_mode = S_IFREG; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); - inode->i_mapping->a_ops = &empty_aops; + inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; ii->i_flags = 0; nilfs_bmap_init_gc(ii->i_bmap); @@ -175,6 +169,7 @@ int nilfs_init_gcinode(struct inode *inode) /** * nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes + * @nilfs: NILFS filesystem instance */ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) { diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index a8a4bc8490b4..99eb8a59009e 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -15,6 +15,7 @@ #include "mdt.h" #include "alloc.h" #include "ifile.h" +#include "cpfile.h" /** * struct nilfs_ifile_info - on-memory private data of ifile @@ -37,17 +38,16 @@ static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile) * @out_ino: pointer to a variable to store inode number * @out_bh: buffer_head contains newly allocated disk inode * - * Return Value: On success, 0 is returned and the newly allocated inode - * number is stored in the place pointed by @ino, and buffer_head pointer - * that contains newly allocated disk inode structure is stored in the - * place pointed by @out_bh - * On error, one of the following negative error codes is returned. + * nilfs_ifile_create_inode() allocates a new inode in the ifile metadata + * file and stores the inode number in the variable pointed to by @out_ino, + * as well as storing the ifile's buffer with the disk inode in the location + * pointed to by @out_bh. * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOSPC - No inode left. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No inode left. */ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, struct buffer_head **out_bh) @@ -55,13 +55,10 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, struct nilfs_palloc_req req; int ret; - req.pr_entry_nr = 0; /* - * 0 says find free inode from beginning - * of a group. dull code!! - */ + req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb); req.pr_entry_bh = NULL; - ret = nilfs_palloc_prepare_alloc_entry(ifile, &req); + ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false); if (!ret) { ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1, &req.pr_entry_bh); @@ -85,14 +82,11 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, * @ifile: ifile inode * @ino: inode number * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The inode number @ino have not been allocated. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Inode number unallocated. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) { @@ -100,7 +94,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) .pr_entry_nr = ino, .pr_entry_bh = NULL }; struct nilfs_inode *raw_inode; - void *kaddr; + size_t offset; int ret; ret = nilfs_palloc_prepare_free_entry(ifile, &req); @@ -115,11 +109,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) return ret; } - kaddr = kmap_atomic(req.pr_entry_bh->b_page); - raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, - req.pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(ifile, req.pr_entry_nr, + req.pr_entry_bh); + raw_inode = kmap_local_folio(req.pr_entry_bh->b_folio, offset); raw_inode->i_flags = 0; - kunmap_atomic(kaddr); + kunmap_local(raw_inode); mark_buffer_dirty(req.pr_entry_bh); brelse(req.pr_entry_bh); @@ -152,6 +146,8 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, * @ifile: ifile inode * @nmaxinodes: current maximum of available inodes count [out] * @nfreeinodes: free inodes count [out] + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_ifile_count_free_inodes(struct inode *ifile, u64 *nmaxinodes, u64 *nfreeinodes) @@ -173,21 +169,26 @@ int nilfs_ifile_count_free_inodes(struct inode *ifile, * nilfs_ifile_read - read or get ifile inode * @sb: super block instance * @root: root object + * @cno: number of checkpoint entry to read * @inode_size: size of an inode - * @raw_inode: on-disk ifile inode - * @inodep: buffer to store the inode + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid checkpoint. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). */ int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, - size_t inode_size, struct nilfs_inode *raw_inode, - struct inode **inodep) + __u64 cno, size_t inode_size) { + struct the_nilfs *nilfs; struct inode *ifile; int err; ifile = nilfs_iget_locked(sb, root, NILFS_IFILE_INO); if (unlikely(!ifile)) return -ENOMEM; - if (!(ifile->i_state & I_NEW)) + if (!(inode_state_read_once(ifile) & I_NEW)) goto out; err = nilfs_mdt_init(ifile, NILFS_MDT_GFP, @@ -201,13 +202,13 @@ int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache); - err = nilfs_read_inode_common(ifile, raw_inode); + nilfs = sb->s_fs_info; + err = nilfs_cpfile_read_checkpoint(nilfs->ns_cpfile, cno, root, ifile); if (err) goto failed; unlock_new_inode(ifile); out: - *inodep = ifile; return 0; failed: iget_failed(ifile); diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 35c5273f4821..5d116a566d9e 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -21,15 +21,14 @@ static inline struct nilfs_inode * nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh) { - void *kaddr = kmap(ibh->b_page); + size_t __offset_in_folio = nilfs_palloc_entry_offset(ifile, ino, ibh); - return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr); + return kmap_local_folio(ibh->b_folio, __offset_in_folio); } -static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino, - struct buffer_head *ibh) +static inline void nilfs_ifile_unmap_inode(struct nilfs_inode *raw_inode) { - kunmap(ibh->b_page); + kunmap_local(raw_inode); } int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); @@ -39,7 +38,6 @@ int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *); int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, - size_t inode_size, struct nilfs_inode *raw_inode, - struct inode **inodep); + __u64 cno, size_t inode_size); #endif /* _NILFS_IFILE_H */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 232dd7b6cca1..51bde45d5865 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -15,6 +15,7 @@ #include <linux/writeback.h> #include <linux/uio.h> #include <linux/fiemap.h> +#include <linux/random.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" @@ -28,17 +29,13 @@ * @ino: inode number * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) - * @for_gc: inode for GC flag - * @for_btnc: inode for B-tree node cache flag - * @for_shadow: inode for shadowed page cache flag + * @type: inode type */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - bool for_gc; - bool for_btnc; - bool for_shadow; + unsigned int type; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -71,6 +68,8 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n) * * This function does not issue actual read request of the specified data * block. It is done by VFS. + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct buffer_head *bh_result, int create) @@ -112,7 +111,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", __func__, inode->i_ino, (unsigned long long)blkoff); - err = 0; + err = -EAGAIN; } nilfs_transaction_abort(inode->i_sb); goto out; @@ -144,6 +143,8 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, * address_space_operations. * @file: file struct of the file to be read * @folio: the folio to be read + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_read_folio(struct file *file, struct folio *folio) { @@ -162,7 +163,7 @@ static int nilfs_writepages(struct address_space *mapping, int err = 0; if (sb_rdonly(inode->i_sb)) { - nilfs_clear_dirty_pages(mapping, false); + nilfs_clear_dirty_pages(mapping); return -EROFS; } @@ -173,36 +174,6 @@ static int nilfs_writepages(struct address_space *mapping, return err; } -static int nilfs_writepage(struct page *page, struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - int err; - - if (sb_rdonly(inode->i_sb)) { - /* - * It means that filesystem was remounted in read-only - * mode because of error or metadata corruption. But we - * have dirty pages that try to be flushed in background. - * So, here we simply discard this dirty page. - */ - nilfs_clear_dirty_page(page, false); - unlock_page(page); - return -EROFS; - } - - redirty_page_for_writepage(wbc, page); - unlock_page(page); - - if (wbc->sync_mode == WB_SYNC_ALL) { - err = nilfs_construct_segment(inode->i_sb); - if (unlikely(err)) - return err; - } else if (wbc->for_reclaim) - nilfs_flush_segment(inode->i_sb, inode->i_ino); - - return 0; -} - static bool nilfs_dirty_folio(struct address_space *mapping, struct folio *folio) { @@ -214,7 +185,7 @@ static bool nilfs_dirty_folio(struct address_space *mapping, /* * The page may not be locked, eg if called from try_to_unmap_one() */ - spin_lock(&mapping->private_lock); + spin_lock(&mapping->i_private_lock); head = folio_buffers(folio); if (head) { struct buffer_head *bh = head; @@ -230,7 +201,7 @@ static bool nilfs_dirty_folio(struct address_space *mapping, } else if (ret) { nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits); } - spin_unlock(&mapping->private_lock); + spin_unlock(&mapping->i_private_lock); if (nr_dirty) nilfs_set_file_dirty(inode, nr_dirty); @@ -247,9 +218,10 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to) } } -static int nilfs_write_begin(struct file *file, struct address_space *mapping, +static int nilfs_write_begin(const struct kiocb *iocb, + struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata) + struct folio **foliop, void **fsdata) { struct inode *inode = mapping->host; @@ -258,7 +230,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) return err; - err = block_write_begin(mapping, pos, len, pagep, nilfs_get_block); + err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block); if (unlikely(err)) { nilfs_write_failed(mapping, pos + len); nilfs_transaction_abort(inode->i_sb); @@ -266,18 +238,19 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, return err; } -static int nilfs_write_end(struct file *file, struct address_space *mapping, +static int nilfs_write_end(const struct kiocb *iocb, + struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { struct inode *inode = mapping->host; unsigned int start = pos & (PAGE_SIZE - 1); unsigned int nr_dirty; int err; - nr_dirty = nilfs_page_count_clean_buffers(page, start, + nr_dirty = nilfs_page_count_clean_buffers(folio, start, start + copied); - copied = generic_write_end(file, mapping, pos, len, copied, page, + copied = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata); nilfs_set_file_dirty(inode, nr_dirty); err = nilfs_transaction_commit(inode->i_sb); @@ -297,7 +270,6 @@ nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } const struct address_space_operations nilfs_aops = { - .writepage = nilfs_writepage, .read_folio = nilfs_read_folio, .writepages = nilfs_writepages, .dirty_folio = nilfs_dirty_folio, @@ -306,16 +278,20 @@ const struct address_space_operations nilfs_aops = { .write_end = nilfs_write_end, .invalidate_folio = block_invalidate_folio, .direct_IO = nilfs_direct_IO, + .migrate_folio = buffer_migrate_folio_norefs, .is_partially_uptodate = block_is_partially_uptodate, }; +const struct address_space_operations nilfs_buffer_cache_aops = { + .invalidate_folio = block_invalidate_folio, +}; + static int nilfs_insert_inode_locked(struct inode *inode, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -324,7 +300,6 @@ static int nilfs_insert_inode_locked(struct inode *inode, struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; - struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; @@ -342,31 +317,19 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); ii->i_state = BIT(NILFS_I_NEW); + ii->i_type = NILFS_I_TYPE_NORMAL; ii->i_root = root; err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - - if (unlikely(ino < NILFS_USER_INO)) { - nilfs_warn(sb, - "inode bitmap is inconsistent for reserved inodes"); - do { - brelse(bh); - err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); - if (unlikely(err)) - goto failed_ifile_create_inode; - } while (ino < NILFS_USER_INO); - - nilfs_info(sb, "repaired inode bitmap for reserved inodes"); - } ii->i_bh = bh; atomic64_inc(&root->inodes_count); - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = ino; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + simple_inode_init_ts(inode); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { err = nilfs_bmap_read(ii->i_bmap, NULL); @@ -384,9 +347,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; nilfs_set_inode_flags(inode); - spin_lock(&nilfs->ns_next_gen_lock); - inode->i_generation = nilfs->ns_next_generation++; - spin_unlock(&nilfs->ns_next_gen_lock); + inode->i_generation = get_random_u32(); if (nilfs_insert_inode_locked(inode, root, ino) < 0) { err = -EIO; goto failed_after_creation; @@ -404,7 +365,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) failed_after_creation: clear_nlink(inode); - if (inode->i_state & I_NEW) + if (inode_state_read_once(inode) & I_NEW) unlock_new_inode(inode); iput(inode); /* * raw_inode will be deleted through @@ -449,12 +410,12 @@ int nilfs_read_inode_common(struct inode *inode, i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le64_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); - inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime), + le32_to_cpu(raw_inode->i_mtime_nsec)); + inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime), + le32_to_cpu(raw_inode->i_ctime_nsec)); + inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime), + le32_to_cpu(raw_inode->i_mtime_nsec)); if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode)) return -EIO; /* this inode is for metadata and corrupted */ if (inode->i_nlink == 0) @@ -513,13 +474,20 @@ static int __nilfs_read_inode(struct super_block *sb, inode->i_op = &nilfs_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &nilfs_aops; - } else { + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { inode->i_op = &nilfs_special_inode_operations; init_special_inode( inode, inode->i_mode, huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); + } else { + nilfs_error(sb, + "invalid file type bits in mode 0%o for inode %lu", + inode->i_mode, ino); + err = -EIO; + goto failed_unmap; } - nilfs_ifile_unmap_inode(root->ifile, ino, bh); + nilfs_ifile_unmap_inode(raw_inode); brelse(bh); up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); nilfs_set_inode_flags(inode); @@ -528,7 +496,7 @@ static int __nilfs_read_inode(struct super_block *sb, return 0; failed_unmap: - nilfs_ifile_unmap_inode(root->ifile, ino, bh); + nilfs_ifile_unmap_inode(raw_inode); brelse(bh); bad_inode: @@ -545,23 +513,10 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); - if (test_bit(NILFS_I_BTNC, &ii->i_state)) { - if (!args->for_btnc) - return 0; - } else if (args->for_btnc) { + if (ii->i_type != args->type) return 0; - } - if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { - if (!args->for_shadow) - return 0; - } else if (args->for_shadow) { - return 0; - } - - if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) - return !args->for_gc; - return args->for_gc && args->cno == ii->i_cno; + return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno; } static int nilfs_iget_set(struct inode *inode, void *opaque) @@ -571,15 +526,9 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) inode->i_ino = args->ino; NILFS_I(inode)->i_cno = args->cno; NILFS_I(inode)->i_root = args->root; + NILFS_I(inode)->i_type = args->type; if (args->root && args->ino == NILFS_ROOT_INO) nilfs_get_root(args->root); - - if (args->for_gc) - NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); - if (args->for_btnc) - NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); - if (args->for_shadow) - NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); return 0; } @@ -587,8 +536,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -598,8 +546,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -614,8 +561,14 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, inode = nilfs_iget_locked(sb, root, ino); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) + + if (!(inode_state_read_once(inode) & I_NEW)) { + if (!inode->i_nlink) { + iput(inode); + return ERR_PTR(-ESTALE); + } return inode; + } err = __nilfs_read_inode(sb, root, ino, inode); if (unlikely(err)) { @@ -630,8 +583,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = true, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC }; struct inode *inode; int err; @@ -639,7 +591,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) + if (!(inode_state_read_once(inode) & I_NEW)) return inode; err = nilfs_init_gcinode(inode); @@ -659,10 +611,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, * or does nothing if the inode already has it. This function allocates * an additional inode to maintain page cache of B-tree nodes one-on-one. * - * Return Value: On success, 0 is returned. On errors, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or %-ENOMEM if memory is insufficient. */ int nilfs_attach_btree_node_cache(struct inode *inode) { @@ -676,15 +625,13 @@ int nilfs_attach_btree_node_cache(struct inode *inode) args.ino = inode->i_ino; args.root = ii->i_root; args.cno = ii->i_cno; - args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; - args.for_btnc = true; - args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; + args.type = ii->i_type | NILFS_I_TYPE_BTNC; btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, nilfs_iget_set, &args); if (unlikely(!btnc_inode)) return -ENOMEM; - if (btnc_inode->i_state & I_NEW) { + if (inode_state_read_once(btnc_inode) & I_NEW) { nilfs_init_btnc_inode(btnc_inode); unlock_new_inode(btnc_inode); } @@ -723,17 +670,14 @@ void nilfs_detach_btree_node_cache(struct inode *inode) * in one inode and the one for b-tree node pages is set up in the * other inode, which is attached to the former inode. * - * Return Value: On success, a pointer to the inode for data pages is - * returned. On errors, one of the following negative error code is returned - * in a pointer type. - * - * %-ENOMEM - Insufficient memory available. + * Return: a pointer to the inode for data pages on success, or %-ENOMEM + * if memory is insufficient. */ struct inode *nilfs_iget_for_shadow(struct inode *inode) { struct nilfs_iget_args args = { - .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = true + .ino = inode->i_ino, .root = NULL, .cno = 0, + .type = NILFS_I_TYPE_SHADOW }; struct inode *s_inode; int err; @@ -742,12 +686,13 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode) nilfs_iget_set, &args); if (unlikely(!s_inode)) return ERR_PTR(-ENOMEM); - if (!(s_inode->i_state & I_NEW)) + if (!(inode_state_read_once(s_inode) & I_NEW)) return inode; NILFS_I(s_inode)->i_flags = 0; memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); + s_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; err = nilfs_attach_btree_node_cache(s_inode); if (unlikely(err)) { @@ -758,8 +703,18 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode) return s_inode; } +/** + * nilfs_write_inode_common - export common inode information to on-disk inode + * @inode: inode object + * @raw_inode: on-disk inode + * + * This function writes standard information from the on-memory inode @inode + * to @raw_inode on ifile, cpfile or a super root block. Since inode bmap + * data is not exported, nilfs_bmap_write() must be called separately during + * log writing. + */ void nilfs_write_inode_common(struct inode *inode, - struct nilfs_inode *raw_inode, int has_bmap) + struct nilfs_inode *raw_inode) { struct nilfs_inode_info *ii = NILFS_I(inode); @@ -768,30 +723,15 @@ void nilfs_write_inode_common(struct inode *inode, raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le64(inode->i_size); - raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); - raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); - raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); + raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode)); + raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); + raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode)); raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); raw_inode->i_flags = cpu_to_le32(ii->i_flags); raw_inode->i_generation = cpu_to_le32(inode->i_generation); - if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) { - struct the_nilfs *nilfs = inode->i_sb->s_fs_info; - - /* zero-fill unused portion in the case of super root block */ - raw_inode->i_xattr = 0; - raw_inode->i_pad = 0; - memset((void *)raw_inode + sizeof(*raw_inode), 0, - nilfs->ns_inode_size - sizeof(*raw_inode)); - } - - if (has_bmap) - nilfs_bmap_write(ii->i_bmap, raw_inode); - else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - raw_inode->i_device_code = - cpu_to_le64(huge_encode_dev(inode->i_rdev)); /* * When extending inode, nilfs->ns_inode_size should be checked * for substitutions of appended fields. @@ -812,14 +752,13 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags) if (flags & I_DIRTY_DATASYNC) set_bit(NILFS_I_INODE_SYNC, &ii->i_state); - nilfs_write_inode_common(inode, raw_inode, 0); - /* - * XXX: call with has_bmap = 0 is a workaround to avoid - * deadlock of bmap. This delays update of i_bmap to just - * before writing. - */ + nilfs_write_inode_common(inode, raw_inode); - nilfs_ifile_unmap_inode(ifile, ino, ibh); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + raw_inode->i_device_code = + cpu_to_le64(huge_encode_dev(inode->i_rdev)); + + nilfs_ifile_unmap_inode(raw_inode); } #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ @@ -875,7 +814,7 @@ void nilfs_truncate(struct inode *inode) nilfs_truncate_bmap(ii, blkoff); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); @@ -905,7 +844,7 @@ static void nilfs_clear_inode(struct inode *inode) if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - if (!test_bit(NILFS_I_BTNC, &ii->i_state)) + if (!(ii->i_type & NILFS_I_TYPE_BTNC)) nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) @@ -917,6 +856,7 @@ void nilfs_evict_inode(struct inode *inode) struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); + struct the_nilfs *nilfs; int ret; if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { @@ -929,6 +869,23 @@ void nilfs_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); + nilfs = sb->s_fs_info; + if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) { + /* + * If this inode is about to be disposed after the file system + * has been degraded to read-only due to file system corruption + * or after the writer has been detached, do not make any + * changes that cause writes, just clear it. + * Do this check after read-locking ns_segctor_sem by + * nilfs_transaction_begin() in order to avoid a race with + * the writer detach operation. + */ + clear_inode(inode); + nilfs_clear_inode(inode); + nilfs_transaction_abort(sb); + return; + } + /* TODO: some of the following operations may fail. */ nilfs_truncate_bmap(ii, 0); nilfs_mark_inode_dirty(inode); @@ -949,7 +906,7 @@ void nilfs_evict_inode(struct inode *inode) */ } -int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct nilfs_transaction_info ti; @@ -957,7 +914,7 @@ int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct super_block *sb = inode->i_sb; int err; - err = setattr_prepare(&init_user_ns, dentry, iattr); + err = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (err) return err; @@ -972,7 +929,7 @@ int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, nilfs_truncate(inode); } - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) { @@ -988,7 +945,7 @@ out_err: return err; } -int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode, +int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct nilfs_root *root = NILFS_I(inode)->i_root; @@ -997,7 +954,7 @@ int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode, root->cno != NILFS_CPTREE_CURRENT_CNO) return -EROFS; /* snapshot is not writable */ - return generic_permission(&init_user_ns, inode, mask); + return generic_permission(&nop_mnt_idmap, inode, mask); } int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) @@ -1007,7 +964,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) int err; spin_lock(&nilfs->ns_inode_lock); - if (ii->i_bh == NULL) { + if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) { spin_unlock(&nilfs->ns_inode_lock); err = nilfs_ifile_get_inode_block(ii->i_root->ifile, inode->i_ino, pbh); @@ -1016,7 +973,10 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) spin_lock(&nilfs->ns_inode_lock); if (ii->i_bh == NULL) ii->i_bh = *pbh; - else { + else if (unlikely(!buffer_uptodate(ii->i_bh))) { + __brelse(ii->i_bh); + ii->i_bh = *pbh; + } else { brelse(*pbh); *pbh = ii->i_bh; } @@ -1083,9 +1043,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty) int __nilfs_mark_inode_dirty(struct inode *inode, int flags) { + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct buffer_head *ibh; int err; + /* + * Do not dirty inodes after the log writer has been detached + * and its nilfs_root struct has been freed. + */ + if (unlikely(nilfs_purging(nilfs))) + return 0; + err = nilfs_load_inode_block(inode, &ibh); if (unlikely(err)) { nilfs_warn(inode->i_sb, @@ -1227,7 +1195,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (size) { if (phys && blkphy << blkbits == phys + size) { /* The current extent goes on */ - size += n << blkbits; + size += (u64)n << blkbits; } else { /* Terminate the current extent */ ret = fiemap_fill_next_extent( @@ -1240,14 +1208,14 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } } else { /* Start a new extent */ flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } blkoff += n; } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 87e1004b606d..e17b8da66491 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -17,6 +17,7 @@ #include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */ #include <linux/buffer_head.h> #include <linux/fileattr.h> +#include <linux/string.h> #include "nilfs.h" #include "segment.h" #include "bmap.h" @@ -32,17 +33,14 @@ * @dofunc: concrete function of get/set metadata info * * Description: nilfs_ioctl_wrap_copy() gets/sets metadata info by means of - * calling dofunc() function on the basis of @argv argument. - * - * Return Value: On success, 0 is returned and requested metadata info - * is copied into userspace. On error, one of the following - * negative error codes is returned. - * - * %-EINVAL - Invalid arguments from userspace. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EFAULT - Failure during execution of requested operation. + * calling dofunc() function on the basis of @argv argument. If successful, + * the requested metadata information is copied to userspace memory. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during execution of requested operation. + * * %-EINVAL - Invalid arguments from userspace. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, struct nilfs_argv *argv, int dir, @@ -51,7 +49,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, void *, size_t, size_t)) { void *buf; - void __user *base = (void __user *)(unsigned long)argv->v_base; + void __user *base = u64_to_user_ptr(argv->v_base); size_t maxmembs, total, n; ssize_t nr; int ret, i; @@ -60,7 +58,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_nmembs == 0) return 0; - if (argv->v_size > PAGE_SIZE) + if ((size_t)argv->v_size > PAGE_SIZE) return -EINVAL; /* @@ -71,7 +69,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_index > ~(__u64)0 - argv->v_nmembs) return -EINVAL; - buf = (void *)__get_free_pages(GFP_NOFS, 0); + buf = (void *)get_zeroed_page(GFP_NOFS); if (unlikely(!buf)) return -ENOMEM; maxmembs = PAGE_SIZE / argv->v_size; @@ -114,9 +112,13 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, } /** - * nilfs_fileattr_get - ioctl to support lsattr + * nilfs_fileattr_get - retrieve miscellaneous file attributes + * @dentry: the object to retrieve from + * @fa: fileattr pointer + * + * Return: always 0 as success. */ -int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) +int nilfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) { struct inode *inode = d_inode(dentry); @@ -126,10 +128,15 @@ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) } /** - * nilfs_fileattr_set - ioctl to support chattr + * nilfs_fileattr_set - change miscellaneous file attributes + * @idmap: idmap of the mount + * @dentry: the object to change + * @fa: fileattr pointer + * + * Return: 0 on success, or a negative error code on failure. */ -int nilfs_fileattr_set(struct user_namespace *mnt_userns, - struct dentry *dentry, struct fileattr *fa) +int nilfs_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa) { struct inode *inode = d_inode(dentry); struct nilfs_transaction_info ti; @@ -149,7 +156,7 @@ int nilfs_fileattr_set(struct user_namespace *mnt_userns, NILFS_I(inode)->i_flags = oldflags | (flags & FS_FL_USER_MODIFIABLE); nilfs_set_inode_flags(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); @@ -159,6 +166,10 @@ int nilfs_fileattr_set(struct user_namespace *mnt_userns, /** * nilfs_ioctl_getversion - get info about a file's version (generation number) + * @inode: inode object + * @argp: userspace memory where the generation number of @inode is stored + * + * Return: 0 on success, or %-EFAULT on error. */ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) { @@ -176,13 +187,10 @@ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) * given checkpoint between checkpoint and snapshot state. This ioctl * is used in chcp and mkcp utilities. * - * Return Value: On success, 0 is returned and mode of a checkpoint is - * changed. On error, one of the following negative error codes - * is returned. - * - * %-EPERM - Operation not permitted. - * - * %-EFAULT - Failure during checkpoint mode changing. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * %-EFAULT - Failure during checkpoint mode changing. + * %-EPERM - Operation not permitted. */ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -230,13 +238,10 @@ out: * checkpoint from NILFS2 file system. This ioctl is used in rmcp * utility. * - * Return Value: On success, 0 is returned and a checkpoint is - * removed. On error, one of the following negative error codes - * is returned. - * - * %-EPERM - Operation not permitted. - * - * %-EFAULT - Failure during checkpoint removing. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * %-EFAULT - Failure during checkpoint removing. + * %-EPERM - Operation not permitted. */ static int nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, @@ -282,7 +287,7 @@ out: * requested checkpoints. The NILFS_IOCTL_GET_CPINFO ioctl is used in * lscp utility and by nilfs_cleanerd daemon. * - * Return value: count of nilfs_cpinfo structures in output buffer. + * Return: Count of nilfs_cpinfo structures in output buffer. */ static ssize_t nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, @@ -306,17 +311,14 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, * * Description: nilfs_ioctl_get_cpstat() returns information about checkpoints. * The NILFS_IOCTL_GET_CPSTAT ioctl is used by lscp, rmcp utilities - * and by nilfs_cleanerd daemon. - * - * Return Value: On success, 0 is returned, and checkpoints information is - * copied into userspace pointer @argp. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EFAULT - Failure during getting checkpoints statistics. + * and by nilfs_cleanerd daemon. The checkpoint statistics are copied to + * the userspace memory pointed to by @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during getting checkpoints statistics. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -349,7 +351,8 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, * info about requested segments. The NILFS_IOCTL_GET_SUINFO ioctl is used * in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon. * - * Return value: count of nilfs_suinfo structures in output buffer. + * Return: Count of nilfs_suinfo structures in output buffer on success, + * or a negative error code on failure. */ static ssize_t nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, @@ -373,17 +376,14 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, * * Description: nilfs_ioctl_get_sustat() returns segment usage statistics. * The NILFS_IOCTL_GET_SUSTAT ioctl is used in lssu, nilfs_resize utilities - * and by nilfs_cleanerd daemon. - * - * Return Value: On success, 0 is returned, and segment usage information is - * copied into userspace pointer @argp. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EFAULT - Failure during getting segment usage statistics. + * and by nilfs_cleanerd daemon. The requested segment usage information is + * copied to the userspace memory pointed to by @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during getting segment usage statistics. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -416,7 +416,8 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, * on virtual block addresses. The NILFS_IOCTL_GET_VINFO ioctl is used * by nilfs_cleanerd daemon. * - * Return value: count of nilfs_vinfo structures in output buffer. + * Return: Count of nilfs_vinfo structures in output buffer on success, or + * a negative error code on failure. */ static ssize_t nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, @@ -443,7 +444,8 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl * is used by nilfs_cleanerd daemon. * - * Return value: count of nilfs_bdescs structures in output buffer. + * Return: Count of nilfs_bdescs structures in output buffer on success, or + * a negative error code on failure. */ static ssize_t nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, @@ -480,19 +482,15 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, * * Description: nilfs_ioctl_do_get_bdescs() function returns information * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl - * is used by nilfs_cleanerd daemon. - * - * Return Value: On success, 0 is returned, and disk block descriptors are - * copied into userspace pointer @argp. On error, one of the following - * negative error codes is returned. - * - * %-EINVAL - Invalid arguments from userspace. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EFAULT - Failure during getting disk block descriptors. + * is used by nilfs_cleanerd daemon. If successful, disk block descriptors + * are copied to userspace pointer @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during getting disk block descriptors. + * * %-EINVAL - Invalid arguments from userspace. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -526,16 +524,12 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, * Description: nilfs_ioctl_move_inode_block() function registers data/node * buffer in the GC pagecache and submit read request. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - Requested block doesn't exist. - * - * %-EEXIST - Blocks conflict is detected. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EEXIST - Block conflict detected. + * * %-EIO - I/O error. + * * %-ENOENT - Requested block doesn't exist. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_move_inode_block(struct inode *inode, struct nilfs_vdesc *vdesc, @@ -590,8 +584,8 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, * blocks that garbage collector specified with the array of nilfs_vdesc * structures and stores them into page caches of GC inodes. * - * Return Value: Number of processed nilfs_vdesc structures or - * error code, otherwise. + * Return: Number of processed nilfs_vdesc structures on success, or + * a negative error code on failure. */ static int nilfs_ioctl_move_blocks(struct super_block *sb, struct nilfs_argv *argv, void *buf) @@ -668,14 +662,11 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, * in the period from p_start to p_end, excluding p_end itself. The checkpoints * which have been already deleted are ignored. * - * Return Value: Number of processed nilfs_period structures or - * error code, otherwise. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - invalid checkpoints. + * Return: Number of processed nilfs_period structures on success, or one of + * the following negative error codes on failure: + * * %-EINVAL - invalid checkpoints. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) @@ -703,14 +694,11 @@ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, * Description: nilfs_ioctl_free_vblocknrs() function frees * the virtual block numbers specified by @buf and @argv->v_nmembs. * - * Return Value: Number of processed virtual block numbers or - * error code, otherwise. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The virtual block number have not been allocated. + * Return: Number of processed virtual block numbers on success, or one of the + * following negative error codes on failure: + * * %-EIO - I/O error. + * * %-ENOENT - Unallocated virtual block number. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) @@ -732,14 +720,11 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, * Description: nilfs_ioctl_mark_blocks_dirty() function marks * metadata file or data blocks as dirty. * - * Return Value: Number of processed block descriptors or - * error code, otherwise. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EIO - I/O error - * - * %-ENOENT - the specified block does not exist (hole block) + * Return: Number of processed block descriptors on success, or one of the + * following negative error codes on failure: + * * %-EIO - I/O error. + * * %-ENOENT - Non-existent block (hole block). + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) @@ -838,7 +823,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, * from userspace. The NILFS_IOCTL_CLEAN_SEGMENTS ioctl is used by * nilfs_cleanerd daemon. * - * Return Value: On success, 0 is returned or error code, otherwise. + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -851,7 +836,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, sizeof(struct nilfs_bdesc), sizeof(__u64), }; - void __user *base; void *kbufs[5]; struct the_nilfs *nilfs; size_t len, nsegs; @@ -872,16 +856,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, nsegs = argv[4].v_nmembs; if (argv[4].v_size != argsz[4]) goto out; - if (nsegs > UINT_MAX / sizeof(__u64)) - goto out; /* * argv[4] points to segment numbers this ioctl cleans. We - * use kmalloc() for its buffer because memory used for the - * segment numbers is enough small. + * use kmalloc() for its buffer because the memory used for the + * segment numbers is small enough. */ - kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base, - nsegs * sizeof(__u64)); + kbufs[4] = memdup_array_user(u64_to_user_ptr(argv[4].v_base), + nsegs, sizeof(__u64)); if (IS_ERR(kbufs[4])) { ret = PTR_ERR(kbufs[4]); goto out; @@ -900,20 +882,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, goto out_free; len = argv[n].v_size * argv[n].v_nmembs; - base = (void __user *)(unsigned long)argv[n].v_base; if (len == 0) { kbufs[n] = NULL; continue; } - kbufs[n] = vmalloc(len); - if (!kbufs[n]) { - ret = -ENOMEM; - goto out_free; - } - if (copy_from_user(kbufs[n], base, len)) { - ret = -EFAULT; - vfree(kbufs[n]); + kbufs[n] = vmemdup_user(u64_to_user_ptr(argv[n].v_base), len); + if (IS_ERR(kbufs[n])) { + ret = PTR_ERR(kbufs[n]); goto out_free; } } @@ -945,7 +921,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, out_free: while (--n >= 0) - vfree(kbufs[n]); + kvfree(kbufs[n]); kfree(kbufs[4]); out: mnt_drop_write_file(filp); @@ -964,20 +940,14 @@ out: * and metadata are written out to the device when it successfully * returned. * - * Return Value: On success, 0 is retured. On errors, one of the following - * negative error code is returned. - * - * %-EROFS - Read only filesystem. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EFAULT - Failure during execution of requested operation. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during execution of requested operation. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. + * * %-EROFS - Read only filesystem. */ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -1011,7 +981,7 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, * @filp: file object * @argp: pointer on argument from userspace * - * Return Value: On success, 0 is returned or error code, otherwise. + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp, void __user *argp) @@ -1047,7 +1017,7 @@ out: * checks the arguments from userspace and calls nilfs_sufile_trim_fs, which * performs the actual trim operation. * - * Return Value: On success, 0 is returned or negative error code, otherwise. + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp) { @@ -1089,7 +1059,7 @@ static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp) * of segments in bytes and upper limit of segments in bytes. * The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility. * - * Return Value: On success, 0 is returned or error code, otherwise. + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) { @@ -1113,9 +1083,16 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize; minseg = range[0] + segbytes - 1; - do_div(minseg, segbytes); + minseg = div64_ul(minseg, segbytes); + + if (range[1] < 4096) + goto out; + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); - do_div(maxseg, segbytes); + if (maxseg < segbytes) + goto out; + + maxseg = div64_ul(maxseg, segbytes); maxseg--; ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg); @@ -1133,17 +1110,15 @@ out: * @dofunc: concrete function of getting metadata info * * Description: nilfs_ioctl_get_info() gets metadata info by means of - * calling dofunc() function. - * - * Return Value: On success, 0 is returned and requested metadata info - * is copied into userspace. On error, one of the following - * negative error codes is returned. - * - * %-EINVAL - Invalid arguments from userspace. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EFAULT - Failure during execution of requested operation. + * calling dofunc() function. The requested metadata information is copied + * to userspace memory @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during execution of requested operation. + * * %-EINVAL - Invalid arguments from userspace. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, @@ -1183,18 +1158,14 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, * encapsulated in nilfs_argv and updates the segment usage info * according to the flags in nilfs_suinfo_update. * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EPERM - Not enough permissions - * - * %-EFAULT - Error copying input data - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid values in input (segment number, flags or nblocks) + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EEXIST - Block conflict detected. + * * %-EFAULT - Error copying input data. + * * %-EINVAL - Invalid values in input (segment number, flags or nblocks). + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-EPERM - Not enough permissions. */ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -1203,7 +1174,6 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, struct nilfs_transaction_info ti; struct nilfs_argv argv; size_t len; - void __user *base; void *kbuf; int ret; @@ -1234,18 +1204,12 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, goto out; } - base = (void __user *)(unsigned long)argv.v_base; - kbuf = vmalloc(len); - if (!kbuf) { - ret = -ENOMEM; + kbuf = vmemdup_user(u64_to_user_ptr(argv.v_base), len); + if (IS_ERR(kbuf)) { + ret = PTR_ERR(kbuf); goto out; } - if (copy_from_user(kbuf, base, len)) { - ret = -EFAULT; - goto out_free; - } - nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_sufile_set_suinfo(nilfs->ns_sufile, kbuf, argv.v_size, argv.v_nmembs); @@ -1254,13 +1218,98 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, else nilfs_transaction_commit(inode->i_sb); /* never fails */ -out_free: - vfree(kbuf); + kvfree(kbuf); out: mnt_drop_write_file(filp); return ret; } +/** + * nilfs_ioctl_get_fslabel - get the volume name of the file system + * @sb: super block instance + * @argp: pointer to userspace memory where the volume name should be stored + * + * Return: 0 on success, %-EFAULT if copying to userspace memory fails. + */ +static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + char label[NILFS_MAX_VOLUME_NAME + 1]; + + BUILD_BUG_ON(NILFS_MAX_VOLUME_NAME >= FSLABEL_MAX); + + down_read(&nilfs->ns_sem); + memtostr_pad(label, nilfs->ns_sbp[0]->s_volume_name); + up_read(&nilfs->ns_sem); + + if (copy_to_user(argp, label, sizeof(label))) + return -EFAULT; + return 0; +} + +/** + * nilfs_ioctl_set_fslabel - set the volume name of the file system + * @sb: super block instance + * @filp: file object + * @argp: pointer to userspace memory that contains the volume name + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Error copying input data. + * * %-EINVAL - Label length exceeds record size in superblock. + * * %-EIO - I/O error. + * * %-EPERM - Operation not permitted (insufficient permissions). + * * %-EROFS - Read only file system. + */ +static int nilfs_ioctl_set_fslabel(struct super_block *sb, struct file *filp, + void __user *argp) +{ + char label[NILFS_MAX_VOLUME_NAME + 1]; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + size_t len; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (copy_from_user(label, argp, NILFS_MAX_VOLUME_NAME + 1)) { + ret = -EFAULT; + goto out_drop_write; + } + + len = strnlen(label, NILFS_MAX_VOLUME_NAME + 1); + if (len > NILFS_MAX_VOLUME_NAME) { + nilfs_err(sb, "unable to set label with more than %zu bytes", + NILFS_MAX_VOLUME_NAME); + ret = -EINVAL; + goto out_drop_write; + } + + down_write(&nilfs->ns_sem); + sbp = nilfs_prepare_super(sb, false); + if (unlikely(!sbp)) { + ret = -EIO; + goto out_unlock; + } + + strtomem_pad(sbp[0]->s_volume_name, label, 0); + if (sbp[1]) + strtomem_pad(sbp[1]->s_volume_name, label, 0); + + ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); + +out_unlock: + up_write(&nilfs->ns_sem); +out_drop_write: + mnt_drop_write_file(filp); + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1303,6 +1352,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_set_alloc_range(inode, argp); case FITRIM: return nilfs_ioctl_trim_fs(inode, argp); + case FS_IOC_GETFSLABEL: + return nilfs_ioctl_get_fslabel(inode->i_sb, argp); + case FS_IOC_SETFSLABEL: + return nilfs_ioctl_set_fslabel(inode->i_sb, filp, argp); default: return -ENOTTY; } @@ -1329,6 +1382,8 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: case FITRIM: + case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: break; default: return -ENOIOCTLCMD; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index cbf4fa60eea2..946b0d3534a5 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -33,7 +33,8 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, struct buffer_head *, void *)) { struct nilfs_inode_info *ii = NILFS_I(inode); - void *kaddr; + struct folio *folio = bh->b_folio; + void *from; int ret; /* Caller exclude read accesses using page lock */ @@ -47,12 +48,14 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_mapped(bh); - kaddr = kmap_atomic(bh->b_page); - memset(kaddr + bh_offset(bh), 0, i_blocksize(inode)); + /* Initialize block (block size > PAGE_SIZE not yet supported) */ + from = kmap_local_folio(folio, offset_in_folio(folio, bh->b_data)); + memset(from, 0, bh->b_size); if (init_block) - init_block(inode, bh, kaddr); - flush_dcache_page(bh->b_page); - kunmap_atomic(kaddr); + init_block(inode, bh, from); + kunmap_local(from); + + flush_dcache_folio(folio); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -89,7 +92,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, if (buffer_uptodate(bh)) goto failed_bh; - bh->b_bdev = sb->s_bdev; err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); if (likely(!err)) { get_bh(bh); @@ -97,8 +99,8 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, } failed_bh: - unlock_page(bh->b_page); - put_page(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); brelse(bh); failed_unlock: @@ -158,8 +160,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, blk_opf_t opf, *out_bh = bh; failed_bh: - unlock_page(bh->b_page); - put_page(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); brelse(bh); failed: return ret; @@ -224,20 +226,21 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, * @out_bh: output of a pointer to the buffer_head * * nilfs_mdt_get_block() looks up the specified buffer and tries to create - * a new buffer if @create is not zero. On success, the returned buffer is - * assured to be either existing or formatted using a buffer lock on success. - * @out_bh is substituted only when zero is returned. - * - * Return Value: On success, it returns 0. On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * a new buffer if @create is not zero. If (and only if) this function + * succeeds, it stores a pointer to the retrieved buffer head in the location + * pointed to by @out_bh. * - * %-EIO - I/O error + * The retrieved buffer may be either an existing one or a newly allocated one. + * For a newly created buffer, if the callback function argument @init_block + * is non-NULL, the callback will be called with the buffer locked to format + * the block. * - * %-ENOENT - the specified block does not exist (hole block) - * - * %-EROFS - Read only filesystem (for create mode) + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - The specified block does not exist (hole block). + * * %-ENOMEM - Insufficient memory available. + * * %-EROFS - Read only filesystem (for create mode). */ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, void (*init_block)(struct inode *, @@ -273,14 +276,11 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, * @out_bh, and block offset to @blkoff, respectively. @out_bh and * @blkoff are substituted only when zero is returned. * - * Return Value: On success, it returns 0. On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EIO - I/O error - * - * %-ENOENT - no block was found in the range + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No block was found in the range. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_mdt_find_block(struct inode *inode, unsigned long start, unsigned long end, unsigned long *blkoff, @@ -319,12 +319,11 @@ out: * @inode: inode of the meta data file * @block: block offset * - * Return Value: On success, zero is returned. - * On error, one of the following negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EIO - I/O error + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Non-existent block. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) { @@ -347,39 +346,35 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and * tries to release the page including the buffer from a page cache. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-EBUSY - page has an active buffer. - * - * %-ENOENT - page cache has no page addressed by the offset. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Page has an active buffer. + * * %-ENOENT - Page cache has no page addressed by the offset. */ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) { - pgoff_t index = (pgoff_t)block >> - (PAGE_SHIFT - inode->i_blkbits); - struct page *page; - unsigned long first_block; + pgoff_t index = block >> (PAGE_SHIFT - inode->i_blkbits); + struct folio *folio; + struct buffer_head *bh; int ret = 0; int still_dirty; - page = find_lock_page(inode->i_mapping, index); - if (!page) + folio = filemap_lock_folio(inode->i_mapping, index); + if (IS_ERR(folio)) return -ENOENT; - wait_on_page_writeback(page); - - first_block = (unsigned long)index << - (PAGE_SHIFT - inode->i_blkbits); - if (page_has_buffers(page)) { - struct buffer_head *bh; + folio_wait_writeback(folio); - bh = nilfs_page_get_nth_block(page, block - first_block); + bh = folio_buffers(folio); + if (bh) { + unsigned long first_block = index << + (PAGE_SHIFT - inode->i_blkbits); + bh = get_nth_bh(bh, block - first_block); nilfs_forget_buffer(bh); } - still_dirty = PageDirty(page); - unlock_page(page); - put_page(page); + still_dirty = folio_test_dirty(folio); + folio_unlock(folio); + folio_put(folio); if (still_dirty || invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0) @@ -398,10 +393,10 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) return test_bit(NILFS_I_DIRTY, &ii->i_state); } -static int -nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) +static int nilfs_mdt_write_folio(struct folio *folio, + struct writeback_control *wbc) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct super_block *sb; int err = 0; @@ -409,16 +404,16 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) /* * It means that filesystem was remounted in read-only * mode because of error or metadata corruption. But we - * have dirty pages that try to be flushed in background. - * So, here we simply discard this dirty page. + * have dirty folios that try to be flushed in background. + * So, here we simply discard this dirty folio. */ - nilfs_clear_dirty_page(page, false); - unlock_page(page); + nilfs_clear_folio_dirty(folio); + folio_unlock(folio); return -EROFS; } - redirty_page_for_writepage(wbc, page); - unlock_page(page); + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); if (!inode) return 0; @@ -427,17 +422,27 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); - else if (wbc->for_reclaim) - nilfs_flush_segment(sb, inode->i_ino); return err; } +static int nilfs_mdt_writeback(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct folio *folio = NULL; + int error; + + while ((folio = writeback_iter(mapping, wbc, folio, &error))) + error = nilfs_mdt_write_folio(folio, wbc); + + return error; +} static const struct address_space_operations def_mdt_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, - .writepage = nilfs_mdt_write_page, + .writepages = nilfs_mdt_writeback, + .migrate_folio = buffer_migrate_folio_norefs, }; static const struct inode_operations def_mdt_iops; @@ -512,6 +517,8 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size, * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file * @inode: inode of the metadata file * @shadow: shadow mapping + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow) @@ -533,6 +540,8 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, /** * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map * @inode: inode of the metadata file + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_mdt_save_to_shadow_map(struct inode *inode) { @@ -560,17 +569,20 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) { struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; struct buffer_head *bh_frozen; - struct page *page; + struct folio *folio; int blkbits = inode->i_blkbits; - page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index); - if (!page) - return -ENOMEM; + folio = filemap_grab_folio(shadow->inode->i_mapping, + bh->b_folio->index); + if (IS_ERR(folio)) + return PTR_ERR(folio); - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << blkbits, 0); + bh_frozen = folio_buffers(folio); + if (!bh_frozen) + bh_frozen = create_empty_buffers(folio, 1 << blkbits, 0); - bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); + bh_frozen = get_nth_bh(bh_frozen, + offset_in_folio(folio, bh->b_data) >> blkbits); if (!buffer_uptodate(bh_frozen)) nilfs_copy_buffer(bh_frozen, bh); @@ -582,8 +594,8 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) brelse(bh_frozen); /* already frozen */ } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return 0; } @@ -592,17 +604,20 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) { struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; struct buffer_head *bh_frozen = NULL; - struct page *page; + struct folio *folio; int n; - page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index); - if (page) { - if (page_has_buffers(page)) { - n = bh_offset(bh) >> inode->i_blkbits; - bh_frozen = nilfs_page_get_nth_block(page, n); + folio = filemap_lock_folio(shadow->inode->i_mapping, + bh->b_folio->index); + if (!IS_ERR(folio)) { + bh_frozen = folio_buffers(folio); + if (bh_frozen) { + n = offset_in_folio(folio, bh->b_data) >> + inode->i_blkbits; + bh_frozen = get_nth_bh(bh_frozen, n); } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } return bh_frozen; } @@ -635,10 +650,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); - nilfs_clear_dirty_pages(inode->i_mapping, true); + nilfs_clear_dirty_pages(inode->i_mapping); nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); - nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping); nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 23899e0ae850..40f4b1a28705 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -55,12 +55,25 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; + int res; if (dentry->d_name.len > NILFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = nilfs_inode_by_name(dir, &dentry->d_name); - inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; + res = nilfs_inode_by_name(dir, &dentry->d_name, &ino); + if (res) { + if (res != -ENOENT) + return ERR_PTR(res); + inode = NULL; + } else { + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); + if (inode == ERR_PTR(-ESTALE)) { + nilfs_error(dir->i_sb, + "deleted inode referenced: %lu", ino); + return ERR_PTR(-EIO); + } + } + return d_splice_alias(inode, dentry); } @@ -72,7 +85,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int nilfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int nilfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode; @@ -100,7 +113,7 @@ static int nilfs_create(struct user_namespace *mnt_userns, struct inode *dir, } static int -nilfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +nilfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; @@ -125,7 +138,7 @@ nilfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, return err; } -static int nilfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct nilfs_transaction_info ti; @@ -149,6 +162,9 @@ static int nilfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, /* slow symlink */ inode->i_op = &nilfs_symlink_inode_operations; inode_nohighmem(inode); + mapping_set_gfp_mask(inode->i_mapping, + mapping_gfp_constraint(inode->i_mapping, + ~__GFP_FS)); inode->i_mapping->a_ops = &nilfs_aops; err = page_symlink(inode, symname, l); if (err) @@ -185,7 +201,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, if (err) return err; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); @@ -202,8 +218,8 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, return err; } -static int nilfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) +static struct dentry *nilfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct inode *inode; struct nilfs_transaction_info ti; @@ -211,7 +227,7 @@ static int nilfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) - return err; + return ERR_PTR(err); inc_nlink(dir); @@ -242,7 +258,7 @@ out: else nilfs_transaction_abort(dir->i_sb); - return err; + return ERR_PTR(err); out_fail: drop_nlink(inode); @@ -260,13 +276,14 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode; struct nilfs_dir_entry *de; - struct page *page; + struct folio *folio; int err; - err = -ENOENT; - de = nilfs_find_entry(dir, &dentry->d_name, &page); - if (!de) + de = nilfs_find_entry(dir, &dentry->d_name, &folio); + if (IS_ERR(de)) { + err = PTR_ERR(de); goto out; + } inode = d_inode(dentry); err = -EIO; @@ -279,11 +296,12 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) inode->i_ino, inode->i_nlink); set_nlink(inode, 1); } - err = nilfs_delete_entry(de, page); + err = nilfs_delete_entry(de, folio); + folio_release_kmap(folio, de); if (err) goto out; - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); drop_nlink(inode); err = 0; out: @@ -340,18 +358,19 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) return err; } -static int nilfs_rename(struct user_namespace *mnt_userns, +static int nilfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct page *dir_page = NULL; + struct folio *dir_folio = NULL; struct nilfs_dir_entry *dir_de = NULL; - struct page *old_page; + struct folio *old_folio; struct nilfs_dir_entry *old_de; struct nilfs_transaction_info ti; + bool old_is_dir = S_ISDIR(old_inode->i_mode); int err; if (flags & ~RENAME_NOREPLACE) @@ -361,34 +380,40 @@ static int nilfs_rename(struct user_namespace *mnt_userns, if (unlikely(err)) return err; - err = -ENOENT; - old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page); - if (!old_de) + old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio); + if (IS_ERR(old_de)) { + err = PTR_ERR(old_de); goto out; + } - if (S_ISDIR(old_inode->i_mode)) { + if (old_is_dir && old_dir != new_dir) { err = -EIO; - dir_de = nilfs_dotdot(old_inode, &dir_page); + dir_de = nilfs_dotdot(old_inode, &dir_folio); if (!dir_de) goto out_old; } if (new_inode) { - struct page *new_page; + struct folio *new_folio; struct nilfs_dir_entry *new_de; err = -ENOTEMPTY; - if (dir_de && !nilfs_empty_dir(new_inode)) + if (old_is_dir && !nilfs_empty_dir(new_inode)) goto out_dir; - err = -ENOENT; - new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); - if (!new_de) + new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, + &new_folio); + if (IS_ERR(new_de)) { + err = PTR_ERR(new_de); + goto out_dir; + } + err = nilfs_set_link(new_dir, new_de, new_folio, old_inode); + folio_release_kmap(new_folio, new_de); + if (unlikely(err)) goto out_dir; - nilfs_set_link(new_dir, new_de, new_page, old_inode); nilfs_mark_inode_dirty(new_dir); - new_inode->i_ctime = current_time(new_inode); - if (dir_de) + inode_set_ctime_current(new_inode); + if (old_is_dir) drop_nlink(new_inode); drop_nlink(new_inode); nilfs_mark_inode_dirty(new_inode); @@ -396,7 +421,7 @@ static int nilfs_rename(struct user_namespace *mnt_userns, err = nilfs_add_link(new_dentry, old_inode); if (err) goto out_dir; - if (dir_de) { + if (old_is_dir) { inc_nlink(new_dir); nilfs_mark_inode_dirty(new_dir); } @@ -406,30 +431,30 @@ static int nilfs_rename(struct user_namespace *mnt_userns, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = current_time(old_inode); - - nilfs_delete_entry(old_de, old_page); - - if (dir_de) { - nilfs_set_link(old_inode, dir_de, dir_page, new_dir); - drop_nlink(old_dir); + inode_set_ctime_current(old_inode); + + err = nilfs_delete_entry(old_de, old_folio); + if (likely(!err)) { + if (old_is_dir) { + if (old_dir != new_dir) + err = nilfs_set_link(old_inode, dir_de, + dir_folio, new_dir); + drop_nlink(old_dir); + } + nilfs_mark_inode_dirty(old_dir); } - nilfs_mark_inode_dirty(old_dir); nilfs_mark_inode_dirty(old_inode); - err = nilfs_transaction_commit(old_dir->i_sb); - return err; - out_dir: - if (dir_de) { - kunmap(dir_page); - put_page(dir_page); - } + if (dir_de) + folio_release_kmap(dir_folio, dir_de); out_old: - kunmap(old_page); - put_page(old_page); + folio_release_kmap(old_folio, old_de); out: - nilfs_transaction_abort(old_dir->i_sb); + if (likely(!err)) + err = nilfs_transaction_commit(old_dir->i_sb); + else + nilfs_transaction_abort(old_dir->i_sb); return err; } @@ -438,21 +463,17 @@ out: */ static struct dentry *nilfs_get_parent(struct dentry *child) { - unsigned long ino; - struct inode *inode; + ino_t ino; + int res; struct nilfs_root *root; - ino = nilfs_inode_by_name(d_inode(child), &dotdot_name); - if (!ino) - return ERR_PTR(-ENOENT); + res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino); + if (res) + return ERR_PTR(res); root = NILFS_I(d_inode(child))->i_root; - inode = nilfs_iget(child->d_sb, root, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - return d_obtain_alias(inode); + return d_obtain_alias(nilfs_iget(child->d_sb, root, ino)); } static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno, diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index aecda4fc95f5..b7e3d91b6243 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -14,6 +14,7 @@ #include <linux/buffer_head.h> #include <linux/spinlock.h> #include <linux/blkdev.h> +#include <linux/fs_struct.h> #include <linux/nilfs2_api.h> #include <linux/nilfs2_ondisk.h> #include "the_nilfs.h" @@ -22,6 +23,7 @@ /** * struct nilfs_inode_info - nilfs inode data in memory * @i_flags: inode flags + * @i_type: inode type (combination of flags that inidicate usage) * @i_state: dynamic state flags * @i_bmap: pointer on i_bmap_data * @i_bmap_data: raw block mapping @@ -37,6 +39,7 @@ */ struct nilfs_inode_info { __u32 i_flags; + unsigned int i_type; unsigned long i_state; /* Dynamic state flags */ struct nilfs_bmap *i_bmap; struct nilfs_bmap i_bmap_data; @@ -90,9 +93,16 @@ enum { NILFS_I_UPDATED, /* The file has been written back */ NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ - NILFS_I_GCINODE, /* inode for GC, on memory only */ - NILFS_I_BTNC, /* inode for btree node cache */ - NILFS_I_SHADOW, /* inode for shadowed page cache */ +}; + +/* + * Flags to identify the usage of on-memory inodes (i_type) + */ +enum { + NILFS_I_TYPE_NORMAL = 0, + NILFS_I_TYPE_GC = 0x0001, /* For data caching during GC */ + NILFS_I_TYPE_BTNC = 0x0002, /* For btree node cache */ + NILFS_I_TYPE_SHADOW = 0x0004, /* For shadowed page cache */ }; /* @@ -103,6 +113,18 @@ enum { NILFS_SB_COMMIT_ALL /* Commit both super blocks */ }; +/** + * define NILFS_MAX_VOLUME_NAME - maximum number of characters (bytes) in a + * file system volume name + * + * Defined by the size of the volume name field in the on-disk superblocks. + * This volume name does not include the terminating NULL byte if the string + * length matches the field size, so use (NILFS_MAX_VOLUME_NAME + 1) for the + * size of the buffer that requires a NULL byte termination. + */ +#define NILFS_MAX_VOLUME_NAME \ + sizeof_field(struct nilfs_super_block, s_volume_name) + /* * Macros to check inode numbers */ @@ -116,9 +138,15 @@ enum { #define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) #define NILFS_MDT_INODE(sb, ino) \ - ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & BIT(ino))) + ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino))) #define NILFS_VALID_INODE(sb, ino) \ - ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & BIT(ino))) + ((ino) >= NILFS_FIRST_INO(sb) || \ + ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino)))) + +#define NILFS_PRIVATE_INODE(ino) ({ \ + ino_t __ino = (ino); \ + ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \ + (__ino) != NILFS_SKETCH_INO); }) /** * struct nilfs_transaction_info: context information for synchronization @@ -226,24 +254,24 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) } /* dir.c */ -extern int nilfs_add_link(struct dentry *, struct inode *); -extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); -extern int nilfs_make_empty(struct inode *, struct inode *); -extern struct nilfs_dir_entry * -nilfs_find_entry(struct inode *, const struct qstr *, struct page **); -extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *); -extern int nilfs_empty_dir(struct inode *); -extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **); -extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, - struct page *, struct inode *); +int nilfs_add_link(struct dentry *, struct inode *); +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino); +int nilfs_make_empty(struct inode *, struct inode *); +struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *, + struct folio **); +int nilfs_delete_entry(struct nilfs_dir_entry *, struct folio *); +int nilfs_empty_dir(struct inode *); +struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct folio **); +int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, + struct folio *folio, struct inode *inode); /* file.c */ extern int nilfs_sync_file(struct file *, loff_t, loff_t, int); /* ioctl.c */ -int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *m); -int nilfs_fileattr_set(struct user_namespace *mnt_userns, - struct dentry *dentry, struct fileattr *fa); +int nilfs_fileattr_get(struct dentry *dentry, struct file_kattr *m); +int nilfs_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa); long nilfs_ioctl(struct file *, unsigned int, unsigned long); long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, @@ -256,7 +284,8 @@ extern struct inode *nilfs_new_inode(struct inode *, umode_t); extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern void nilfs_set_inode_flags(struct inode *); extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); -extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); +void nilfs_write_inode_common(struct inode *inode, + struct nilfs_inode *raw_inode); struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino); struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, @@ -271,10 +300,10 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode); extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); -extern int nilfs_setattr(struct user_namespace *, struct dentry *, +extern int nilfs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern void nilfs_write_failed(struct address_space *mapping, loff_t to); -int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode, +int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); @@ -334,8 +363,8 @@ void __nilfs_error(struct super_block *sb, const char *function, extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); -extern int nilfs_store_magic_and_option(struct super_block *, - struct nilfs_super_block *, char *); +extern int nilfs_store_magic(struct super_block *sb, + struct nilfs_super_block *sbp); extern int nilfs_check_feature_compatibility(struct super_block *, struct nilfs_super_block *); extern void nilfs_set_log_cursor(struct nilfs_super_block *, @@ -373,6 +402,7 @@ extern const struct file_operations nilfs_dir_operations; extern const struct inode_operations nilfs_file_inode_operations; extern const struct file_operations nilfs_file_operations; extern const struct address_space_operations nilfs_aops; +extern const struct address_space_operations nilfs_buffer_cache_aops; extern const struct inode_operations nilfs_dir_inode_operations; extern const struct inode_operations nilfs_special_inode_operations; extern const struct inode_operations nilfs_symlink_inode_operations; diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 39b7eea2642a..56c4da417b6a 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -25,21 +25,20 @@ (BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) | \ BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked)) -static struct buffer_head * -__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, - int blkbits, unsigned long b_state) +static struct buffer_head *__nilfs_get_folio_block(struct folio *folio, + unsigned long block, pgoff_t index, int blkbits, + unsigned long b_state) { unsigned long first_block; - struct buffer_head *bh; + struct buffer_head *bh = folio_buffers(folio); - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << blkbits, b_state); + if (!bh) + bh = create_empty_buffers(folio, 1 << blkbits, b_state); first_block = (unsigned long)index << (PAGE_SHIFT - blkbits); - bh = nilfs_page_get_nth_block(page, block - first_block); + bh = get_nth_bh(bh, block - first_block); - touch_buffer(bh); wait_on_buffer(bh); return bh; } @@ -51,19 +50,20 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, { int blkbits = inode->i_blkbits; pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits); - struct page *page; + struct folio *folio; struct buffer_head *bh; - page = grab_cache_page(mapping, index); - if (unlikely(!page)) + folio = filemap_grab_folio(mapping, index); + if (IS_ERR(folio)) return NULL; - bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); + bh = __nilfs_get_folio_block(folio, blkoff, index, blkbits, b_state); if (unlikely(!bh)) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return NULL; } + bh->b_bdev = inode->i_sb->s_bdev; return bh; } @@ -73,20 +73,21 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, */ void nilfs_forget_buffer(struct buffer_head *bh) { - struct page *page = bh->b_page; + struct folio *folio = bh->b_folio; const unsigned long clear_bits = (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | - BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected)); + BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | + BIT(BH_Delay)); lock_buffer(bh); set_mask_bits(&bh->b_state, clear_bits, 0); - if (nilfs_page_buffers_clean(page)) - __nilfs_clear_page_dirty(page); + if (nilfs_folio_buffers_clean(folio)) + __nilfs_clear_folio_dirty(folio); bh->b_blocknr = -1; - ClearPageUptodate(page); - ClearPageMappedToDisk(page); + folio_clear_uptodate(folio); + folio_clear_mappedtodisk(folio); unlock_buffer(bh); brelse(bh); } @@ -98,16 +99,16 @@ void nilfs_forget_buffer(struct buffer_head *bh) */ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) { - void *kaddr0, *kaddr1; + void *saddr, *daddr; unsigned long bits; - struct page *spage = sbh->b_page, *dpage = dbh->b_page; + struct folio *sfolio = sbh->b_folio, *dfolio = dbh->b_folio; struct buffer_head *bh; - kaddr0 = kmap_atomic(spage); - kaddr1 = kmap_atomic(dpage); - memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); - kunmap_atomic(kaddr1); - kunmap_atomic(kaddr0); + saddr = kmap_local_folio(sfolio, bh_offset(sbh)); + daddr = kmap_local_folio(dfolio, bh_offset(dbh)); + memcpy(daddr, saddr, sbh->b_size); + kunmap_local(daddr); + kunmap_local(saddr); dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; dbh->b_blocknr = sbh->b_blocknr; @@ -121,58 +122,58 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) unlock_buffer(bh); } if (bits & BIT(BH_Uptodate)) - SetPageUptodate(dpage); + folio_mark_uptodate(dfolio); else - ClearPageUptodate(dpage); + folio_clear_uptodate(dfolio); if (bits & BIT(BH_Mapped)) - SetPageMappedToDisk(dpage); + folio_set_mappedtodisk(dfolio); else - ClearPageMappedToDisk(dpage); + folio_clear_mappedtodisk(dfolio); } /** - * nilfs_page_buffers_clean - check if a page has dirty buffers or not. - * @page: page to be checked + * nilfs_folio_buffers_clean - Check if a folio has dirty buffers or not. + * @folio: Folio to be checked. * - * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. - * Otherwise, it returns non-zero value. + * Return: false if the folio has dirty buffers, true otherwise. */ -int nilfs_page_buffers_clean(struct page *page) +bool nilfs_folio_buffers_clean(struct folio *folio) { struct buffer_head *bh, *head; - bh = head = page_buffers(page); + bh = head = folio_buffers(folio); do { if (buffer_dirty(bh)) - return 0; + return false; bh = bh->b_this_page; } while (bh != head); - return 1; + return true; } -void nilfs_page_bug(struct page *page) +void nilfs_folio_bug(struct folio *folio) { + struct buffer_head *bh, *head; struct address_space *m; unsigned long ino; - if (unlikely(!page)) { - printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); + if (unlikely(!folio)) { + printk(KERN_CRIT "NILFS_FOLIO_BUG(NULL)\n"); return; } - m = page->mapping; + m = folio->mapping; ino = m ? m->host->i_ino : 0; - printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " + printk(KERN_CRIT "NILFS_FOLIO_BUG(%p): cnt=%d index#=%llu flags=0x%lx " "mapping=%p ino=%lu\n", - page, page_ref_count(page), - (unsigned long long)page->index, page->flags, m, ino); + folio, folio_ref_count(folio), + (unsigned long long)folio->index, folio->flags.f, m, ino); - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; + head = folio_buffers(folio); + if (head) { int i = 0; - bh = head = page_buffers(page); + bh = head; do { printk(KERN_CRIT " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", @@ -184,30 +185,32 @@ void nilfs_page_bug(struct page *page) } /** - * nilfs_copy_page -- copy the page with buffers - * @dst: destination page - * @src: source page - * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. + * nilfs_copy_folio -- copy the folio with buffers + * @dst: destination folio + * @src: source folio + * @copy_dirty: flag whether to copy dirty states on the folio's buffer heads. * - * This function is for both data pages and btnode pages. The dirty flag - * should be treated by caller. The page must not be under i/o. - * Both src and dst page must be locked + * This function is for both data folios and btnode folios. The dirty flag + * should be treated by caller. The folio must not be under i/o. + * Both src and dst folio must be locked */ -static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) +static void nilfs_copy_folio(struct folio *dst, struct folio *src, + bool copy_dirty) { struct buffer_head *dbh, *dbufs, *sbh; unsigned long mask = NILFS_BUFFER_INHERENT_BITS; - BUG_ON(PageWriteback(dst)); + BUG_ON(folio_test_writeback(dst)); - sbh = page_buffers(src); - if (!page_has_buffers(dst)) - create_empty_buffers(dst, sbh->b_size, 0); + sbh = folio_buffers(src); + dbh = folio_buffers(dst); + if (!dbh) + dbh = create_empty_buffers(dst, sbh->b_size, 0); if (copy_dirty) mask |= BIT(BH_Dirty); - dbh = dbufs = page_buffers(dst); + dbufs = dbh; do { lock_buffer(sbh); lock_buffer(dbh); @@ -218,16 +221,16 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) dbh = dbh->b_this_page; } while (dbh != dbufs); - copy_highpage(dst, src); + folio_copy(dst, src); - if (PageUptodate(src) && !PageUptodate(dst)) - SetPageUptodate(dst); - else if (!PageUptodate(src) && PageUptodate(dst)) - ClearPageUptodate(dst); - if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) - SetPageMappedToDisk(dst); - else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) - ClearPageMappedToDisk(dst); + if (folio_test_uptodate(src) && !folio_test_uptodate(dst)) + folio_mark_uptodate(dst); + else if (!folio_test_uptodate(src) && folio_test_uptodate(dst)) + folio_clear_uptodate(dst); + if (folio_test_mappedtodisk(src) && !folio_test_mappedtodisk(dst)) + folio_set_mappedtodisk(dst); + else if (!folio_test_mappedtodisk(src) && folio_test_mappedtodisk(dst)) + folio_clear_mappedtodisk(dst); do { unlock_buffer(sbh); @@ -240,42 +243,43 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) int nilfs_copy_dirty_pages(struct address_space *dmap, struct address_space *smap) { - struct pagevec pvec; + struct folio_batch fbatch; unsigned int i; pgoff_t index = 0; int err = 0; - pagevec_init(&pvec); + folio_batch_init(&fbatch); repeat: - if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY)) + if (!filemap_get_folios_tag(smap, &index, (pgoff_t)-1, + PAGECACHE_TAG_DIRTY, &fbatch)) return 0; - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i], *dpage; + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i], *dfolio; - lock_page(page); - if (unlikely(!PageDirty(page))) - NILFS_PAGE_BUG(page, "inconsistent dirty state"); + folio_lock(folio); + if (unlikely(!folio_test_dirty(folio))) + NILFS_FOLIO_BUG(folio, "inconsistent dirty state"); - dpage = grab_cache_page(dmap, page->index); - if (unlikely(!dpage)) { + dfolio = filemap_grab_folio(dmap, folio->index); + if (IS_ERR(dfolio)) { /* No empty page is added to the page cache */ - err = -ENOMEM; - unlock_page(page); + folio_unlock(folio); + err = PTR_ERR(dfolio); break; } - if (unlikely(!page_has_buffers(page))) - NILFS_PAGE_BUG(page, + if (unlikely(!folio_buffers(folio))) + NILFS_FOLIO_BUG(folio, "found empty page in dat page cache"); - nilfs_copy_page(dpage, page, 1); - __set_page_dirty_nobuffers(dpage); + nilfs_copy_folio(dfolio, folio, true); + filemap_dirty_folio(folio_mapping(dfolio), dfolio); - unlock_page(dpage); - put_page(dpage); - unlock_page(page); + folio_unlock(dfolio); + folio_put(dfolio); + folio_unlock(folio); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); if (likely(!err)) @@ -310,10 +314,10 @@ repeat: folio_lock(folio); dfolio = filemap_lock_folio(dmap, index); - if (dfolio) { + if (!IS_ERR(dfolio)) { /* overwrite existing folio in the destination cache */ WARN_ON(folio_test_dirty(dfolio)); - nilfs_copy_page(&dfolio->page, &folio->page, 0); + nilfs_copy_folio(dfolio, folio, false); folio_unlock(dfolio); folio_put(dfolio); /* Do we not need to remove folio from smap here? */ @@ -353,80 +357,101 @@ repeat: /** * nilfs_clear_dirty_pages - discard dirty pages in address space * @mapping: address space with dirty pages for discarding - * @silent: suppress [true] or print [false] warning messages */ -void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) +void nilfs_clear_dirty_pages(struct address_space *mapping) { - struct pagevec pvec; + struct folio_batch fbatch; unsigned int i; pgoff_t index = 0; - pagevec_init(&pvec); + folio_batch_init(&fbatch); + + while (filemap_get_folios_tag(mapping, &index, (pgoff_t)-1, + PAGECACHE_TAG_DIRTY, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; + + folio_lock(folio); - while (pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY)) { - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i]; + /* + * This folio may have been removed from the address + * space by truncation or invalidation when the lock + * was acquired. Skip processing in that case. + */ + if (likely(folio->mapping == mapping)) + nilfs_clear_folio_dirty(folio); - lock_page(page); - nilfs_clear_dirty_page(page, silent); - unlock_page(page); + folio_unlock(folio); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); } } /** - * nilfs_clear_dirty_page - discard dirty page - * @page: dirty page that will be discarded - * @silent: suppress [true] or print [false] warning messages + * nilfs_clear_folio_dirty - discard dirty folio + * @folio: dirty folio that will be discarded + * + * nilfs_clear_folio_dirty() clears working states including dirty state for + * the folio and its buffers. If the folio has buffers, clear only if it is + * confirmed that none of the buffer heads are busy (none have valid + * references and none are locked). */ -void nilfs_clear_dirty_page(struct page *page, bool silent) +void nilfs_clear_folio_dirty(struct folio *folio) { - struct inode *inode = page->mapping->host; - struct super_block *sb = inode->i_sb; - - BUG_ON(!PageLocked(page)); - - if (!silent) - nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu", - page_offset(page), inode->i_ino); + struct buffer_head *bh, *head; - ClearPageUptodate(page); - ClearPageMappedToDisk(page); + BUG_ON(!folio_test_locked(folio)); - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; + head = folio_buffers(folio); + if (head) { const unsigned long clear_bits = (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | - BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected)); + BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | + BIT(BH_Delay)); + bool busy, invalidated = false; - bh = head = page_buffers(page); +recheck_buffers: + busy = false; + bh = head; do { - lock_buffer(bh); - if (!silent) - nilfs_warn(sb, - "discard dirty block: blocknr=%llu, size=%zu", - (u64)bh->b_blocknr, bh->b_size); + if (atomic_read(&bh->b_count) | buffer_locked(bh)) { + busy = true; + break; + } + } while (bh = bh->b_this_page, bh != head); + if (busy) { + if (invalidated) + return; + invalidate_bh_lrus(); + invalidated = true; + goto recheck_buffers; + } + + bh = head; + do { + lock_buffer(bh); set_mask_bits(&bh->b_state, clear_bits, 0); unlock_buffer(bh); } while (bh = bh->b_this_page, bh != head); } - __nilfs_clear_page_dirty(page); + folio_clear_uptodate(folio); + folio_clear_mappedtodisk(folio); + folio_clear_checked(folio); + __nilfs_clear_folio_dirty(folio); } -unsigned int nilfs_page_count_clean_buffers(struct page *page, +unsigned int nilfs_page_count_clean_buffers(struct folio *folio, unsigned int from, unsigned int to) { unsigned int block_start, block_end; struct buffer_head *bh, *head; unsigned int nc = 0; - for (bh = head = page_buffers(page), block_start = 0; + for (bh = head = folio_buffers(folio), block_start = 0; bh != head || !block_start; block_start = block_end, bh = bh->b_this_page) { block_end = block_start + bh->b_size; @@ -446,22 +471,23 @@ unsigned int nilfs_page_count_clean_buffers(struct page *page, * 2) Some B-tree operations like insertion or deletion may dispose buffers * in dirty state, and this needs to cancel the dirty state of their pages. */ -int __nilfs_clear_page_dirty(struct page *page) +void __nilfs_clear_folio_dirty(struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; if (mapping) { xa_lock_irq(&mapping->i_pages); - if (test_bit(PG_dirty, &page->flags)) { - __xa_clear_mark(&mapping->i_pages, page_index(page), + if (folio_test_dirty(folio)) { + __xa_clear_mark(&mapping->i_pages, folio->index, PAGECACHE_TAG_DIRTY); xa_unlock_irq(&mapping->i_pages); - return clear_page_dirty_for_io(page); + folio_clear_dirty_for_io(folio); + return; } xa_unlock_irq(&mapping->i_pages); - return 0; + return; } - return TestClearPageDirty(page); + folio_clear_dirty(folio); } /** @@ -473,8 +499,9 @@ int __nilfs_clear_page_dirty(struct page *page) * This function searches an extent of buffers marked "delayed" which * starts from a block offset equal to or larger than @start_blk. If * such an extent was found, this will store the start offset in - * @blkoff and return its length in blocks. Otherwise, zero is - * returned. + * @blkoff and return its length in blocks. + * + * Return: Length in blocks of found extent, 0 otherwise. */ unsigned long nilfs_find_uncommitted_extent(struct inode *inode, sector_t start_blk, diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 21ddcdd4d63e..136cd1c143c9 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -30,37 +30,26 @@ BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ -int __nilfs_clear_page_dirty(struct page *); +void __nilfs_clear_folio_dirty(struct folio *); struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, unsigned long, unsigned long); void nilfs_forget_buffer(struct buffer_head *); void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); -int nilfs_page_buffers_clean(struct page *); -void nilfs_page_bug(struct page *); +bool nilfs_folio_buffers_clean(struct folio *); +void nilfs_folio_bug(struct folio *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_dirty_page(struct page *, bool); -void nilfs_clear_dirty_pages(struct address_space *, bool); -unsigned int nilfs_page_count_clean_buffers(struct page *, unsigned int, - unsigned int); +void nilfs_clear_folio_dirty(struct folio *folio); +void nilfs_clear_dirty_pages(struct address_space *mapping); +unsigned int nilfs_page_count_clean_buffers(struct folio *folio, + unsigned int from, unsigned int to); unsigned long nilfs_find_uncommitted_extent(struct inode *inode, sector_t start_blk, sector_t *blkoff); -#define NILFS_PAGE_BUG(page, m, a...) \ - do { nilfs_page_bug(page); BUG(); } while (0) - -static inline struct buffer_head * -nilfs_page_get_nth_block(struct page *page, unsigned int count) -{ - struct buffer_head *bh = page_buffers(page); - - while (count-- > 0) - bh = bh->b_this_page; - get_bh(bh); - return bh; -} +#define NILFS_FOLIO_BUG(folio, m, a...) \ + do { nilfs_folio_bug(folio); BUG(); } while (0) #endif /* _NILFS_PAGE_H */ diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 0955b657938f..a9c61d0492cb 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -88,6 +88,8 @@ static int nilfs_warn_segment_error(struct super_block *sb, int err) * @check_bytes: number of bytes to be checked * @start: DBN of start block * @nblock: number of blocks to be checked + * + * Return: 0 on success, or %-EIO if an I/O error occurs. */ static int nilfs_compute_checksum(struct the_nilfs *nilfs, struct buffer_head *bhs, u32 *sum, @@ -126,6 +128,11 @@ static int nilfs_compute_checksum(struct the_nilfs *nilfs, * @sr_block: disk block number of the super root block * @pbh: address of a buffer_head pointer to return super root buffer * @check: CRC check flag + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Super root block corrupted. + * * %-EIO - I/O error. */ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, struct buffer_head **pbh, int check) @@ -176,6 +183,8 @@ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, * @nilfs: nilfs object * @start_blocknr: start block number of the log * @sum: pointer to return segment summary structure + * + * Return: Buffer head pointer, or NULL if an I/O error occurs. */ static struct buffer_head * nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr, @@ -195,6 +204,13 @@ nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr, * @seg_seq: sequence number of segment * @bh_sum: buffer head of summary block * @sum: segment summary struct + * + * Return: 0 on success, or one of the following internal codes on failure: + * * %NILFS_SEG_FAIL_MAGIC - Magic number mismatch. + * * %NILFS_SEG_FAIL_SEQ - Sequence number mismatch. + * * %NIFLS_SEG_FAIL_CONSISTENCY - Block count out of range. + * * %NILFS_SEG_FAIL_IO - I/O error. + * * %NILFS_SEG_FAIL_CHECKSUM_FULL - Full log checksum verification failed. */ static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq, struct buffer_head *bh_sum, @@ -238,6 +254,9 @@ out: * @pbh: the current buffer head on summary blocks [in, out] * @offset: the current byte offset on summary blocks [in, out] * @bytes: byte size of the item to be read + * + * Return: Kernel space address of current segment summary entry, or + * NULL if an I/O error occurs. */ static void *nilfs_read_summary_info(struct the_nilfs *nilfs, struct buffer_head **pbh, @@ -300,6 +319,11 @@ static void nilfs_skip_summary_info(struct the_nilfs *nilfs, * @start_blocknr: start block number of the log * @sum: log summary information * @head: list head to add nilfs_recovery_block struct + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr, struct nilfs_segment_summary *sum, @@ -433,8 +457,17 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, * The next segment is invalidated by this recovery. */ err = nilfs_sufile_free(sufile, segnum[1]); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "checkpoint log inconsistency at block %llu (segment %llu): next segment %llu is unallocated", + (unsigned long long)nilfs->ns_last_pseg, + (unsigned long long)nilfs->ns_segnum, + (unsigned long long)segnum[1]); + err = -EINVAL; + } goto failed; + } for (i = 1; i < 4; i++) { err = nilfs_segment_list_add(head, segnum[i]); @@ -472,18 +505,16 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, struct nilfs_recovery_block *rb, - struct page *page) + loff_t pos, struct folio *folio) { struct buffer_head *bh_org; - void *kaddr; + size_t from = offset_in_folio(folio, pos); bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); if (unlikely(!bh_org)) return -EIO; - kaddr = kmap_atomic(page); - memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); - kunmap_atomic(kaddr); + memcpy_to_folio(folio, from, bh_org->b_data, bh_org->b_size); brelse(bh_org); return 0; } @@ -497,7 +528,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, struct inode *inode; struct nilfs_recovery_block *rb, *n; unsigned int blocksize = nilfs->ns_blocksize; - struct page *page; + struct folio *folio; loff_t pos; int err = 0, err2 = 0; @@ -511,7 +542,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, pos = rb->blkoff << inode->i_blkbits; err = block_write_begin(inode->i_mapping, pos, blocksize, - &page, nilfs_get_block); + &folio, nilfs_get_block); if (unlikely(err)) { loff_t isize = inode->i_size; @@ -521,26 +552,25 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, goto failed_inode; } - err = nilfs_recovery_copy_block(nilfs, rb, page); + err = nilfs_recovery_copy_block(nilfs, rb, pos, folio); if (unlikely(err)) - goto failed_page; + goto failed_folio; err = nilfs_set_file_dirty(inode, 1); if (unlikely(err)) - goto failed_page; + goto failed_folio; - block_write_end(NULL, inode->i_mapping, pos, blocksize, - blocksize, page, NULL); + block_write_end(pos, blocksize, blocksize, folio); - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); (*nr_salvaged_blocks)++; goto next; - failed_page: - unlock_page(page); - put_page(page); + failed_folio: + folio_unlock(folio); + folio_put(folio); failed_inode: nilfs_warn(sb, @@ -562,7 +592,14 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, * checkpoint * @nilfs: nilfs object * @sb: super block instance + * @root: NILFS root instance * @ri: pointer to a nilfs_recovery_info + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Log format error. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, struct super_block *sb, @@ -697,9 +734,15 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, return; bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize); - BUG_ON(!bh); + if (WARN_ON(!bh)) + return; /* should never happen */ + + lock_buffer(bh); memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); set_buffer_dirty(bh); + unlock_buffer(bh); + err = sync_dirty_buffer(bh); if (unlikely(err)) nilfs_warn(nilfs->ns_sb, @@ -708,23 +751,45 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, } /** + * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery + * @nilfs: nilfs object + */ +static void nilfs_abort_roll_forward(struct the_nilfs *nilfs) +{ + struct nilfs_inode_info *ii, *n; + LIST_HEAD(head); + + /* Abandon inodes that have read recovery data */ + spin_lock(&nilfs->ns_inode_lock); + list_splice_init(&nilfs->ns_dirty_files, &head); + spin_unlock(&nilfs->ns_inode_lock); + if (list_empty(&head)) + return; + + set_nilfs_purging(nilfs); + list_for_each_entry_safe(ii, n, &head, i_dirty) { + spin_lock(&nilfs->ns_inode_lock); + list_del_init(&ii->i_dirty); + spin_unlock(&nilfs->ns_inode_lock); + + iput(&ii->vfs_inode); + } + clear_nilfs_purging(nilfs); +} + +/** * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint * @nilfs: nilfs object * @sb: super block instance * @ri: pointer to a nilfs_recovery_info struct to store search results. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-EINVAL - Inconsistent filesystem state. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Inconsistent filesystem state. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. */ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb, @@ -765,15 +830,19 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, if (unlikely(err)) { nilfs_err(sb, "error %d writing segment for recovery", err); - goto failed; + goto put_root; } nilfs_finish_roll_forward(nilfs, ri); } - failed: +put_root: nilfs_put_root(root); return err; + +failed: + nilfs_abort_roll_forward(nilfs); + goto put_root; } /** @@ -785,14 +854,11 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, * segment pointed by the superblock. It sets up struct the_nilfs through * this search. It fills nilfs_recovery_info (ri) required for recovery. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-EINVAL - No valid segment found - * - * %-EIO - I/O error - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - No valid segment found. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_recovery_info *ri) diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 1362ccb64ec7..a8bdf3d318ea 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -101,6 +101,12 @@ int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf) if (unlikely(!bh)) return -ENOMEM; + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); nilfs_segbuf_add_segsum_buffer(segbuf, bh); return 0; } @@ -199,7 +205,6 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, { struct buffer_head *bh; struct nilfs_segment_summary *raw_sum; - void *kaddr; u32 crc; bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head, @@ -214,9 +219,13 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, crc = crc32_le(crc, bh->b_data, bh->b_size); } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - kaddr = kmap_atomic(bh->b_page); - crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); - kunmap_atomic(kaddr); + size_t offset = offset_in_folio(bh->b_folio, bh->b_data); + unsigned char *from; + + /* Do not support block sizes larger than PAGE_SIZE */ + from = kmap_local_folio(bh->b_folio, offset); + crc = crc32_le(crc, from, bh->b_size); + kunmap_local(from); } raw_sum->ss_datasum = cpu_to_le32(crc); } @@ -368,7 +377,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, struct nilfs_write_info *wi, struct buffer_head *bh) { - int len, err; + int err; BUG_ON(wi->nr_vecs <= 0); repeat: @@ -379,8 +388,8 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, (wi->nilfs->ns_blocksize_bits - 9); } - len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh)); - if (len == bh->b_size) { + if (bio_add_folio(wi->bio, bh->b_folio, bh->b_size, + offset_in_folio(bh->b_folio, bh->b_data))) { wi->end++; return 0; } @@ -397,12 +406,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, * @segbuf: buffer storing a log to be written * @nilfs: nilfs object * - * Return Value: On Success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error - * - * %-ENOMEM - Insufficient memory available. + * Return: Always 0. */ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, struct the_nilfs *nilfs) @@ -443,10 +447,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * nilfs_segbuf_wait - wait for completion of requested BIOs * @segbuf: segment buffer * - * Return Value: On Success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error + * Return: 0 on success, or %-EIO if I/O error is detected. */ static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf) { diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 76c3bd88b858..deee16bc9d4e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -136,7 +136,7 @@ static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int); #define nilfs_cnt32_ge(a, b) \ (typecheck(__u32, a) && typecheck(__u32, b) && \ - ((__s32)(a) - (__s32)(b) >= 0)) + ((__s32)((a) - (b)) >= 0)) static int nilfs_prepare_segment_lock(struct super_block *sb, struct nilfs_transaction_info *ti) @@ -191,12 +191,10 @@ static int nilfs_prepare_segment_lock(struct super_block *sb, * When @vacancy_check flag is set, this function will check the amount of * free space, and will wait for the GC to reclaim disk space if low capacity. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-ENOSPC - No space left on device + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (if checking free space). */ int nilfs_transaction_begin(struct super_block *sb, struct nilfs_transaction_info *ti, @@ -252,6 +250,8 @@ int nilfs_transaction_begin(struct super_block *sb, * nilfs_transaction_commit() sets a timer to start the segment * constructor. If a sync flag is set, it starts construction * directly. + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_transaction_commit(struct super_block *sb) { @@ -407,6 +407,8 @@ static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, /** * nilfs_segctor_reset_segment_buffer - reset the current segment buffer * @sci: nilfs_sc_info + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) { @@ -430,6 +432,23 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) return 0; } +/** + * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area + * @sci: segment constructor object + * + * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of + * the current segment summary block. + */ +static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci) +{ + struct nilfs_segsum_pointer *ssp; + + ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr; + if (ssp->offset < ssp->bh->b_size) + memset(ssp->bh->b_data + ssp->offset, 0, + ssp->bh->b_size - ssp->offset); +} + static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) { sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; @@ -438,6 +457,7 @@ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) * The current segment is filled up * (internal code) */ + nilfs_segctor_zeropad_segsum(sci); sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg); return nilfs_segctor_reset_segment_buffer(sci); } @@ -501,7 +521,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, ii = NILFS_I(inode); - if (test_bit(NILFS_I_GCINODE, &ii->i_state)) + if (ii->i_type & NILFS_I_TYPE_GC) cno = ii->i_cno; else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) cno = 0; @@ -542,6 +562,7 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, goto retry; } if (unlikely(required)) { + nilfs_segctor_zeropad_segsum(sci); err = nilfs_segbuf_extend_segsum(segbuf); if (unlikely(err)) goto failed; @@ -680,7 +701,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, loff_t start, loff_t end) { struct address_space *mapping = inode->i_mapping; - struct pagevec pvec; + struct folio_batch fbatch; pgoff_t index = 0, last = ULONG_MAX; size_t ndirties = 0; int i; @@ -694,23 +715,29 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, index = start >> PAGE_SHIFT; last = end >> PAGE_SHIFT; } - pagevec_init(&pvec); + folio_batch_init(&fbatch); repeat: if (unlikely(index > last) || - !pagevec_lookup_range_tag(&pvec, mapping, &index, last, - PAGECACHE_TAG_DIRTY)) + !filemap_get_folios_tag(mapping, &index, last, + PAGECACHE_TAG_DIRTY, &fbatch)) return ndirties; - for (i = 0; i < pagevec_count(&pvec); i++) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { struct buffer_head *bh, *head; - struct page *page = pvec.pages[i]; + struct folio *folio = fbatch.folios[i]; - lock_page(page); - if (!page_has_buffers(page)) - create_empty_buffers(page, i_blocksize(inode), 0); - unlock_page(page); + folio_lock(folio); + if (unlikely(folio->mapping != mapping)) { + /* Exclude folios removed from the address space */ + folio_unlock(folio); + continue; + } + head = folio_buffers(folio); + if (!head) + head = create_empty_buffers(folio, + i_blocksize(inode), 0); - bh = head = page_buffers(page); + bh = head; do { if (!buffer_dirty(bh) || buffer_async_write(bh)) continue; @@ -718,13 +745,16 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, list_add_tail(&bh->b_assoc_buffers, listp); ndirties++; if (unlikely(ndirties >= nlimit)) { - pagevec_release(&pvec); + folio_unlock(folio); + folio_batch_release(&fbatch); cond_resched(); return ndirties; } } while (bh = bh->b_this_page, bh != head); + + folio_unlock(folio); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); goto repeat; } @@ -734,20 +764,19 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, { struct nilfs_inode_info *ii = NILFS_I(inode); struct inode *btnc_inode = ii->i_assoc_inode; - struct pagevec pvec; + struct folio_batch fbatch; struct buffer_head *bh, *head; unsigned int i; pgoff_t index = 0; if (!btnc_inode) return; + folio_batch_init(&fbatch); - pagevec_init(&pvec); - - while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index, - PAGECACHE_TAG_DIRTY)) { - for (i = 0; i < pagevec_count(&pvec); i++) { - bh = head = page_buffers(pvec.pages[i]); + while (filemap_get_folios_tag(btnc_inode->i_mapping, &index, + (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + bh = head = folio_buffers(fbatch.folios[i]); do { if (buffer_dirty(bh) && !buffer_async_write(bh)) { @@ -758,7 +787,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, bh = bh->b_this_page; } while (bh != head); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); } } @@ -855,76 +884,6 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) nilfs_mdt_clear_dirty(nilfs->ns_dat); } -static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) -{ - struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - struct buffer_head *bh_cp; - struct nilfs_checkpoint *raw_cp; - int err; - - /* XXX: this interface will be changed */ - err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1, - &raw_cp, &bh_cp); - if (likely(!err)) { - /* - * The following code is duplicated with cpfile. But, it is - * needed to collect the checkpoint even if it was not newly - * created. - */ - mark_buffer_dirty(bh_cp); - nilfs_mdt_mark_dirty(nilfs->ns_cpfile); - nilfs_cpfile_put_checkpoint( - nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); - } else if (err == -EINVAL || err == -ENOENT) { - nilfs_error(sci->sc_super, - "checkpoint creation failed due to metadata corruption."); - err = -EIO; - } - return err; -} - -static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) -{ - struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - struct buffer_head *bh_cp; - struct nilfs_checkpoint *raw_cp; - int err; - - err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, - &raw_cp, &bh_cp); - if (unlikely(err)) { - if (err == -EINVAL || err == -ENOENT) { - nilfs_error(sci->sc_super, - "checkpoint finalization failed due to metadata corruption."); - err = -EIO; - } - goto failed_ibh; - } - raw_cp->cp_snapshot_list.ssl_next = 0; - raw_cp->cp_snapshot_list.ssl_prev = 0; - raw_cp->cp_inodes_count = - cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count)); - raw_cp->cp_blocks_count = - cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count)); - raw_cp->cp_nblk_inc = - cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); - raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); - raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); - - if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) - nilfs_checkpoint_clear_minor(raw_cp); - else - nilfs_checkpoint_set_minor(raw_cp); - - nilfs_write_inode_common(sci->sc_root->ifile, - &raw_cp->cp_ifile_inode, 1); - nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); - return 0; - - failed_ibh: - return err; -} - static void nilfs_fill_in_file_bmap(struct inode *ifile, struct nilfs_inode_info *ii) @@ -938,7 +897,7 @@ static void nilfs_fill_in_file_bmap(struct inode *ifile, raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino, ibh); nilfs_bmap_write(ii->i_bmap, raw_inode); - nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh); + nilfs_ifile_unmap_inode(raw_inode); } } @@ -952,6 +911,33 @@ static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci) } } +/** + * nilfs_write_root_mdt_inode - export root metadata inode information to + * the on-disk inode + * @inode: inode object of the root metadata file + * @raw_inode: on-disk inode + * + * nilfs_write_root_mdt_inode() writes inode information and bmap data of + * @inode to the inode area of the metadata file allocated on the super root + * block created to finalize the log. Since super root blocks are configured + * each time, this function zero-fills the unused area of @raw_inode. + */ +static void nilfs_write_root_mdt_inode(struct inode *inode, + struct nilfs_inode *raw_inode) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + + nilfs_write_inode_common(inode, raw_inode); + + /* zero-fill unused portion of raw_inode */ + raw_inode->i_xattr = 0; + raw_inode->i_pad = 0; + memset((void *)raw_inode + sizeof(*raw_inode), 0, + nilfs->ns_inode_size - sizeof(*raw_inode)); + + nilfs_bmap_write(NILFS_I(inode)->i_bmap, raw_inode); +} + static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { @@ -960,23 +946,29 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, unsigned int isz, srsz; bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; + + lock_buffer(bh_sr); raw_sr = (struct nilfs_super_root *)bh_sr->b_data; isz = nilfs->ns_inode_size; srsz = NILFS_SR_BYTES(isz); + raw_sr->sr_sum = 0; /* Ensure initialization within this update */ raw_sr->sr_bytes = cpu_to_le16(srsz); raw_sr->sr_nongc_ctime = cpu_to_le64(nilfs_doing_gc() ? nilfs->ns_nongc_ctime : sci->sc_seg_ctime); raw_sr->sr_flags = 0; - nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr + - NILFS_SR_DAT_OFFSET(isz), 1); - nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + - NILFS_SR_CPFILE_OFFSET(isz), 1); - nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + - NILFS_SR_SUFILE_OFFSET(isz), 1); + nilfs_write_root_mdt_inode(nilfs->ns_dat, (void *)raw_sr + + NILFS_SR_DAT_OFFSET(isz)); + nilfs_write_root_mdt_inode(nilfs->ns_cpfile, (void *)raw_sr + + NILFS_SR_CPFILE_OFFSET(isz)); + nilfs_write_root_mdt_inode(nilfs->ns_sufile, (void *)raw_sr + + NILFS_SR_SUFILE_OFFSET(isz)); + memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz); + set_buffer_uptodate(bh_sr); + unlock_buffer(bh_sr); } static void nilfs_redirty_inodes(struct list_head *head) @@ -1114,12 +1106,65 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, return err; } +/** + * nilfs_free_segments - free the segments given by an array of segment numbers + * @nilfs: nilfs object + * @segnumv: array of segment numbers to be freed + * @nsegs: number of segments to be freed in @segnumv + * + * nilfs_free_segments() wraps nilfs_sufile_freev() and + * nilfs_sufile_cancel_freev(), and edits the segment usage metadata file + * (sufile) to free all segments given by @segnumv and @nsegs at once. If + * it fails midway, it cancels the changes so that none of the segments are + * freed. If @nsegs is 0, this function does nothing. + * + * The freeing of segments is not finalized until the writing of a log with + * a super root block containing this sufile change is complete, and it can + * be canceled with nilfs_sufile_cancel_freev() until then. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid segment number. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + */ +static int nilfs_free_segments(struct the_nilfs *nilfs, __u64 *segnumv, + size_t nsegs) +{ + size_t ndone; + int ret; + + if (!nsegs) + return 0; + + ret = nilfs_sufile_freev(nilfs->ns_sufile, segnumv, nsegs, &ndone); + if (unlikely(ret)) { + nilfs_sufile_cancel_freev(nilfs->ns_sufile, segnumv, ndone, + NULL); + /* + * If a segment usage of the segments to be freed is in a + * hole block, nilfs_sufile_freev() will return -ENOENT. + * In this case, -EINVAL should be returned to the caller + * since there is something wrong with the given segment + * number array. This error can only occur during GC, so + * there is no need to worry about it propagating to other + * callers (such as fsync). + */ + if (ret == -ENOENT) { + nilfs_err(nilfs->ns_sb, + "The segment usage entry %llu to be freed is invalid (in a hole)", + (unsigned long long)segnumv[ndone]); + ret = -EINVAL; + } + } + return ret; +} + static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) { struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct list_head *head; struct nilfs_inode_info *ii; - size_t ndone; int err = 0; switch (nilfs_sc_cstage_get(sci)) { @@ -1200,7 +1245,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) break; nilfs_sc_cstage_inc(sci); /* Creating a checkpoint */ - err = nilfs_segctor_create_checkpoint(sci); + err = nilfs_cpfile_create_checkpoint(nilfs->ns_cpfile, + nilfs->ns_cno); if (unlikely(err)) break; fallthrough; @@ -1212,14 +1258,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) nilfs_sc_cstage_inc(sci); fallthrough; case NILFS_ST_SUFILE: - err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, - sci->sc_nfreesegs, &ndone); - if (unlikely(err)) { - nilfs_sufile_cancel_freev(nilfs->ns_sufile, - sci->sc_freesegs, ndone, - NULL); + err = nilfs_free_segments(nilfs, sci->sc_freesegs, + sci->sc_nfreesegs); + if (unlikely(err)) break; - } sci->sc_stage.flags |= NILFS_CF_SUFREED; err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, @@ -1278,6 +1320,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) * nilfs_segctor_begin_construction - setup segment buffer to make a new log * @sci: nilfs_sc_info * @nilfs: nilfs object + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) @@ -1531,6 +1575,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); sci->sc_stage = prev_stage; } + nilfs_segctor_zeropad_segsum(sci); nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile); return 0; @@ -1581,7 +1626,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, nblocks = le32_to_cpu(finfo->fi_nblocks); ndatablk = le32_to_cpu(finfo->fi_ndatablk); - inode = bh->b_page->mapping->host; + inode = bh->b_folio->mapping->host; if (mode == SC_LSEG_DSYNC) sc_op = &nilfs_sc_dsync_ops; @@ -1634,68 +1679,95 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) return 0; } -static void nilfs_begin_page_io(struct page *page) +static void nilfs_begin_folio_io(struct folio *folio) { - if (!page || PageWriteback(page)) + if (!folio || folio_test_writeback(folio)) /* * For split b-tree node pages, this function may be called * twice. We ignore the 2nd or later calls by this check. */ return; - lock_page(page); - clear_page_dirty_for_io(page); - set_page_writeback(page); - unlock_page(page); + folio_lock(folio); + folio_clear_dirty_for_io(folio); + folio_start_writeback(folio); + folio_unlock(folio); } -static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) +/** + * nilfs_prepare_write_logs - prepare to write logs + * @logs: logs to prepare for writing + * @seed: checksum seed value + * + * nilfs_prepare_write_logs() adds checksums and prepares the block + * buffers/folios for writing logs. In order to stabilize folios of + * memory-mapped file blocks by putting them in writeback state before + * calculating the checksums, first prepare to write payload blocks other + * than segment summary and super root blocks in which the checksums will + * be embedded. + */ +static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) { struct nilfs_segment_buffer *segbuf; - struct page *bd_page = NULL, *fs_page = NULL; - - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; + struct folio *bd_folio = NULL, *fs_folio = NULL; + struct buffer_head *bh; - list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + /* Prepare to write payload blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh->b_page != bd_page) { - if (bd_page) { - lock_page(bd_page); - clear_page_dirty_for_io(bd_page); - set_page_writeback(bd_page); - unlock_page(bd_page); - } - bd_page = bh->b_page; + if (bh == segbuf->sb_super_root) + break; + set_buffer_async_write(bh); + if (bh->b_folio != fs_folio) { + nilfs_begin_folio_io(fs_folio); + fs_folio = bh->b_folio; } } + } + nilfs_begin_folio_io(fs_folio); - list_for_each_entry(bh, &segbuf->sb_payload_buffers, + nilfs_add_checksums_on_logs(logs, seed); + + /* Prepare to write segment summary blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - set_buffer_async_write(bh); - if (bh == segbuf->sb_super_root) { - if (bh->b_page != bd_page) { - lock_page(bd_page); - clear_page_dirty_for_io(bd_page); - set_page_writeback(bd_page); - unlock_page(bd_page); - bd_page = bh->b_page; - } - break; - } - if (bh->b_page != fs_page) { - nilfs_begin_page_io(fs_page); - fs_page = bh->b_page; + mark_buffer_dirty(bh); + if (bh->b_folio == bd_folio) + continue; + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); } + bd_folio = bh->b_folio; } } - if (bd_page) { - lock_page(bd_page); - clear_page_dirty_for_io(bd_page); - set_page_writeback(bd_page); - unlock_page(bd_page); + + /* Prepare to write super root block */ + bh = NILFS_LAST_SEGBUF(logs)->sb_super_root; + if (bh) { + mark_buffer_dirty(bh); + if (bh->b_folio != bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + bd_folio = bh->b_folio; + } + } + + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); } - nilfs_begin_page_io(fs_page); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, @@ -1708,17 +1780,18 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci, return ret; } -static void nilfs_end_page_io(struct page *page, int err) +static void nilfs_end_folio_io(struct folio *folio, int err) { - if (!page) + if (!folio) return; - if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) { + if (buffer_nilfs_node(folio_buffers(folio)) && + !folio_test_writeback(folio)) { /* * For b-tree node pages, this function may be called twice * or more because they might be split in a segment. */ - if (PageDirty(page)) { + if (folio_test_dirty(folio)) { /* * For pages holding split b-tree node buffers, dirty * flag on the buffers may be cleared discretely. @@ -1726,30 +1799,24 @@ static void nilfs_end_page_io(struct page *page, int err) * remaining buffers, and it must be cancelled if * all the buffers get cleaned later. */ - lock_page(page); - if (nilfs_page_buffers_clean(page)) - __nilfs_clear_page_dirty(page); - unlock_page(page); + folio_lock(folio); + if (nilfs_folio_buffers_clean(folio)) + __nilfs_clear_folio_dirty(folio); + folio_unlock(folio); } return; } - if (!err) { - if (!nilfs_page_buffers_clean(page)) - __set_page_dirty_nobuffers(page); - ClearPageError(page); - } else { - __set_page_dirty_nobuffers(page); - SetPageError(page); - } + if (err || !nilfs_folio_buffers_clean(folio)) + filemap_dirty_folio(folio->mapping, folio); - end_page_writeback(page); + folio_end_writeback(folio); } static void nilfs_abort_logs(struct list_head *logs, int err) { struct nilfs_segment_buffer *segbuf; - struct page *bd_page = NULL, *fs_page = NULL; + struct folio *bd_folio = NULL, *fs_folio = NULL; struct buffer_head *bh; if (list_empty(logs)) @@ -1758,33 +1825,35 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - if (bh->b_page != bd_page) { - if (bd_page) - end_page_writeback(bd_page); - bd_page = bh->b_page; + clear_buffer_uptodate(bh); + if (bh->b_folio != bd_folio) { + if (bd_folio) + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { - if (bh->b_page != bd_page) { - end_page_writeback(bd_page); - bd_page = bh->b_page; + clear_buffer_uptodate(bh); + if (bh->b_folio != bd_folio) { + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } break; } - if (bh->b_page != fs_page) { - nilfs_end_page_io(fs_page, err); - fs_page = bh->b_page; + clear_buffer_async_write(bh); + if (bh->b_folio != fs_folio) { + nilfs_end_folio_io(fs_folio, err); + fs_folio = bh->b_folio; } } } - if (bd_page) - end_page_writeback(bd_page); + if (bd_folio) + folio_end_writeback(bd_folio); - nilfs_end_page_io(fs_page, err); + nilfs_end_folio_io(fs_folio, err); } static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, @@ -1798,6 +1867,9 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, nilfs_abort_logs(&logs, ret ? : err); list_splice_tail_init(&sci->sc_segbufs, &logs); + if (list_empty(&logs)) + return; /* if the first segment buffer preparation failed */ + nilfs_cancel_segusage(&logs, nilfs->ns_sufile); nilfs_free_incomplete_logs(&logs, nilfs); @@ -1826,7 +1898,7 @@ static void nilfs_set_next_segment(struct the_nilfs *nilfs, static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) { struct nilfs_segment_buffer *segbuf; - struct page *bd_page = NULL, *fs_page = NULL; + struct folio *bd_folio = NULL, *fs_folio = NULL; struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int update_sr = false; @@ -1837,21 +1909,21 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); - if (bh->b_page != bd_page) { - if (bd_page) - end_page_writeback(bd_page); - bd_page = bh->b_page; + if (bh->b_folio != bd_folio) { + if (bd_folio) + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } } /* - * We assume that the buffers which belong to the same page + * We assume that the buffers which belong to the same folio * continue over the buffer list. - * Under this assumption, the last BHs of pages is - * identifiable by the discontinuity of bh->b_page - * (page != fs_page). + * Under this assumption, the last BHs of folios is + * identifiable by the discontinuity of bh->b_folio + * (folio != fs_folio). * * For B-tree node blocks, however, this assumption is not - * guaranteed. The cleanup code of B-tree node pages needs + * guaranteed. The cleanup code of B-tree node folios needs * special care. */ list_for_each_entry(bh, &segbuf->sb_payload_buffers, @@ -1862,18 +1934,20 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) BIT(BH_Delay) | BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Redirected)); - set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh == segbuf->sb_super_root) { - if (bh->b_page != bd_page) { - end_page_writeback(bd_page); - bd_page = bh->b_page; + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + if (bh->b_folio != bd_folio) { + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } update_sr = true; break; } - if (bh->b_page != fs_page) { - nilfs_end_page_io(fs_page, 0); - fs_page = bh->b_page; + set_mask_bits(&bh->b_state, clear_bits, set_bits); + if (bh->b_folio != fs_folio) { + nilfs_end_folio_io(fs_folio, 0); + fs_folio = bh->b_folio; } } @@ -1887,13 +1961,13 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) } } /* - * Since pages may continue over multiple segment buffers, - * end of the last page must be checked outside of the loop. + * Since folios may continue over multiple segment buffers, + * end of the last folio must be checked outside of the loop. */ - if (bd_page) - end_page_writeback(bd_page); + if (bd_folio) + folio_end_writeback(bd_folio); - nilfs_end_page_io(fs_page, 0); + nilfs_end_folio_io(fs_folio, 0); nilfs_drop_collected_inodes(&sci->sc_dirty_files); @@ -2019,6 +2093,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int err; + if (sb_rdonly(sci->sc_super)) + return -EROFS; + nilfs_sc_cstage_set(sci, NILFS_ST_INIT); sci->sc_cno = nilfs->ns_cno; @@ -2037,7 +2114,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) err = nilfs_segctor_begin_construction(sci, nilfs); if (unlikely(err)) - goto out; + goto failed; /* Update time stamp */ sci->sc_seg_ctime = ktime_get_real_seconds(); @@ -2062,7 +2139,11 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) { - err = nilfs_segctor_fill_in_checkpoint(sci); + err = nilfs_cpfile_finalize_checkpoint( + nilfs->ns_cpfile, nilfs->ns_cno, sci->sc_root, + sci->sc_nblk_inc + sci->sc_nblk_this_inc, + sci->sc_seg_ctime, + !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)); if (unlikely(err)) goto failed_to_write; @@ -2071,10 +2152,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - nilfs_segctor_prepare_write(sci); - - nilfs_add_checksums_on_logs(&sci->sc_segbufs, - nilfs->ns_crc_seed); + nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) @@ -2100,10 +2178,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) return err; failed_to_write: - if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) - nilfs_redirty_inodes(&sci->sc_dirty_files); - failed: + if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE) + nilfs_redirty_inodes(&sci->sc_dirty_files); if (nilfs_doing_gc()) nilfs_redirty_inodes(&sci->sc_gc_inodes); nilfs_segctor_abort_construction(sci, nilfs, err); @@ -2122,8 +2199,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) { spin_lock(&sci->sc_state_lock); if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { - sci->sc_timer.expires = jiffies + sci->sc_interval; - add_timer(&sci->sc_timer); + if (sci->sc_task) { + sci->sc_timer.expires = jiffies + sci->sc_interval; + add_timer(&sci->sc_timer); + } sci->sc_state |= NILFS_SEGCTOR_COMMIT; } spin_unlock(&sci->sc_state_lock); @@ -2142,22 +2221,6 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) spin_unlock(&sci->sc_state_lock); } -/** - * nilfs_flush_segment - trigger a segment construction for resource control - * @sb: super block - * @ino: inode number of the file to be flushed out. - */ -void nilfs_flush_segment(struct super_block *sb, ino_t ino) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - struct nilfs_sc_info *sci = nilfs->ns_writer; - - if (!sci || nilfs_doing_construction()) - return; - nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0); - /* assign bit 0 to data files */ -} - struct nilfs_segctor_wait_request { wait_queue_entry_t wq; __u32 seq; @@ -2170,19 +2233,36 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) struct nilfs_segctor_wait_request wait_req; int err = 0; - spin_lock(&sci->sc_state_lock); init_wait(&wait_req.wq); wait_req.err = 0; atomic_set(&wait_req.done, 0); + init_waitqueue_entry(&wait_req.wq, current); + + /* + * To prevent a race issue where completion notifications from the + * log writer thread are missed, increment the request sequence count + * "sc_seq_request" and insert a wait queue entry using the current + * sequence number into the "sc_wait_request" queue at the same time + * within the lock section of "sc_state_lock". + */ + spin_lock(&sci->sc_state_lock); wait_req.seq = ++sci->sc_seq_request; + add_wait_queue(&sci->sc_wait_request, &wait_req.wq); spin_unlock(&sci->sc_state_lock); - init_waitqueue_entry(&wait_req.wq, current); - add_wait_queue(&sci->sc_wait_request, &wait_req.wq); - set_current_state(TASK_INTERRUPTIBLE); wake_up(&sci->sc_wait_daemon); for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + /* + * Synchronize only while the log writer thread is alive. + * Leave flushing out after the log writer thread exits to + * the cleanup work in nilfs_segctor_destroy(). + */ + if (!sci->sc_task) + break; + if (atomic_read(&wait_req.done)) { err = wait_req.err; break; @@ -2198,7 +2278,7 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) return err; } -static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) +static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force) { struct nilfs_segctor_wait_request *wrq, *n; unsigned long flags; @@ -2206,7 +2286,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) spin_lock_irqsave(&sci->sc_wait_request.lock, flags); list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) { if (!atomic_read(&wrq->done) && - nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { + (force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) { wrq->err = err; atomic_set(&wrq->done, 1); } @@ -2223,18 +2303,13 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) * nilfs_construct_segment - construct a logical segment * @sb: super block * - * Return Value: On success, 0 is returned. On errors, one of the following - * negative error code is returned. - * - * %-EROFS - Read only filesystem. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. + * * %-EROFS - Read only filesystem. */ int nilfs_construct_segment(struct super_block *sb) { @@ -2258,18 +2333,13 @@ int nilfs_construct_segment(struct super_block *sb) * @start: start byte offset * @end: end byte offset (inclusive) * - * Return Value: On success, 0 is returned. On errors, one of the following - * negative error code is returned. - * - * %-EROFS - Read only filesystem. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. + * * %-EROFS - Read only filesystem. */ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, loff_t start, loff_t end) @@ -2324,10 +2394,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, */ static void nilfs_segctor_accept(struct nilfs_sc_info *sci) { + bool thread_is_alive; + spin_lock(&sci->sc_state_lock); sci->sc_seq_accepted = sci->sc_seq_request; + thread_is_alive = (bool)sci->sc_task; spin_unlock(&sci->sc_state_lock); - del_timer_sync(&sci->sc_timer); + + /* + * This function does not race with the log writer thread's + * termination. Therefore, deleting sc_timer, which should not be + * done after the log writer thread exits, can be done safely outside + * the area protected by sc_state_lock. + */ + if (thread_is_alive) + timer_delete_sync(&sci->sc_timer); } /** @@ -2344,7 +2425,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) if (mode == SC_LSEG_SR) { sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; sci->sc_seq_done = sci->sc_seq_accepted; - nilfs_segctor_wakeup(sci, err); + nilfs_segctor_wakeup(sci, err, false); sci->sc_flush_request = 0; } else { if (mode == SC_FLUSH_FILE) @@ -2353,7 +2434,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) sci->sc_flush_request &= ~FLUSH_DAT_BIT; /* re-enable timer if checkpoint creation was not done */ - if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task && time_before(jiffies, sci->sc_timer.expires)) add_timer(&sci->sc_timer); } @@ -2364,6 +2445,8 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) * nilfs_segctor_construct - form logs and write them to disk * @sci: segment constructor object * @mode: mode of log forming + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) { @@ -2402,9 +2485,9 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) static void nilfs_construction_timeout(struct timer_list *t) { - struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer); + struct nilfs_sc_info *sci = timer_container_of(sci, t, sc_timer); - wake_up_process(sci->sc_timer_task); + wake_up_process(sci->sc_task); } static void @@ -2530,121 +2613,85 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) } /** - * nilfs_segctor_thread - main loop of the segment constructor thread. + * nilfs_log_write_required - determine whether log writing is required + * @sci: nilfs_sc_info struct + * @modep: location for storing log writing mode + * + * Return: true if log writing is required, false otherwise. If log writing + * is required, the mode is stored in the location pointed to by @modep. + */ +static bool nilfs_log_write_required(struct nilfs_sc_info *sci, int *modep) +{ + bool timedout, ret = true; + + spin_lock(&sci->sc_state_lock); + timedout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + time_after_eq(jiffies, sci->sc_timer.expires)); + if (timedout || sci->sc_seq_request != sci->sc_seq_done) + *modep = SC_LSEG_SR; + else if (sci->sc_flush_request) + *modep = nilfs_segctor_flush_mode(sci); + else + ret = false; + + spin_unlock(&sci->sc_state_lock); + return ret; +} + +/** + * nilfs_segctor_thread - main loop of the log writer thread * @arg: pointer to a struct nilfs_sc_info. * - * nilfs_segctor_thread() initializes a timer and serves as a daemon - * to execute segment constructions. + * nilfs_segctor_thread() is the main loop function of the log writer kernel + * thread, which determines whether log writing is necessary, and if so, + * performs the log write in the background, or waits if not. It is also + * used to decide the background writeback of the superblock. + * + * Return: Always 0. */ static int nilfs_segctor_thread(void *arg) { struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - int timeout = 0; - sci->sc_timer_task = current; - - /* start sync. */ - sci->sc_task = current; - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ nilfs_info(sci->sc_super, "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds", sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); - spin_lock(&sci->sc_state_lock); - loop: - for (;;) { - int mode; + set_freezable(); - if (sci->sc_state & NILFS_SEGCTOR_QUIT) - goto end_thread; - - if (timeout || sci->sc_seq_request != sci->sc_seq_done) - mode = SC_LSEG_SR; - else if (sci->sc_flush_request) - mode = nilfs_segctor_flush_mode(sci); - else - break; - - spin_unlock(&sci->sc_state_lock); - nilfs_segctor_thread_construct(sci, mode); - spin_lock(&sci->sc_state_lock); - timeout = 0; - } - - - if (freezing(current)) { - spin_unlock(&sci->sc_state_lock); - try_to_freeze(); - spin_lock(&sci->sc_state_lock); - } else { + while (!kthread_should_stop()) { DEFINE_WAIT(wait); - int should_sleep = 1; + bool should_write; + int mode; + + if (freezing(current)) { + try_to_freeze(); + continue; + } prepare_to_wait(&sci->sc_wait_daemon, &wait, TASK_INTERRUPTIBLE); - - if (sci->sc_seq_request != sci->sc_seq_done) - should_sleep = 0; - else if (sci->sc_flush_request) - should_sleep = 0; - else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) - should_sleep = time_before(jiffies, - sci->sc_timer.expires); - - if (should_sleep) { - spin_unlock(&sci->sc_state_lock); + should_write = nilfs_log_write_required(sci, &mode); + if (!should_write) schedule(); - spin_lock(&sci->sc_state_lock); - } finish_wait(&sci->sc_wait_daemon, &wait); - timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && - time_after_eq(jiffies, sci->sc_timer.expires)); if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) set_nilfs_discontinued(nilfs); - } - goto loop; - end_thread: - spin_unlock(&sci->sc_state_lock); + if (should_write) + nilfs_segctor_thread_construct(sci, mode); + } /* end sync. */ + spin_lock(&sci->sc_state_lock); sci->sc_task = NULL; - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ - return 0; -} - -static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) -{ - struct task_struct *t; - - t = kthread_run(nilfs_segctor_thread, sci, "segctord"); - if (IS_ERR(t)) { - int err = PTR_ERR(t); - - nilfs_err(sci->sc_super, "error %d creating segctord thread", - err); - return err; - } - wait_event(sci->sc_wait_task, sci->sc_task != NULL); + timer_shutdown_sync(&sci->sc_timer); + spin_unlock(&sci->sc_state_lock); return 0; } -static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) - __acquires(&sci->sc_state_lock) - __releases(&sci->sc_state_lock) -{ - sci->sc_state |= NILFS_SEGCTOR_QUIT; - - while (sci->sc_task) { - wake_up(&sci->sc_wait_daemon); - spin_unlock(&sci->sc_state_lock); - wait_event(sci->sc_wait_task, sci->sc_task == NULL); - spin_lock(&sci->sc_state_lock); - } -} - /* * Setup & clean-up functions */ @@ -2665,7 +2712,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, init_waitqueue_head(&sci->sc_wait_request); init_waitqueue_head(&sci->sc_wait_daemon); - init_waitqueue_head(&sci->sc_wait_task); spin_lock_init(&sci->sc_state_lock); INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); @@ -2673,7 +2719,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, INIT_LIST_HEAD(&sci->sc_gc_inodes); INIT_LIST_HEAD(&sci->sc_iput_queue); INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); - timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; @@ -2703,7 +2748,7 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) flush_work(&sci->sc_iput_work); - } while (ret && retrycount-- > 0); + } while (ret && ret != -EROFS && retrycount-- > 0); } /** @@ -2721,12 +2766,28 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) up_write(&nilfs->ns_segctor_sem); + if (sci->sc_task) { + wake_up(&sci->sc_wait_daemon); + if (kthread_stop(sci->sc_task)) { + spin_lock(&sci->sc_state_lock); + sci->sc_task = NULL; + timer_shutdown_sync(&sci->sc_timer); + spin_unlock(&sci->sc_state_lock); + } + } + spin_lock(&sci->sc_state_lock); - nilfs_segctor_kill_thread(sci); flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); + /* + * Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can + * be called from delayed iput() via nilfs_evict_inode() and can race + * with the above log writer thread termination. + */ + nilfs_segctor_wakeup(sci, 0, true); + if (flush_work(&sci->sc_iput_work)) flag = true; @@ -2752,7 +2813,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) down_write(&nilfs->ns_segctor_sem); - timer_shutdown_sync(&sci->sc_timer); kfree(sci); } @@ -2764,14 +2824,16 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * This allocates a log writer object, initializes it, and starts the * log writer. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINTR - Log writer thread creation failed due to interruption. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) { struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci; + struct task_struct *t; int err; if (nilfs->ns_writer) { @@ -2784,17 +2846,23 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) return 0; } - nilfs->ns_writer = nilfs_segctor_new(sb, root); - if (!nilfs->ns_writer) + sci = nilfs_segctor_new(sb, root); + if (unlikely(!sci)) return -ENOMEM; - inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); - - err = nilfs_segctor_start_thread(nilfs->ns_writer); - if (unlikely(err)) + nilfs->ns_writer = sci; + t = kthread_create(nilfs_segctor_thread, sci, "segctord"); + if (IS_ERR(t)) { + err = PTR_ERR(t); + nilfs_err(sb, "error %d creating segctord thread", err); nilfs_detach_log_writer(sb); + return err; + } + sci->sc_task = t; + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); - return err; + wake_up_process(sci->sc_task); + return 0; } /** @@ -2814,6 +2882,7 @@ void nilfs_detach_log_writer(struct super_block *sb) nilfs_segctor_destroy(nilfs->ns_writer); nilfs->ns_writer = NULL; } + set_nilfs_purging(nilfs); /* Force to free the list of dirty files */ spin_lock(&nilfs->ns_inode_lock); @@ -2826,4 +2895,5 @@ void nilfs_detach_log_writer(struct super_block *sb) up_write(&nilfs->ns_segctor_sem); nilfs_dispose_list(nilfs, &garbage_list, 1); + clear_nilfs_purging(nilfs); } diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 1060f72ebf5a..4b39ed43ae72 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -22,10 +22,10 @@ struct nilfs_root; * struct nilfs_recovery_info - Recovery information * @ri_need_recovery: Recovery status * @ri_super_root: Block number of the last super root - * @ri_ri_cno: Number of the last checkpoint + * @ri_cno: Number of the last checkpoint * @ri_lsegs_start: Region for roll-forwarding (start block number) * @ri_lsegs_end: Region for roll-forwarding (end block number) - * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start + * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start * @ri_used_segments: List of segments to be mark active * @ri_pseg_start: Block number of the last partial segment * @ri_seq: Sequence number on the last partial segment @@ -105,9 +105,8 @@ struct nilfs_segsum_pointer { * @sc_flush_request: inode bitmap of metadata files to be flushed * @sc_wait_request: Client request queue * @sc_wait_daemon: Daemon wait queue - * @sc_wait_task: Start/end wait queue to control segctord task * @sc_seq_request: Request counter - * @sc_seq_accept: Accepted request count + * @sc_seq_accepted: Accepted request count * @sc_seq_done: Completion counter * @sc_sync: Request of explicit sync operation * @sc_interval: Timeout value of background construction @@ -158,7 +157,6 @@ struct nilfs_sc_info { wait_queue_head_t sc_wait_request; wait_queue_head_t sc_wait_daemon; - wait_queue_head_t sc_wait_task; __u32 sc_seq_request; __u32 sc_seq_accepted; @@ -171,7 +169,6 @@ struct nilfs_sc_info { unsigned long sc_watermark; struct timer_list sc_timer; - struct task_struct *sc_timer_task; struct task_struct *sc_task; }; @@ -192,7 +189,6 @@ enum { }; /* sc_state */ -#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */ #define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */ /* @@ -230,7 +226,6 @@ extern void nilfs_relax_pressure_in_lock(struct super_block *); extern int nilfs_construct_segment(struct super_block *); extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, loff_t, loff_t); -extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index dc359b56fdfa..83f93337c01b 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -48,7 +48,7 @@ nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum) { __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; - do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); + t = div64_ul(t, nilfs_sufile_segment_usages_per_block(sufile)); return (unsigned long)t; } @@ -70,19 +70,35 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, max - curr + 1); } -static struct nilfs_segment_usage * -nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, - struct buffer_head *bh, void *kaddr) +/** + * nilfs_sufile_segment_usage_offset - calculate the byte offset of a segment + * usage entry in the folio containing it + * @sufile: segment usage file inode + * @segnum: number of segment usage + * @bh: buffer head of block containing segment usage indexed by @segnum + * + * Return: Byte offset in the folio of the segment usage entry. + */ +static size_t nilfs_sufile_segment_usage_offset(const struct inode *sufile, + __u64 segnum, + struct buffer_head *bh) { - return kaddr + bh_offset(bh) + + return offset_in_folio(bh->b_folio, bh->b_data) + nilfs_sufile_get_offset(sufile, segnum) * NILFS_MDT(sufile)->mi_entry_size; } -static inline int nilfs_sufile_get_header_block(struct inode *sufile, - struct buffer_head **bhp) +static int nilfs_sufile_get_header_block(struct inode *sufile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(sufile->i_sb, + "missing header block in segment usage metadata"); + err = -EIO; + } + return err; } static inline int @@ -105,13 +121,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, u64 ncleanadd, u64 ndirtyadd) { struct nilfs_sufile_header *header; - void *kaddr; - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->sh_ncleansegs, ncleanadd); le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); - kunmap_atomic(kaddr); + kunmap_local(header); mark_buffer_dirty(header_bh); } @@ -119,6 +133,8 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, /** * nilfs_sufile_get_ncleansegs - return the number of clean segments * @sufile: inode of segment usage file + * + * Return: Number of clean segments. */ unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) { @@ -141,17 +157,13 @@ unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) * of successfully modified segments from the head is stored in the * place @ndone points to. * - * Return Value: On success, zero is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - Given segment usage is in hole block (may be returned if - * @create is zero) - * - * %-EINVAL - Invalid segment usage number + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid segment usage number + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Given segment usage is in hole block (may be returned if + * @create is zero) + * * %-ENOMEM - Insufficient memory available. */ int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, int create, size_t *ndone, @@ -258,10 +270,7 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, * @start: minimum segment number of allocatable region (inclusive) * @end: maximum segment number of allocatable region (inclusive) * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-ERANGE - invalid segment region + * Return: 0 on success, or %-ERANGE if segment range is invalid. */ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) { @@ -286,17 +295,14 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) * @sufile: inode of segment usage file * @segnump: pointer to segment number * - * Description: nilfs_sufile_alloc() allocates a clean segment. + * Description: nilfs_sufile_alloc() allocates a clean segment, and stores + * its segment number in the place pointed to by @segnump. * - * Return Value: On success, 0 is returned and the segment number of the - * allocated segment is stored in the place pointed by @segnump. On error, one - * of the following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOSPC - No clean segment left. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No clean segment left. */ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) { @@ -306,6 +312,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) struct nilfs_sufile_info *sui = NILFS_SUI(sufile); size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; + size_t offset; void *kaddr; unsigned long nsegments, nsus, cnt; int ret, j; @@ -315,10 +322,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); last_alloc = le64_to_cpu(header->sh_last_alloc); - kunmap_atomic(kaddr); + kunmap_local(header); nsegments = nilfs_sufile_get_nsegments(sufile); maxsegnum = sui->allocmax; @@ -352,9 +358,10 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) &su_bh); if (ret < 0) goto out_header; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, offset); nsus = nilfs_sufile_segment_usages_in_block( sufile, segnum, maxsegnum); @@ -363,14 +370,13 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) continue; /* found a clean segment */ nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr); + kunmap_local(kaddr); - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); - kunmap_atomic(kaddr); + kunmap_local(header); sui->ncleansegs--; mark_buffer_dirty(header_bh); @@ -384,7 +390,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) goto out_header; } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(su_bh); } @@ -404,18 +410,18 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (unlikely(!nilfs_segment_usage_clean(su))) { nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean", __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr); + kunmap_local(su); return; } nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr); + kunmap_local(su); nilfs_sufile_mod_counter(header_bh, -1, 1); NILFS_SUI(sufile)->ncleansegs--; @@ -429,14 +435,14 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int clean, dirty; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (su->su_flags == cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)) && su->su_nblocks == cpu_to_le32(0)) { - kunmap_atomic(kaddr); + kunmap_local(su); return; } clean = nilfs_segment_usage_clean(su); @@ -446,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, su->su_lastmod = cpu_to_le64(0); su->su_nblocks = cpu_to_le32(0); su->su_flags = cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)); - kunmap_atomic(kaddr); + kunmap_local(su); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); NILFS_SUI(sufile)->ncleansegs -= clean; @@ -460,23 +466,28 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int sudirty; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (nilfs_segment_usage_clean(su)) { nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean", __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr); + kunmap_local(su); return; } - WARN_ON(nilfs_segment_usage_error(su)); - WARN_ON(!nilfs_segment_usage_dirty(su)); + if (unlikely(nilfs_segment_usage_error(su))) + nilfs_warn(sufile->i_sb, "free segment %llu marked in error", + (unsigned long long)segnum); sudirty = nilfs_segment_usage_dirty(su); + if (unlikely(!sudirty)) + nilfs_warn(sufile->i_sb, "free unallocated segment %llu", + (unsigned long long)segnum); + nilfs_segment_usage_set_clean(su); - kunmap_atomic(kaddr); + kunmap_local(su); mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); @@ -491,25 +502,57 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty * @sufile: inode of segment usage file * @segnum: segment number + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; - void *kaddr; + size_t offset; struct nilfs_segment_usage *su; int ret; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); - if (!ret) { + if (unlikely(ret)) { + if (ret == -ENOENT) { + nilfs_error(sufile->i_sb, + "segment usage for segment %llu is unreadable due to a hole block", + (unsigned long long)segnum); + ret = -EIO; + } + goto out_sem; + } + + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh); + su = kmap_local_folio(bh->b_folio, offset); + if (unlikely(nilfs_segment_usage_error(su))) { + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + + kunmap_local(su); + brelse(bh); + if (nilfs_segment_is_active(nilfs, segnum)) { + nilfs_error(sufile->i_sb, + "active segment %llu is erroneous", + (unsigned long long)segnum); + } else { + /* + * Segments marked erroneous are never allocated by + * nilfs_sufile_alloc(); only active segments, ie, + * the segments indexed by ns_segnum or ns_nextnum, + * can be erroneous here. + */ + WARN_ON_ONCE(1); + } + ret = -EIO; + } else { + nilfs_segment_usage_set_dirty(su); + kunmap_local(su); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); - kaddr = kmap_atomic(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); - nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr); brelse(bh); } +out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } @@ -520,13 +563,15 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) * @segnum: segment number * @nblocks: number of live blocks in the segment * @modtime: modification time (option) + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, unsigned long nblocks, time64_t modtime) { struct buffer_head *bh; struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int ret; down_write(&NILFS_MDT(sufile)->mi_sem); @@ -534,13 +579,18 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); - WARN_ON(nilfs_segment_usage_error(su)); - if (modtime) + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh); + su = kmap_local_folio(bh->b_folio, offset); + if (modtime) { + /* + * Check segusage error and set su_lastmod only when updating + * this entry with a valid timestamp, not for cancellation. + */ + WARN_ON_ONCE(nilfs_segment_usage_error(su)); su->su_lastmod = cpu_to_le64(modtime); + } su->su_nblocks = cpu_to_le32(nblocks); - kunmap_atomic(kaddr); + kunmap_local(su); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); @@ -556,23 +606,19 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, * @sufile: inode of segment usage file * @sustat: pointer to a structure of segment usage statistics * - * Description: nilfs_sufile_get_stat() returns information about segment - * usage. - * - * Return Value: On success, 0 is returned, and segment usage information is - * stored in the place pointed by @sustat. On error, one of the following - * negative error codes is returned. + * Description: nilfs_sufile_get_stat() retrieves segment usage statistics + * and stores them in the location pointed to by @sustat. * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; - void *kaddr; int ret; down_read(&NILFS_MDT(sufile)->mi_sem); @@ -581,8 +627,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); @@ -591,7 +636,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) spin_lock(&nilfs->ns_last_segment_lock); sustat->ss_prot_seq = nilfs->ns_prot_seq; spin_unlock(&nilfs->ns_last_segment_lock); - kunmap_atomic(kaddr); + kunmap_local(header); brelse(header_bh); out_sem: @@ -604,18 +649,18 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int suclean; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (nilfs_segment_usage_error(su)) { - kunmap_atomic(kaddr); + kunmap_local(su); return; } suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); - kunmap_atomic(kaddr); + kunmap_local(su); if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); @@ -631,16 +676,12 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, * @start: start segment number (inclusive) * @end: end segment number (inclusive) * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid number of segments specified - * - * %-EBUSY - Dirty or active segments are present in the range + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Dirty or active segments are present in the range. + * * %-EINVAL - Invalid number of segments specified. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_sufile_truncate_range(struct inode *sufile, __u64 start, __u64 end) @@ -653,7 +694,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, unsigned long segusages_per_block; unsigned long nsegs, ncleaned; __u64 segnum; - void *kaddr; + size_t offset; ssize_t n, nc; int ret; int j; @@ -684,16 +725,16 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, /* hole */ continue; } - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); su2 = su; for (j = 0; j < n; j++, su = (void *)su + susz) { if ((le32_to_cpu(su->su_flags) & ~BIT(NILFS_SEGMENT_USAGE_ERROR)) || nilfs_segment_is_active(nilfs, segnum + j)) { ret = -EBUSY; - kunmap_atomic(kaddr); + kunmap_local(su2); brelse(su_bh); goto out_header; } @@ -705,7 +746,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, nc++; } } - kunmap_atomic(kaddr); + kunmap_local(su2); if (nc > 0) { mark_buffer_dirty(su_bh); ncleaned += nc; @@ -735,16 +776,12 @@ out: * @sufile: inode of segment usage file * @newnsegs: new number of segments * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOSPC - Enough free space is not left for shrinking - * - * %-EBUSY - Dirty or active segments exist in the region to be truncated + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Dirty or active segments exist in the region to be truncated. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - Enough free space is not left for shrinking. */ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) { @@ -752,7 +789,6 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) struct buffer_head *header_bh; struct nilfs_sufile_header *header; struct nilfs_sufile_info *sui = NILFS_SUI(sufile); - void *kaddr; unsigned long nsegs, nrsvsegs; int ret = 0; @@ -779,12 +815,20 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) goto out_header; sui->ncleansegs -= nsegs - newnsegs; + + /* + * If the sufile is successfully truncated, immediately adjust + * the segment allocation space while locking the semaphore + * "mi_sem" so that nilfs_sufile_alloc() never allocates + * segments in the truncated space. + */ + sui->allocmax = newnsegs - 1; + sui->allocmin = 0; } - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); - kunmap_atomic(kaddr); + kunmap_local(header); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(sufile); @@ -798,21 +842,17 @@ out: } /** - * nilfs_sufile_get_suinfo - + * nilfs_sufile_get_suinfo - get segment usage information * @sufile: inode of segment usage file * @segnum: segment number to start looking - * @buf: array of suinfo - * @sisz: byte size of suinfo - * @nsi: size of suinfo array + * @buf: array of suinfo + * @sisz: byte size of suinfo + * @nsi: size of suinfo array * - * Description: - * - * Return Value: On success, 0 is returned and .... On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: Count of segment usage info items stored in the output buffer on + * success, or one of the following negative error codes on failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, unsigned int sisz, size_t nsi) @@ -822,6 +862,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + size_t offset; void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; @@ -849,9 +890,9 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, continue; } - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, offset); for (j = 0; j < n; j++, su = (void *)su + susz, si = (void *)si + sisz) { si->sui_lastmod = le64_to_cpu(su->su_lastmod); @@ -862,7 +903,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, si->sui_flags |= BIT(NILFS_SEGMENT_USAGE_ACTIVE); } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(su_bh); } ret = nsegs; @@ -883,14 +924,11 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, * segment usage accordingly. Only the fields indicated by the sup_flags * are updated. * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid values in input (segment number, flags or nblocks) + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid values in input (segment number, flags or nblocks). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, unsigned int supsz, size_t nsup) @@ -899,7 +937,7 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, struct buffer_head *header_bh, *bh; struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup; struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; unsigned long blkoff, prev_blkoff; int cleansi, cleansu, dirtysi, dirtysu; long ncleaned = 0, ndirtied = 0; @@ -931,9 +969,9 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, goto out_header; for (;;) { - kaddr = kmap_atomic(bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, sup->sup_segnum, bh, kaddr); + offset = nilfs_sufile_segment_usage_offset( + sufile, sup->sup_segnum, bh); + su = kmap_local_folio(bh->b_folio, offset); if (nilfs_suinfo_update_lastmod(sup)) su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod); @@ -968,7 +1006,7 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags); } - kunmap_atomic(kaddr); + kunmap_local(su); sup = (void *)sup + supsz; if (sup >= supend) @@ -1017,13 +1055,14 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, * and start+len is rounded down. For each clean segment blkdev_issue_discard * function is invoked. * - * Return Value: On success, 0 is returned or negative error code, otherwise. + * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; struct buffer_head *su_bh; struct nilfs_segment_usage *su; + size_t offset; void *kaddr; size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size; sector_t seg_start, seg_end, start_block, end_block; @@ -1073,9 +1112,9 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) continue; } - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, - su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, offset); for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) { if (!nilfs_segment_usage_clean(su)) continue; @@ -1103,7 +1142,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) } if (nblocks >= minlen) { - kunmap_atomic(kaddr); + kunmap_local(kaddr); ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, @@ -1115,16 +1154,17 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) } ndiscarded += nblocks; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset( + sufile, segnum, su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, + offset); } /* start new extent */ start = seg_start; nblocks = seg_end - seg_start + 1; } - kunmap_atomic(kaddr); + kunmap_local(kaddr); put_bh(su_bh); } @@ -1161,6 +1201,8 @@ out_sem: * @susize: size of a segment usage entry * @raw_inode: on-disk sufile inode * @inodep: buffer to store the inode + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_inode *raw_inode, struct inode **inodep) @@ -1169,7 +1211,6 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_sufile_info *sui; struct buffer_head *header_bh; struct nilfs_sufile_header *header; - void *kaddr; int err; if (susize > sb->s_blocksize) { @@ -1185,7 +1226,7 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); if (unlikely(!sufile)) return -ENOMEM; - if (!(sufile->i_state & I_NEW)) + if (!(inode_state_read_once(sufile) & I_NEW)) goto out; err = nilfs_mdt_init(sufile, NILFS_MDT_GFP, sizeof(*sui)); @@ -1199,15 +1240,20 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, if (err) goto failed; - err = nilfs_sufile_get_header_block(sufile, &header_bh); - if (err) + err = nilfs_mdt_get_block(sufile, 0, 0, NULL, &header_bh); + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "missing header block in segment usage metadata"); + err = -EINVAL; + } goto failed; + } sui = NILFS_SUI(sufile); - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); - kunmap_atomic(kaddr); + kunmap_local(header); brelse(header_bh); sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 8e8a1a5a0402..cd6f28ab3521 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -58,6 +58,8 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range); * nilfs_sufile_scrap - make a segment garbage * @sufile: inode of segment usage file * @segnum: segment number to be freed + * + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum) { @@ -68,6 +70,8 @@ static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum) * nilfs_sufile_free - free segment * @sufile: inode of segment usage file * @segnum: segment number to be freed + * + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) { @@ -80,6 +84,8 @@ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) * @segnumv: array of segment numbers * @nsegs: size of @segnumv array * @ndone: place to store the number of freed segments + * + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv, size_t nsegs, size_t *ndone) @@ -95,8 +101,7 @@ static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv, * @nsegs: size of @segnumv array * @ndone: place to store the number of cancelled segments * - * Return Value: On success, 0 is returned. On error, a negative error codes - * is returned. + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_cancel_freev(struct inode *sufile, __u64 *segnumv, size_t nsegs, @@ -114,14 +119,11 @@ static inline int nilfs_sufile_cancel_freev(struct inode *sufile, * Description: nilfs_sufile_set_error() marks the segment specified by * @segnum as erroneous. The error segment will never be used again. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid segment usage number. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid segment usage number. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) { diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6edb6e0dd61f..badc2cbc895e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -29,12 +29,13 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> -#include <linux/parser.h> #include <linux/crc32.h> #include <linux/vfs.h> #include <linux/writeback.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include "nilfs.h" #include "export.h" #include "mdt.h" @@ -60,7 +61,6 @@ struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; static int nilfs_setup_super(struct super_block *sb, int is_mount); -static int nilfs_remount(struct super_block *sb, int *flags, char *data); void __nilfs_msg(struct super_block *sb, const char *fmt, ...) { @@ -105,6 +105,10 @@ static void nilfs_set_error(struct super_block *sb) /** * __nilfs_error() - report failure condition on a filesystem + * @sb: super block instance + * @function: name of calling function + * @fmt: format string for message to be output + * @...: optional arguments to @fmt * * __nilfs_error() sets an ERROR_FS flag on the superblock as well as * reporting an error message. This function should be called when @@ -156,6 +160,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) return NULL; ii->i_bh = NULL; ii->i_state = 0; + ii->i_type = 0; ii->i_cno = 0; ii->i_assoc_inode = NULL; ii->i_bmap = &ii->i_bmap_data; @@ -304,6 +309,8 @@ int nilfs_commit_super(struct super_block *sb, int flag) * This function restores state flags in the on-disk super block. * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the * filesystem was not clean previously. + * + * Return: 0 on success, %-EIO if I/O error or superblock is corrupted. */ int nilfs_cleanup_super(struct super_block *sb) { @@ -334,6 +341,8 @@ int nilfs_cleanup_super(struct super_block *sb) * nilfs_move_2nd_super - relocate secondary super block * @sb: super block instance * @sb2off: new offset of the secondary super block (in bytes) + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) { @@ -372,10 +381,31 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) goto out; } nsbp = (void *)nsbh->b_data + offset; - memset(nsbp, 0, nilfs->ns_blocksize); + lock_buffer(nsbh); if (sb2i >= 0) { + /* + * The position of the second superblock only changes by 4KiB, + * which is larger than the maximum superblock data size + * (= 1KiB), so there is no need to use memmove() to allow + * overlap between source and destination. + */ memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize); + + /* + * Zero fill after copy to avoid overwriting in case of move + * within the same block. + */ + memset(nsbh->b_data, 0, offset); + memset((void *)nsbp + nilfs->ns_sbsize, 0, + nsbh->b_size - offset - nilfs->ns_sbsize); + } else { + memset(nsbh->b_data, 0, nsbh->b_size); + } + set_buffer_uptodate(nsbh); + unlock_buffer(nsbh); + + if (sb2i >= 0) { brelse(nilfs->ns_sbh[sb2i]); nilfs->ns_sbh[sb2i] = nsbh; nilfs->ns_sbp[sb2i] = nsbp; @@ -394,6 +424,8 @@ out: * nilfs_resize_fs - resize the filesystem * @sb: super block instance * @newsize: new size of the filesystem (in bytes) + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) { @@ -409,6 +441,15 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) goto out; /* + * Prevent underflow in second superblock position calculation. + * The exact minimum size check is done in nilfs_sufile_resize(). + */ + if (newsize < 4096) { + ret = -ENOSPC; + goto out; + } + + /* * Write lock is required to protect some functions depending * on the number of segments, the number of reserved segments, * and so forth. @@ -417,7 +458,7 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) sb2off = NILFS_SB2_OFFSET_BYTES(newsize); newnsegs = sb2off >> nilfs->ns_blocksize_bits; - do_div(newnsegs, nilfs->ns_blocks_per_segment); + newnsegs = div64_ul(newnsegs, nilfs->ns_blocks_per_segment); ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs); up_write(&nilfs->ns_segctor_sem); @@ -473,6 +514,7 @@ static void nilfs_put_super(struct super_block *sb) up_write(&nilfs->ns_sem); } + nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); @@ -512,8 +554,6 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; - struct nilfs_checkpoint *raw_cp; - struct buffer_head *bh_cp; int err = -ENOMEM; root = nilfs_find_or_create_root( @@ -525,38 +565,19 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, goto reuse; /* already attached checkpoint */ down_read(&nilfs->ns_segctor_sem); - err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, - &bh_cp); + err = nilfs_ifile_read(sb, root, cno, nilfs->ns_inode_size); up_read(&nilfs->ns_segctor_sem); - if (unlikely(err)) { - if (err == -ENOENT || err == -EINVAL) { - nilfs_err(sb, - "Invalid checkpoint (checkpoint number=%llu)", - (unsigned long long)cno); - err = -EINVAL; - } + if (unlikely(err)) goto failed; - } - - err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size, - &raw_cp->cp_ifile_inode, &root->ifile); - if (err) - goto failed_bh; - - atomic64_set(&root->inodes_count, - le64_to_cpu(raw_cp->cp_inodes_count)); - atomic64_set(&root->blocks_count, - le64_to_cpu(raw_cp->cp_blocks_count)); - - nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); reuse: *rootp = root; return 0; - failed_bh: - nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); failed: + if (err == -EINVAL) + nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)", + (unsigned long long)cno); nilfs_put_root(root); return err; @@ -691,105 +712,98 @@ static const struct super_operations nilfs_sops = { .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, .statfs = nilfs_statfs, - .remount_fs = nilfs_remount, .show_options = nilfs_show_options }; enum { - Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, - Opt_discard, Opt_nodiscard, Opt_err, + Opt_err, Opt_barrier, Opt_snapshot, Opt_order, Opt_norecovery, + Opt_discard, }; -static match_table_t tokens = { - {Opt_err_cont, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_snapshot, "cp=%u"}, - {Opt_order, "order=%s"}, - {Opt_norecovery, "norecovery"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_err, NULL} +static const struct constant_table nilfs_param_err[] = { + {"continue", NILFS_MOUNT_ERRORS_CONT}, + {"panic", NILFS_MOUNT_ERRORS_PANIC}, + {"remount-ro", NILFS_MOUNT_ERRORS_RO}, + {} }; -static int parse_options(char *options, struct super_block *sb, int is_remount) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - char *p; - substring_t args[MAX_OPT_ARGS]; - - if (!options) - return 1; - - while ((p = strsep(&options, ",")) != NULL) { - int token; +static const struct fs_parameter_spec nilfs_param_spec[] = { + fsparam_enum ("errors", Opt_err, nilfs_param_err), + fsparam_flag_no ("barrier", Opt_barrier), + fsparam_u64 ("cp", Opt_snapshot), + fsparam_string ("order", Opt_order), + fsparam_flag ("norecovery", Opt_norecovery), + fsparam_flag_no ("discard", Opt_discard), + {} +}; - if (!*p) - continue; +struct nilfs_fs_context { + unsigned long ns_mount_opt; + __u64 cno; +}; - token = match_token(p, tokens, args); - switch (token) { - case Opt_barrier: - nilfs_set_opt(nilfs, BARRIER); - break; - case Opt_nobarrier: +static int nilfs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct nilfs_fs_context *nilfs = fc->fs_private; + int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, nilfs_param_spec, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_barrier: + if (result.negated) nilfs_clear_opt(nilfs, BARRIER); - break; - case Opt_order: - if (strcmp(args[0].from, "relaxed") == 0) - /* Ordered data semantics */ - nilfs_clear_opt(nilfs, STRICT_ORDER); - else if (strcmp(args[0].from, "strict") == 0) - /* Strict in-order semantics */ - nilfs_set_opt(nilfs, STRICT_ORDER); - else - return 0; - break; - case Opt_err_panic: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC); - break; - case Opt_err_ro: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO); - break; - case Opt_err_cont: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT); - break; - case Opt_snapshot: - if (is_remount) { - nilfs_err(sb, - "\"%s\" option is invalid for remount", - p); - return 0; - } - break; - case Opt_norecovery: - nilfs_set_opt(nilfs, NORECOVERY); - break; - case Opt_discard: - nilfs_set_opt(nilfs, DISCARD); - break; - case Opt_nodiscard: - nilfs_clear_opt(nilfs, DISCARD); - break; - default: - nilfs_err(sb, "unrecognized mount option \"%s\"", p); - return 0; + else + nilfs_set_opt(nilfs, BARRIER); + break; + case Opt_order: + if (strcmp(param->string, "relaxed") == 0) + /* Ordered data semantics */ + nilfs_clear_opt(nilfs, STRICT_ORDER); + else if (strcmp(param->string, "strict") == 0) + /* Strict in-order semantics */ + nilfs_set_opt(nilfs, STRICT_ORDER); + else + return -EINVAL; + break; + case Opt_err: + nilfs->ns_mount_opt &= ~NILFS_MOUNT_ERROR_MODE; + nilfs->ns_mount_opt |= result.uint_32; + break; + case Opt_snapshot: + if (is_remount) { + struct super_block *sb = fc->root->d_sb; + + nilfs_err(sb, + "\"%s\" option is invalid for remount", + param->key); + return -EINVAL; } + if (result.uint_64 == 0) { + nilfs_err(NULL, + "invalid option \"cp=0\": invalid checkpoint number 0"); + return -EINVAL; + } + nilfs->cno = result.uint_64; + break; + case Opt_norecovery: + nilfs_set_opt(nilfs, NORECOVERY); + break; + case Opt_discard: + if (result.negated) + nilfs_clear_opt(nilfs, DISCARD); + else + nilfs_set_opt(nilfs, DISCARD); + break; + default: + return -EINVAL; } - return 1; -} -static inline void -nilfs_set_default_options(struct super_block *sb, - struct nilfs_super_block *sbp) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - - nilfs->ns_mount_opt = - NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; + return 0; } static int nilfs_setup_super(struct super_block *sb, int is_mount) @@ -846,9 +860,8 @@ struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); } -int nilfs_store_magic_and_option(struct super_block *sb, - struct nilfs_super_block *sbp, - char *data) +int nilfs_store_magic(struct super_block *sb, + struct nilfs_super_block *sbp) { struct the_nilfs *nilfs = sb->s_fs_info; @@ -859,14 +872,12 @@ int nilfs_store_magic_and_option(struct super_block *sb, sb->s_flags |= SB_NOATIME; #endif - nilfs_set_default_options(sb, sbp); - nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid); nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid); nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval); nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max); - return !parse_options(data, sb, 0) ? -EINVAL : 0; + return 0; } int nilfs_check_feature_compatibility(struct super_block *sb, @@ -982,7 +993,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint * @root_dentry: root dentry of the tree to be shrunk * - * This function returns true if the tree was in-use. + * Return: true if the tree was in-use, false otherwise. */ static bool nilfs_tree_is_busy(struct dentry *root_dentry) { @@ -1024,17 +1035,19 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) /** * nilfs_fill_super() - initialize a super block instance * @sb: super_block - * @data: mount options - * @silent: silent mode flag + * @fc: filesystem context * * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. + * + * Return: 0 on success, or a negative error code on failure. */ static int -nilfs_fill_super(struct super_block *sb, void *data, int silent) +nilfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct the_nilfs *nilfs; struct nilfs_root *fsroot; + struct nilfs_fs_context *ctx = fc->fs_private; __u64 cno; int err; @@ -1044,10 +1057,13 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = nilfs; - err = init_nilfs(nilfs, sb, (char *)data); + err = init_nilfs(nilfs, sb); if (err) goto failed_nilfs; + /* Copy in parsed mount options */ + nilfs->ns_mount_opt = ctx->ns_mount_opt; + sb->s_op = &nilfs_sops; sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; @@ -1060,6 +1076,10 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto failed_nilfs; + super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, + sizeof(nilfs->ns_sbp[0]->s_uuid)); + super_set_sysfs_name_bdev(sb); + cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { @@ -1096,6 +1116,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) nilfs_put_root(fsroot); failed_unload: + nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); @@ -1105,34 +1126,25 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) return err; } -static int nilfs_remount(struct super_block *sb, int *flags, char *data) +static int nilfs_reconfigure(struct fs_context *fc) { + struct nilfs_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; - unsigned long old_sb_flags; - unsigned long old_mount_opt; int err; sync_filesystem(sb); - old_sb_flags = sb->s_flags; - old_mount_opt = nilfs->ns_mount_opt; - - if (!parse_options(data, sb, 1)) { - err = -EINVAL; - goto restore_opts; - } - sb->s_flags = (sb->s_flags & ~SB_POSIXACL); err = -EINVAL; if (!nilfs_valid_fs(nilfs)) { nilfs_warn(sb, "couldn't remount because the filesystem is in an incomplete recovery state"); - goto restore_opts; + goto ignore_opts; } - - if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) + if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb)) goto out; - if (*flags & SB_RDONLY) { + if (fc->sb_flags & SB_RDONLY) { sb->s_flags |= SB_RDONLY; /* @@ -1160,166 +1172,67 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) "couldn't remount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); err = -EROFS; - goto restore_opts; + goto ignore_opts; } sb->s_flags &= ~SB_RDONLY; root = NILFS_I(d_inode(sb->s_root))->i_root; err = nilfs_attach_log_writer(sb, root); - if (err) - goto restore_opts; + if (err) { + sb->s_flags |= SB_RDONLY; + goto ignore_opts; + } down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } out: + sb->s_flags = (sb->s_flags & ~SB_POSIXACL); + /* Copy over parsed remount options */ + nilfs->ns_mount_opt = ctx->ns_mount_opt; + return 0; - restore_opts: - sb->s_flags = old_sb_flags; - nilfs->ns_mount_opt = old_mount_opt; + ignore_opts: return err; } -struct nilfs_super_data { - struct block_device *bdev; - __u64 cno; - int flags; -}; - -static int nilfs_parse_snapshot_option(const char *option, - const substring_t *arg, - struct nilfs_super_data *sd) +static int +nilfs_get_tree(struct fs_context *fc) { - unsigned long long val; - const char *msg = NULL; + struct nilfs_fs_context *ctx = fc->fs_private; + struct super_block *s; + dev_t dev; int err; - if (!(sd->flags & SB_RDONLY)) { - msg = "read-only option is not specified"; - goto parse_error; - } - - err = kstrtoull(arg->from, 0, &val); - if (err) { - if (err == -ERANGE) - msg = "too large checkpoint number"; - else - msg = "malformed argument"; - goto parse_error; - } else if (val == 0) { - msg = "invalid checkpoint number 0"; - goto parse_error; + if (ctx->cno && !(fc->sb_flags & SB_RDONLY)) { + nilfs_err(NULL, + "invalid option \"cp=%llu\": read-only option is not specified", + ctx->cno); + return -EINVAL; } - sd->cno = val; - return 0; - -parse_error: - nilfs_err(NULL, "invalid option \"%s\": %s", option, msg); - return 1; -} - -/** - * nilfs_identify - pre-read mount options needed to identify mount instance - * @data: mount options - * @sd: nilfs_super_data - */ -static int nilfs_identify(char *data, struct nilfs_super_data *sd) -{ - char *p, *options = data; - substring_t args[MAX_OPT_ARGS]; - int token; - int ret = 0; - - do { - p = strsep(&options, ","); - if (p != NULL && *p) { - token = match_token(p, tokens, args); - if (token == Opt_snapshot) - ret = nilfs_parse_snapshot_option(p, &args[0], - sd); - } - if (!options) - break; - BUG_ON(options == data); - *(options - 1) = ','; - } while (!ret); - return ret; -} - -static int nilfs_set_bdev_super(struct super_block *s, void *data) -{ - s->s_bdev = data; - s->s_dev = s->s_bdev->bd_dev; - return 0; -} - -static int nilfs_test_bdev_super(struct super_block *s, void *data) -{ - return (void *)s->s_bdev == data; -} - -static struct dentry * -nilfs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - struct nilfs_super_data sd; - struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; - struct dentry *root_dentry; - int err, s_new = false; - - if (!(flags & SB_RDONLY)) - mode |= FMODE_WRITE; - sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); - if (IS_ERR(sd.bdev)) - return ERR_CAST(sd.bdev); + err = lookup_bdev(fc->source, &dev); + if (err) + return err; - sd.cno = 0; - sd.flags = flags; - if (nilfs_identify((char *)data, &sd)) { - err = -EINVAL; - goto failed; - } - - /* - * once the super is inserted into the list by sget, s_umount - * will protect the lockfs code from trying to start a snapshot - * while we are mounting - */ - mutex_lock(&sd.bdev->bd_fsfreeze_mutex); - if (sd.bdev->bd_fsfreeze_count > 0) { - mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); - err = -EBUSY; - goto failed; - } - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags, - sd.bdev); - mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); - if (IS_ERR(s)) { - err = PTR_ERR(s); - goto failed; - } + s = sget_dev(fc, dev); + if (IS_ERR(s)) + return PTR_ERR(s); if (!s->s_root) { - s_new = true; - - /* New superblock instance created */ - s->s_mode = mode; - snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev); - sb_set_blocksize(s, block_size(sd.bdev)); - - err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0); + err = setup_bdev_super(s, fc->sb_flags, fc); + if (!err) + err = nilfs_fill_super(s, fc); if (err) goto failed_super; s->s_flags |= SB_ACTIVE; - } else if (!sd.cno) { + } else if (!ctx->cno) { if (nilfs_tree_is_busy(s->s_root)) { - if ((flags ^ s->s_flags) & SB_RDONLY) { + if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { nilfs_err(s, "the device already has a %s mount.", sb_rdonly(s) ? "read-only" : "read/write"); @@ -1328,43 +1241,75 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } } else { /* - * Try remount to setup mount states if the current + * Try reconfigure to setup mount states if the current * tree is not mounted and only snapshots use this sb. + * + * Since nilfs_reconfigure() requires fc->root to be + * set, set it first and release it on failure. */ - err = nilfs_remount(s, &flags, data); - if (err) + fc->root = dget(s->s_root); + err = nilfs_reconfigure(fc); + if (err) { + dput(fc->root); + fc->root = NULL; /* prevent double release */ goto failed_super; + } + return 0; } } - if (sd.cno) { - err = nilfs_attach_snapshot(s, sd.cno, &root_dentry); + if (ctx->cno) { + struct dentry *root_dentry; + + err = nilfs_attach_snapshot(s, ctx->cno, &root_dentry); if (err) goto failed_super; - } else { - root_dentry = dget(s->s_root); + fc->root = root_dentry; + return 0; } - if (!s_new) - blkdev_put(sd.bdev, mode); - - return root_dentry; + fc->root = dget(s->s_root); + return 0; failed_super: deactivate_locked_super(s); + return err; +} - failed: - if (!s_new) - blkdev_put(sd.bdev, mode); - return ERR_PTR(err); +static void nilfs_free_fc(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +static const struct fs_context_operations nilfs_context_ops = { + .parse_param = nilfs_parse_param, + .get_tree = nilfs_get_tree, + .reconfigure = nilfs_reconfigure, + .free = nilfs_free_fc, +}; + +static int nilfs_init_fs_context(struct fs_context *fc) +{ + struct nilfs_fs_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; + fc->fs_private = ctx; + fc->ops = &nilfs_context_ops; + + return 0; } struct file_system_type nilfs_fs_type = { .owner = THIS_MODULE, .name = "nilfs2", - .mount = nilfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, + .init_fs_context = nilfs_init_fs_context, + .parameters = nilfs_param_spec, }; MODULE_ALIAS_FS("nilfs2"); diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 379d22e28ed6..bc52afbfc5c7 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -56,7 +56,7 @@ static void nilfs_##name##_attr_release(struct kobject *kobj) \ sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ -static struct kobj_type nilfs_##name##_ktype = { \ +static const struct kobj_type nilfs_##name##_ktype = { \ .default_groups = nilfs_##name##_groups, \ .sysfs_ops = &nilfs_##name##_attr_ops, \ .release = nilfs_##name##_attr_release, \ @@ -166,7 +166,7 @@ static const struct sysfs_ops nilfs_snapshot_attr_ops = { .store = nilfs_snapshot_attr_store, }; -static struct kobj_type nilfs_snapshot_ktype = { +static const struct kobj_type nilfs_snapshot_ktype = { .default_groups = nilfs_snapshot_groups, .sysfs_ops = &nilfs_snapshot_attr_ops, .release = nilfs_snapshot_attr_release, @@ -836,9 +836,15 @@ ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; - u32 major = le32_to_cpu(sbp[0]->s_rev_level); - u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level); + struct nilfs_super_block *raw_sb; + u32 major; + u16 minor; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + major = le32_to_cpu(raw_sb->s_rev_level); + minor = le16_to_cpu(raw_sb->s_minor_rev_level); + up_read(&nilfs->ns_sem); return sysfs_emit(buf, "%d.%d\n", major, minor); } @@ -856,8 +862,13 @@ ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; - u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size); + struct nilfs_super_block *raw_sb; + u64 dev_size; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + dev_size = le64_to_cpu(raw_sb->s_dev_size); + up_read(&nilfs->ns_sem); return sysfs_emit(buf, "%llu\n", dev_size); } @@ -879,9 +890,15 @@ ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct nilfs_super_block *raw_sb; + ssize_t len; - return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid); + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid); + up_read(&nilfs->ns_sem); + + return len; } static @@ -889,10 +906,16 @@ ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct nilfs_super_block *raw_sb; + ssize_t len; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n", + raw_sb->s_volume_name); + up_read(&nilfs->ns_sem); - return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n", - sbp[0]->s_volume_name); + return len; } static const char dev_readme_str[] = @@ -967,7 +990,7 @@ static const struct sysfs_ops nilfs_dev_attr_ops = { .store = nilfs_dev_attr_store, }; -static struct kobj_type nilfs_dev_ktype = { +static const struct kobj_type nilfs_dev_ktype = { .default_groups = nilfs_dev_groups, .sysfs_ops = &nilfs_dev_attr_ops, .release = nilfs_dev_attr_release, @@ -1052,7 +1075,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs) ************************************************************************/ static ssize_t nilfs_feature_revision_show(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d.%d\n", NILFS_CURRENT_REV, NILFS_MINOR_REV); @@ -1064,7 +1087,7 @@ static const char features_readme_str[] = "(1) revision\n\tshow current revision of NILFS file system driver.\n"; static ssize_t nilfs_feature_README_show(struct kobject *kobj, - struct attribute *attr, + struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, features_readme_str); diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h index 78a87a016928..d370cd5cce3f 100644 --- a/fs/nilfs2/sysfs.h +++ b/fs/nilfs2/sysfs.h @@ -50,16 +50,16 @@ struct nilfs_sysfs_dev_subgroups { struct completion sg_segments_kobj_unregister; }; -#define NILFS_COMMON_ATTR_STRUCT(name) \ +#define NILFS_KOBJ_ATTR_STRUCT(name) \ struct nilfs_##name##_attr { \ struct attribute attr; \ - ssize_t (*show)(struct kobject *, struct attribute *, \ + ssize_t (*show)(struct kobject *, struct kobj_attribute *, \ char *); \ - ssize_t (*store)(struct kobject *, struct attribute *, \ + ssize_t (*store)(struct kobject *, struct kobj_attribute *, \ const char *, size_t); \ } -NILFS_COMMON_ATTR_STRUCT(feature); +NILFS_KOBJ_ATTR_STRUCT(feature); #define NILFS_DEV_ATTR_STRUCT(name) \ struct nilfs_##name##_attr { \ diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 2064e6473d30..d0bcf744c553 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -12,7 +12,6 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> -#include <linux/random.h> #include <linux/log2.h> #include <linux/crc32.h> #include "nilfs.h" @@ -50,8 +49,8 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, * alloc_nilfs - allocate a nilfs object * @sb: super block instance * - * Return Value: On success, pointer to the_nilfs is returned. - * On error, NULL is returned. + * Return: a pointer to the allocated nilfs object on success, or NULL on + * failure. */ struct the_nilfs *alloc_nilfs(struct super_block *sb) { @@ -69,7 +68,6 @@ struct the_nilfs *alloc_nilfs(struct super_block *sb) INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_inode_lock); - spin_lock_init(&nilfs->ns_next_gen_lock); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_cptree = RB_ROOT; spin_lock_init(&nilfs->ns_cptree_lock); @@ -87,7 +85,6 @@ void destroy_nilfs(struct the_nilfs *nilfs) { might_sleep(); if (nilfs_init(nilfs)) { - nilfs_sysfs_delete_device_group(nilfs); brelse(nilfs->ns_sbh[0]); brelse(nilfs->ns_sbh[1]); } @@ -168,6 +165,9 @@ static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri) * containing a super root from a given super block, and initializes * relevant information on the nilfs object preparatory for log * scanning and recovery. + * + * Return: 0 on success, or %-EINVAL if current segment number is out + * of range. */ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) @@ -203,8 +203,7 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, * exponent information written in @sbp and stores it in @blocksize, * or aborts with an error message if it's too large. * - * Return Value: On success, 0 is returned. If the block size is too - * large, -EINVAL is returned. + * Return: 0 on success, or %-EINVAL if the block size is too large. */ static int nilfs_get_blocksize(struct super_block *sb, struct nilfs_super_block *sbp, int *blocksize) @@ -229,6 +228,13 @@ static int nilfs_get_blocksize(struct super_block *sb, * load_nilfs() searches and load the latest super root, * attaches the last segment, and does recovery if needed. * The caller must call this exclusively for simultaneous mounts. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - No valid segment found. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-EROFS - Read only device or RO compat mode (if recovery is required) */ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) { @@ -305,6 +311,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto failed; } + err = nilfs_sysfs_create_device_group(sb); + if (unlikely(err)) + goto sysfs_error; + if (valid_fs) goto skip_recovery; @@ -366,6 +376,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto failed; failed_unload: + nilfs_sysfs_delete_device_group(nilfs); + + sysfs_error: iput(nilfs->ns_cpfile); iput(nilfs->ns_sufile); iput(nilfs->ns_dat); @@ -391,6 +404,8 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) * nilfs_nrsvsegs - calculate the number of reserved segments * @nilfs: nilfs object * @nsegs: total number of segments + * + * Return: Number of reserved segments. */ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) { @@ -399,6 +414,20 @@ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) 100)); } +/** + * nilfs_max_segment_count - calculate the maximum number of segments + * @nilfs: nilfs object + * + * Return: Maximum number of segments + */ +static u64 nilfs_max_segment_count(struct the_nilfs *nilfs) +{ + u64 max_count = U64_MAX; + + max_count = div64_ul(max_count, nilfs->ns_blocks_per_segment); + return min_t(u64, max_count, ULONG_MAX); +} + void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) { nilfs->ns_nsegments = nsegs; @@ -408,6 +437,8 @@ void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { + u64 nsegments, nblocks; + if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) { nilfs_err(nilfs->ns_sb, "unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). Please check the version of mkfs.nilfs(2).", @@ -432,6 +463,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, } nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); + if (nilfs->ns_first_ino < NILFS_USER_INO) { + nilfs_err(nilfs->ns_sb, + "too small lower limit for non-reserved inode numbers: %u", + nilfs->ns_first_ino); + return -EINVAL; + } nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { @@ -451,7 +488,34 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, return -EINVAL; } - nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments)); + nsegments = le64_to_cpu(sbp->s_nsegments); + if (nsegments > nilfs_max_segment_count(nilfs)) { + nilfs_err(nilfs->ns_sb, + "segment count %llu exceeds upper limit (%llu segments)", + (unsigned long long)nsegments, + (unsigned long long)nilfs_max_segment_count(nilfs)); + return -EINVAL; + } + + nblocks = sb_bdev_nr_blocks(nilfs->ns_sb); + if (nblocks) { + u64 min_block_count = nsegments * nilfs->ns_blocks_per_segment; + /* + * To avoid failing to mount early device images without a + * second superblock, exclude that block count from the + * "min_block_count" calculation. + */ + + if (nblocks < min_block_count) { + nilfs_err(nilfs->ns_sb, + "total number of segment blocks %llu exceeds device size (%llu blocks)", + (unsigned long long)min_block_count, + (unsigned long long)nblocks); + return -EINVAL; + } + } + + nilfs_set_nsegments(nilfs, nsegments); nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); return 0; } @@ -487,7 +551,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) * area, or if the parameters themselves are not normal, it is * determined to be invalid. * - * Return Value: true if invalid, false if valid. + * Return: true if invalid, false if valid. */ static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) { @@ -544,8 +608,14 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, { struct nilfs_super_block **sbp = nilfs->ns_sbp; struct buffer_head **sbh = nilfs->ns_sbh; - u64 sb2off = NILFS_SB2_OFFSET_BYTES(bdev_nr_bytes(nilfs->ns_bdev)); - int valid[2], swp = 0; + u64 sb2off, devsize = bdev_nr_bytes(nilfs->ns_bdev); + int valid[2], swp = 0, older; + + if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) { + nilfs_err(sb, "device size too small"); + return -EINVAL; + } + sb2off = NILFS_SB2_OFFSET_BYTES(devsize); sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize, &sbh[0]); @@ -595,9 +665,25 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, if (swp) nilfs_swap_super_block(nilfs); + /* + * Calculate the array index of the older superblock data. + * If one has been dropped, set index 0 pointing to the remaining one, + * otherwise set index 1 pointing to the old one (including if both + * are the same). + * + * Divided case valid[0] valid[1] swp -> older + * ------------------------------------------------------------- + * Both SBs are invalid 0 0 N/A (Error) + * SB1 is invalid 0 1 1 0 + * SB2 is invalid 1 0 0 0 + * SB2 is newer 1 1 1 0 + * SB2 is older or the same 1 1 0 1 + */ + older = valid[1] ^ swp; + nilfs->ns_sbwcount = 0; nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); - nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); + nilfs->ns_prot_seq = le64_to_cpu(sbp[older]->s_last_seq); *sbpp = sbp[0]; return 0; } @@ -606,23 +692,19 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, * init_nilfs - initialize a NILFS instance. * @nilfs: the_nilfs structure * @sb: super block - * @data: mount options * * init_nilfs() performs common initialization per block device (e.g. * reading the super block, getting disk layout information, initializing * shared fields in the_nilfs). * - * Return Value: On success, 0 is returned. On error, a negative error - * code is returned. + * Return: 0 on success, or a negative error code on failure. */ -int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) { struct nilfs_super_block *sbp; int blocksize; int err; - down_write(&nilfs->ns_sem); - blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { nilfs_err(sb, "unable to set blocksize"); @@ -633,7 +715,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) if (err) goto out; - err = nilfs_store_magic_and_option(sb, sbp, data); + err = nilfs_store_magic(sb, sbp); if (err) goto failed_sbh; @@ -663,7 +745,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) goto failed_sbh; } nilfs_release_super_block(nilfs); - sb_set_blocksize(sb, blocksize); + if (!sb_set_blocksize(sb, blocksize)) { + nilfs_err(sb, "bad blocksize %d", blocksize); + err = -EINVAL; + goto out; + } err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); if (err) @@ -676,9 +762,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) nilfs->ns_blocksize_bits = sb->s_blocksize_bits; nilfs->ns_blocksize = blocksize; - get_random_bytes(&nilfs->ns_next_generation, - sizeof(nilfs->ns_next_generation)); - err = nilfs_store_disk_layout(nilfs, sbp); if (err) goto failed_sbh; @@ -691,14 +774,9 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) if (err) goto failed_sbh; - err = nilfs_sysfs_create_device_group(sb); - if (err) - goto failed_sbh; - set_nilfs_init(nilfs); err = 0; out: - up_write(&nilfs->ns_sem); return err; failed_sbh: diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 47c7dfbb7ea5..4776a70f01ae 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -29,6 +29,7 @@ enum { THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ THE_NILFS_GC_RUNNING, /* gc process is running */ THE_NILFS_SB_DIRTY, /* super block is dirty */ + THE_NILFS_PURGING, /* disposing dirty files for cleanup */ }; /** @@ -70,8 +71,6 @@ enum { * @ns_dirty_files: list of dirty files * @ns_inode_lock: lock protecting @ns_dirty_files * @ns_gc_inodes: dummy inodes to keep live blocks - * @ns_next_generation: next generation number for inodes - * @ns_next_gen_lock: lock protecting @ns_next_generation * @ns_mount_opt: mount options * @ns_resuid: uid for reserved blocks * @ns_resgid: gid for reserved blocks @@ -160,10 +159,6 @@ struct the_nilfs { /* GC inode list */ struct list_head ns_gc_inodes; - /* Inode allocator */ - u32 ns_next_generation; - spinlock_t ns_next_gen_lock; - /* Mount options */ unsigned long ns_mount_opt; @@ -181,7 +176,7 @@ struct the_nilfs { unsigned long ns_nrsvsegs; unsigned long ns_first_data_block; int ns_inode_size; - int ns_first_ino; + unsigned int ns_first_ino; u32 ns_crc_seed; /* /sys/fs/<nilfs>/<device> */ @@ -208,6 +203,7 @@ THE_NILFS_FNS(INIT, init) THE_NILFS_FNS(DISCONTINUED, discontinued) THE_NILFS_FNS(GC_RUNNING, gc_running) THE_NILFS_FNS(SB_DIRTY, sb_dirty) +THE_NILFS_FNS(PURGING, purging) /* * Mount option operations @@ -217,10 +213,6 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty) #define nilfs_set_opt(nilfs, opt) \ ((nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt) #define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt) -#define nilfs_write_opt(nilfs, mask, opt) \ - ((nilfs)->ns_mount_opt = \ - (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \ - NILFS_MOUNT_##opt)) \ /** * struct nilfs_root - nilfs root object @@ -274,7 +266,7 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); struct the_nilfs *alloc_nilfs(struct super_block *sb); void destroy_nilfs(struct the_nilfs *nilfs); -int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb); int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs); void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs); |
