Diffstat (limited to 'fs/nilfs2')
-rw-r--r-- | fs/nilfs2/alloc.c     | 234
-rw-r--r-- | fs/nilfs2/alloc.h     |  12
-rw-r--r-- | fs/nilfs2/bmap.c      | 136
-rw-r--r-- | fs/nilfs2/bmap.h      |  20
-rw-r--r-- | fs/nilfs2/btnode.c    |  92
-rw-r--r-- | fs/nilfs2/btree.c     |  50
-rw-r--r-- | fs/nilfs2/btree.h     |   1
-rw-r--r-- | fs/nilfs2/cpfile.c    | 502
-rw-r--r-- | fs/nilfs2/dat.c       | 160
-rw-r--r-- | fs/nilfs2/dir.c       | 154
-rw-r--r-- | fs/nilfs2/direct.c    |   3
-rw-r--r-- | fs/nilfs2/gcinode.c   |  31
-rw-r--r-- | fs/nilfs2/ifile.c     |  54
-rw-r--r-- | fs/nilfs2/ifile.h     |   4
-rw-r--r-- | fs/nilfs2/inode.c     | 155
-rw-r--r-- | fs/nilfs2/ioctl.c     | 353
-rw-r--r-- | fs/nilfs2/mdt.c       | 112
-rw-r--r-- | fs/nilfs2/namei.c     |  94
-rw-r--r-- | fs/nilfs2/nilfs.h     |  48
-rw-r--r-- | fs/nilfs2/page.c      |  93
-rw-r--r-- | fs/nilfs2/page.h      |   8
-rw-r--r-- | fs/nilfs2/recovery.c  | 148
-rw-r--r-- | fs/nilfs2/segbuf.c    |  29
-rw-r--r-- | fs/nilfs2/segment.c   | 481
-rw-r--r-- | fs/nilfs2/segment.h   |  11
-rw-r--r-- | fs/nilfs2/sufile.c    | 322
-rw-r--r-- | fs/nilfs2/sufile.h    |  22
-rw-r--r-- | fs/nilfs2/super.c     | 407
-rw-r--r-- | fs/nilfs2/sysfs.c     |  49
-rw-r--r-- | fs/nilfs2/the_nilfs.c |  65
-rw-r--r-- | fs/nilfs2/the_nilfs.h |  14
31 files changed, 2127 insertions, 1737 deletions
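
A recurring pattern across the hunks below is the conversion of metadata accessors from kmap_local_page() plus bh_offset() to kmap_local_folio() with a byte offset computed via offset_in_folio(). The following is a minimal, hedged sketch of that access pattern only, not code taken from the patch: the helper example_map_entry() and struct example_entry are hypothetical names used for illustration, and the sketch assumes (as nilfs2 does) that an entry never crosses a block boundary and that block size <= PAGE_SIZE.

    #include <linux/buffer_head.h>
    #include <linux/highmem.h>
    #include <linux/mm.h>

    struct example_entry {                  /* hypothetical on-disk entry */
    	__le32 field;
    };

    /*
     * Map one fixed-size entry held in a metadata block buffer.  The old
     * style was kmap_local_page(bh->b_page) plus bh_offset(bh); the
     * folio-based form computes the byte offset inside the folio and
     * lets kmap_local_folio() map the page that contains it.
     */
    static struct example_entry *example_map_entry(struct buffer_head *bh,
    					       unsigned long index,
    					       size_t entry_size)
    {
    	size_t offset = offset_in_folio(bh->b_folio, bh->b_data) +
    			index * entry_size;

    	return kmap_local_folio(bh->b_folio, offset);
    }

    /* The caller releases the mapping with kunmap_local() on the pointer. */

In the patch itself this offset computation is factored into helpers such as nilfs_palloc_entry_offset() and nilfs_cpfile_checkpoint_offset(), whose return values are passed directly to kmap_local_folio().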
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 89caef7513db..6b506995818d 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -21,6 +21,8 @@ * nilfs_palloc_groups_per_desc_block - get the number of groups that a group * descriptor block can maintain * @inode: inode of metadata file using this allocator + * + * Return: Number of groups that a group descriptor block can maintain. */ static inline unsigned long nilfs_palloc_groups_per_desc_block(const struct inode *inode) @@ -32,6 +34,8 @@ nilfs_palloc_groups_per_desc_block(const struct inode *inode) /** * nilfs_palloc_groups_count - get maximum number of groups * @inode: inode of metadata file using this allocator + * + * Return: Maximum number of groups. */ static inline unsigned long nilfs_palloc_groups_count(const struct inode *inode) @@ -43,6 +47,8 @@ nilfs_palloc_groups_count(const struct inode *inode) * nilfs_palloc_init_blockgroup - initialize private variables for allocator * @inode: inode of metadata file using this allocator * @entry_size: size of the persistent object + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned int entry_size) { @@ -78,6 +84,9 @@ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned int entry_size) * @inode: inode of metadata file using this allocator * @nr: serial number of the entry (e.g. inode number) * @offset: pointer to store offset number in the group + * + * Return: Number of the group that contains the entry with the index + * specified by @nr. */ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, unsigned long *offset) @@ -93,8 +102,8 @@ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, * @inode: inode of metadata file using this allocator * @group: group number * - * nilfs_palloc_desc_blkoff() returns block offset of the descriptor - * block which contains a descriptor of the specified group. + * Return: Index number in the metadata file of the descriptor block of + * the group specified by @group. */ static unsigned long nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) @@ -111,6 +120,9 @@ nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) * * nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap * block used to allocate/deallocate entries in the specified group. + * + * Return: Index number in the metadata file of the bitmap block of + * the group specified by @group. */ static unsigned long nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) @@ -125,6 +137,8 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group * @desc: pointer to descriptor structure for the group * @lock: spin lock protecting @desc + * + * Return: Number of free entries written in the group descriptor @desc. */ static unsigned long nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc, @@ -143,6 +157,9 @@ nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc, * @desc: pointer to descriptor structure for the group * @lock: spin lock protecting @desc * @n: delta to be added + * + * Return: Number of free entries after adjusting the group descriptor + * @desc. 
*/ static u32 nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc, @@ -161,6 +178,9 @@ nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc, * nilfs_palloc_entry_blkoff - get block offset of an entry block * @inode: inode of metadata file using this allocator * @nr: serial number of the entry (e.g. inode number) + * + * Return: Index number in the metadata file of the block containing + * the entry specified by @nr. */ static unsigned long nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) @@ -177,12 +197,14 @@ nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) * nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block * @inode: inode of metadata file * @bh: buffer head of the buffer to be initialized - * @kaddr: kernel address mapped for the page including the buffer + * @from: kernel address mapped for a chunk of the block + * + * This function does not yet support the case where block size > PAGE_SIZE. */ static void nilfs_palloc_desc_block_init(struct inode *inode, - struct buffer_head *bh, void *kaddr) + struct buffer_head *bh, void *from) { - struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh); + struct nilfs_palloc_group_desc *desc = from; unsigned long n = nilfs_palloc_groups_per_desc_block(inode); __le32 nfrees; @@ -236,6 +258,12 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, * @blkoff: block offset * @prev: nilfs_bh_assoc struct of the last used buffer * @lock: spin lock protecting @prev + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Non-existent block. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff, struct nilfs_bh_assoc *prev, @@ -256,6 +284,8 @@ static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff, * @group: group number * @create: create flag * @bhp: pointer to store the resultant buffer head + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_get_desc_block(struct inode *inode, unsigned long group, @@ -275,6 +305,8 @@ static int nilfs_palloc_get_desc_block(struct inode *inode, * @group: group number * @create: create flag * @bhp: pointer to store the resultant buffer head + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_get_bitmap_block(struct inode *inode, unsigned long group, @@ -292,6 +324,8 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode, * nilfs_palloc_delete_bitmap_block - delete a bitmap block * @inode: inode of metadata file using this allocator * @group: group number + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_delete_bitmap_block(struct inode *inode, unsigned long group) @@ -310,6 +344,8 @@ static int nilfs_palloc_delete_bitmap_block(struct inode *inode, * @nr: serial number of the entry (e.g. inode number) * @create: create flag * @bhp: pointer to store the resultant buffer head + * + * Return: 0 on success, or a negative error code on failure. 
*/ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, int create, struct buffer_head **bhp) @@ -326,6 +362,8 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, * nilfs_palloc_delete_entry_block - delete an entry block * @inode: inode of metadata file using this allocator * @nr: serial number of the entry + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr) { @@ -337,38 +375,55 @@ static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr) } /** - * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor + * nilfs_palloc_group_desc_offset - calculate the byte offset of a group + * descriptor in the folio containing it * @inode: inode of metadata file using this allocator * @group: group number - * @bh: buffer head of the buffer storing the group descriptor block - * @kaddr: kernel address mapped for the page including the buffer + * @bh: buffer head of the group descriptor block + * + * Return: Byte offset in the folio of the group descriptor for @group. */ -static struct nilfs_palloc_group_desc * -nilfs_palloc_block_get_group_desc(const struct inode *inode, - unsigned long group, - const struct buffer_head *bh, void *kaddr) +static size_t nilfs_palloc_group_desc_offset(const struct inode *inode, + unsigned long group, + const struct buffer_head *bh) { - return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) + - group % nilfs_palloc_groups_per_desc_block(inode); + return offset_in_folio(bh->b_folio, bh->b_data) + + sizeof(struct nilfs_palloc_group_desc) * + (group % nilfs_palloc_groups_per_desc_block(inode)); } /** - * nilfs_palloc_block_get_entry - get kernel address of an entry + * nilfs_palloc_bitmap_offset - calculate the byte offset of a bitmap block + * in the folio containing it + * @bh: buffer head of the bitmap block + * + * Return: Byte offset in the folio of the bitmap block for @bh. + */ +static size_t nilfs_palloc_bitmap_offset(const struct buffer_head *bh) +{ + return offset_in_folio(bh->b_folio, bh->b_data); +} + +/** + * nilfs_palloc_entry_offset - calculate the byte offset of an entry in the + * folio containing it * @inode: inode of metadata file using this allocator - * @nr: serial number of the entry (e.g. inode number) - * @bh: buffer head of the buffer storing the entry block - * @kaddr: kernel address mapped for the page including the buffer + * @nr: serial number of the entry (e.g. inode number) + * @bh: buffer head of the entry block + * + * Return: Byte offset in the folio of the entry @nr. 
*/ -void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, - const struct buffer_head *bh, void *kaddr) +size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr, + const struct buffer_head *bh) { - unsigned long entry_offset, group_offset; + unsigned long entry_index_in_group, entry_index_in_block; - nilfs_palloc_group(inode, nr, &group_offset); - entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block; + nilfs_palloc_group(inode, nr, &entry_index_in_group); + entry_index_in_block = entry_index_in_group % + NILFS_MDT(inode)->mi_entries_per_block; - return kaddr + bh_offset(bh) + - entry_offset * NILFS_MDT(inode)->mi_entry_size; + return offset_in_folio(bh->b_folio, bh->b_data) + + entry_index_in_block * NILFS_MDT(inode)->mi_entry_size; } /** @@ -377,11 +432,15 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, * @target: offset number of an entry in the group (start point) * @bsize: size in bits * @lock: spin lock protecting @bitmap + * @wrap: whether to wrap around + * + * Return: Offset number within the group of the found free entry, or + * %-ENOSPC if not found. */ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, unsigned long target, unsigned int bsize, - spinlock_t *lock) + spinlock_t *lock, bool wrap) { int pos, end = bsize; @@ -397,6 +456,8 @@ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, end = target; } + if (!wrap) + return -ENOSPC; /* wrap around */ for (pos = 0; pos < end; pos++) { @@ -416,6 +477,9 @@ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, * @inode: inode of metadata file using this allocator * @curr: current group number * @max: maximum number of groups + * + * Return: Number of remaining descriptors (= groups) managed by the descriptor + * block. */ static unsigned long nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, @@ -431,6 +495,8 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, * nilfs_palloc_count_desc_blocks - count descriptor blocks number * @inode: inode of metadata file using this allocator * @desc_blocks: descriptor blocks number [out] + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_count_desc_blocks(struct inode *inode, unsigned long *desc_blocks) @@ -451,6 +517,8 @@ static int nilfs_palloc_count_desc_blocks(struct inode *inode, * MDT file growing * @inode: inode of metadata file using this allocator * @desc_blocks: known current descriptor blocks count + * + * Return: true if a group can be added in the metadata file, false if not. */ static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, unsigned long desc_blocks) @@ -465,6 +533,12 @@ static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, * @inode: inode of metadata file using this allocator * @nused: current number of used entries * @nmaxp: max number of entries [out] + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ERANGE - Number of entries in use is out of range. 
*/ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) { @@ -495,14 +569,22 @@ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the allocation + * @wrap: whether to wrap around + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - Entries exhausted (No entries available for allocation). + * * %-EROFS - Read only filesystem */ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, - struct nilfs_palloc_req *req) + struct nilfs_palloc_req *req, bool wrap) { struct buffer_head *desc_bh, *bitmap_bh; struct nilfs_palloc_group_desc *desc; unsigned char *bitmap; - void *desc_kaddr, *bitmap_kaddr; + size_t doff, boff; unsigned long group, maxgroup, ngroups; unsigned long group_offset, maxgroup_offset; unsigned long n, entries_per_group; @@ -516,7 +598,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, entries_per_group = nilfs_palloc_entries_per_group(inode); for (i = 0; i < ngroups; i += n) { - if (group >= ngroups) { + if (group >= ngroups && wrap) { /* wrap around */ group = 0; maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr, @@ -525,17 +607,17 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh); if (ret < 0) return ret; - desc_kaddr = kmap_local_page(desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc( - inode, group, desc_bh, desc_kaddr); + + doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh); + desc = kmap_local_folio(desc_bh->b_folio, doff); n = nilfs_palloc_rest_groups_in_desc_block(inode, group, maxgroup); - for (j = 0; j < n; j++, desc++, group++, group_offset = 0) { + for (j = 0; j < n; j++, group++, group_offset = 0) { lock = nilfs_mdt_bgl_lock(inode, group); - if (nilfs_palloc_group_desc_nfrees(desc, lock) == 0) + if (nilfs_palloc_group_desc_nfrees(&desc[j], lock) == 0) continue; - kunmap_local(desc_kaddr); + kunmap_local(desc); ret = nilfs_palloc_get_bitmap_block(inode, group, 1, &bitmap_bh); if (unlikely(ret < 0)) { @@ -543,22 +625,31 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, return ret; } - desc_kaddr = kmap_local_page(desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc( - inode, group, desc_bh, desc_kaddr); + /* + * Re-kmap the folio containing the first (and + * subsequent) group descriptors. + */ + desc = kmap_local_folio(desc_bh->b_folio, doff); - bitmap_kaddr = kmap_local_page(bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(bitmap_bh); + boff = nilfs_palloc_bitmap_offset(bitmap_bh); + bitmap = kmap_local_folio(bitmap_bh->b_folio, boff); pos = nilfs_palloc_find_available_slot( - bitmap, group_offset, entries_per_group, lock); - kunmap_local(bitmap_kaddr); + bitmap, group_offset, entries_per_group, lock, + wrap); + /* + * Since the search for a free slot in the second and + * subsequent bitmap blocks always starts from the + * beginning, the wrap flag only has an effect on the + * first search. 
+ */ + kunmap_local(bitmap); if (pos >= 0) goto found; brelse(bitmap_bh); } - kunmap_local(desc_kaddr); + kunmap_local(desc); brelse(desc_bh); } @@ -567,9 +658,9 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, found: /* found a free entry */ - nilfs_palloc_group_desc_add_entries(desc, lock, -1); + nilfs_palloc_group_desc_add_entries(&desc[j], lock, -1); req->pr_entry_nr = entries_per_group * group + pos; - kunmap_local(desc_kaddr); + kunmap_local(desc); req->pr_desc_bh = desc_bh; req->pr_bitmap_bh = bitmap_bh; @@ -600,18 +691,18 @@ void nilfs_palloc_commit_alloc_entry(struct inode *inode, void nilfs_palloc_commit_free_entry(struct inode *inode, struct nilfs_palloc_req *req) { - struct nilfs_palloc_group_desc *desc; unsigned long group, group_offset; + size_t doff, boff; + struct nilfs_palloc_group_desc *desc; unsigned char *bitmap; - void *desc_kaddr, *bitmap_kaddr; spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); - desc_kaddr = kmap_local_page(req->pr_desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc(inode, group, - req->pr_desc_bh, desc_kaddr); - bitmap_kaddr = kmap_local_page(req->pr_bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); + doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh); + desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff); + + boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh); + bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff); lock = nilfs_mdt_bgl_lock(inode, group); if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) @@ -622,8 +713,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, else nilfs_palloc_group_desc_add_entries(desc, lock, 1); - kunmap_local(bitmap_kaddr); - kunmap_local(desc_kaddr); + kunmap_local(bitmap); + kunmap_local(desc); mark_buffer_dirty(req->pr_desc_bh); mark_buffer_dirty(req->pr_bitmap_bh); @@ -642,17 +733,17 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, struct nilfs_palloc_req *req) { struct nilfs_palloc_group_desc *desc; - void *desc_kaddr, *bitmap_kaddr; + size_t doff, boff; unsigned char *bitmap; unsigned long group, group_offset; spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); - desc_kaddr = kmap_local_page(req->pr_desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc(inode, group, - req->pr_desc_bh, desc_kaddr); - bitmap_kaddr = kmap_local_page(req->pr_bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); + doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh); + desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff); + + boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh); + bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff); lock = nilfs_mdt_bgl_lock(inode, group); if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) @@ -663,8 +754,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, else nilfs_palloc_group_desc_add_entries(desc, lock, 1); - kunmap_local(bitmap_kaddr); - kunmap_local(desc_kaddr); + kunmap_local(bitmap); + kunmap_local(desc); brelse(req->pr_bitmap_bh); brelse(req->pr_desc_bh); @@ -678,6 +769,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, * nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the removal + * + * Return: 0 on success, or a negative error code on failure. 
*/ int nilfs_palloc_prepare_free_entry(struct inode *inode, struct nilfs_palloc_req *req) @@ -722,13 +815,15 @@ void nilfs_palloc_abort_free_entry(struct inode *inode, * @inode: inode of metadata file using this allocator * @entry_nrs: array of entry numbers to be deallocated * @nitems: number of entries stored in @entry_nrs + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) { struct buffer_head *desc_bh, *bitmap_bh; struct nilfs_palloc_group_desc *desc; unsigned char *bitmap; - void *desc_kaddr, *bitmap_kaddr; + size_t doff, boff; unsigned long group, group_offset; __u64 group_min_nr, last_nrs[8]; const unsigned long epg = nilfs_palloc_entries_per_group(inode); @@ -756,8 +851,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) /* Get the first entry number of the group */ group_min_nr = (__u64)group * epg; - bitmap_kaddr = kmap_local_page(bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(bitmap_bh); + boff = nilfs_palloc_bitmap_offset(bitmap_bh); + bitmap = kmap_local_folio(bitmap_bh->b_folio, boff); lock = nilfs_mdt_bgl_lock(inode, group); j = i; @@ -802,7 +897,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) entry_start = rounddown(group_offset, epb); } while (true); - kunmap_local(bitmap_kaddr); + kunmap_local(bitmap); mark_buffer_dirty(bitmap_bh); brelse(bitmap_bh); @@ -816,11 +911,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) inode->i_ino); } - desc_kaddr = kmap_local_page(desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc( - inode, group, desc_bh, desc_kaddr); + doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh); + desc = kmap_local_folio(desc_bh->b_folio, doff); nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n); - kunmap_local(desc_kaddr); + kunmap_local(desc); mark_buffer_dirty(desc_bh); nilfs_mdt_mark_dirty(inode); brelse(desc_bh); diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index b667e869ac07..046d876ea3e0 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -21,6 +21,8 @@ * * The number of entries per group is defined by the number of bits * that a bitmap block can maintain. + * + * Return: Number of entries per group. 
*/ static inline unsigned long nilfs_palloc_entries_per_group(const struct inode *inode) @@ -31,13 +33,13 @@ nilfs_palloc_entries_per_group(const struct inode *inode) int nilfs_palloc_init_blockgroup(struct inode *, unsigned int); int nilfs_palloc_get_entry_block(struct inode *, __u64, int, struct buffer_head **); -void *nilfs_palloc_block_get_entry(const struct inode *, __u64, - const struct buffer_head *, void *); +size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr, + const struct buffer_head *bh); int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); /** - * nilfs_palloc_req - persistent allocator request and reply + * struct nilfs_palloc_req - persistent allocator request and reply * @pr_entry_nr: entry number (vblocknr or inode number) * @pr_desc_bh: buffer head of the buffer containing block group descriptors * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap @@ -50,8 +52,8 @@ struct nilfs_palloc_req { struct buffer_head *pr_entry_bh; }; -int nilfs_palloc_prepare_alloc_entry(struct inode *, - struct nilfs_palloc_req *); +int nilfs_palloc_prepare_alloc_entry(struct inode *inode, + struct nilfs_palloc_req *req, bool wrap); void nilfs_palloc_commit_alloc_entry(struct inode *, struct nilfs_palloc_req *); void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *); diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 383f0afa2cea..ccc1a7aa52d2 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -47,17 +47,14 @@ static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, * @ptrp: place to store the value associated to @key * * Description: nilfs_bmap_lookup_at_level() finds a record whose key - * matches @key in the block at @level of the bmap. - * - * Return Value: On success, 0 is returned and the record associated with @key - * is stored in the place pointed by @ptrp. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - A record associated with @key does not exist. + * matches @key in the block at @level of the bmap. The record associated + * with @key is stored in the place pointed to by @ptrp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - A record associated with @key does not exist. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, __u64 *ptrp) @@ -138,14 +135,11 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) * Description: nilfs_bmap_insert() inserts the new key-record pair specified * by @key and @rec into @bmap. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EEXIST - A record associated with @key already exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EEXIST - A record associated with @key already exists. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
*/ int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec) { @@ -193,14 +187,11 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) * Description: nilfs_bmap_seek_key() seeks a valid key on @bmap * starting from @start, and stores it to @keyp if found. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - No valid entry was found + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No valid entry was found. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp) { @@ -236,14 +227,11 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp) * Description: nilfs_bmap_delete() deletes the key-record pair specified by * @key from @bmap. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - A record associated with @key does not exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - A record associated with @key does not exist. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key) { @@ -290,12 +278,10 @@ static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, __u64 key) * Description: nilfs_bmap_truncate() removes key-record pairs whose keys are * greater than or equal to @key from @bmap. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key) { @@ -330,12 +316,10 @@ void nilfs_bmap_clear(struct nilfs_bmap *bmap) * Description: nilfs_bmap_propagate() marks the buffers that directly or * indirectly refer to the block specified by @bh dirty. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
*/ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) { @@ -349,7 +333,7 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) } /** - * nilfs_bmap_lookup_dirty_buffers - + * nilfs_bmap_lookup_dirty_buffers - collect dirty block buffers * @bmap: bmap * @listp: pointer to buffer head list */ @@ -362,22 +346,22 @@ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap, /** * nilfs_bmap_assign - assign a new block number to a block - * @bmap: bmap - * @bh: pointer to buffer head + * @bmap: bmap + * @bh: place to store a pointer to the buffer head to which a block + * address is assigned (in/out) * @blocknr: block number - * @binfo: block information + * @binfo: block information * * Description: nilfs_bmap_assign() assigns the block number @blocknr to the - * buffer specified by @bh. - * - * Return Value: On success, 0 is returned and the buffer head of a newly - * create buffer and the block information associated with the buffer are - * stored in the place pointed by @bh and @binfo, respectively. On error, one - * of the following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * buffer specified by @bh. The block information is stored in the memory + * pointed to by @binfo, and the buffer head may be replaced as a block + * address is assigned, in which case a pointer to the new buffer head is + * stored in the memory pointed to by @bh. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_assign(struct nilfs_bmap *bmap, struct buffer_head **bh, @@ -402,12 +386,10 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap, * Description: nilfs_bmap_mark() marks the block specified by @key and @level * as dirty. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) { @@ -430,7 +412,7 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) * Description: nilfs_test_and_clear() is the atomic operation to test and * clear the dirty state of @bmap. * - * Return Value: 1 is returned if @bmap is dirty, or 0 if clear. + * Return: 1 if @bmap is dirty, or 0 if clear. */ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) { @@ -450,15 +432,9 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { - struct buffer_head *pbh; - __u64 key; - - key = page_index(bh->b_page) << (PAGE_SHIFT - - bmap->b_inode->i_blkbits); - for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page) - key++; + loff_t pos = folio_pos(bh->b_folio) + bh_offset(bh); - return key; + return pos >> bmap->b_inode->i_blkbits; } __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) @@ -496,10 +472,10 @@ static struct lock_class_key nilfs_bmap_mdt_lock_key; * * Description: nilfs_bmap_read() initializes the bmap @bmap. * - * Return Value: On success, 0 is returned. 
On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (corrupted bmap). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) { diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 608168a5cb88..4656df392722 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -44,6 +44,19 @@ struct nilfs_bmap_stats { /** * struct nilfs_bmap_operations - bmap operation table + * @bop_lookup: single block search operation + * @bop_lookup_contig: consecutive block search operation + * @bop_insert: block insertion operation + * @bop_delete: block delete operation + * @bop_clear: block mapping resource release operation + * @bop_propagate: operation to propagate dirty state towards the + * mapping root + * @bop_lookup_dirty_buffers: operation to collect dirty block buffers + * @bop_assign: disk block address assignment operation + * @bop_mark: operation to mark in-use blocks as dirty for + * relocation by GC + * @bop_seek_key: find valid block key operation + * @bop_last_key: find last valid block key operation */ struct nilfs_bmap_operations { int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); @@ -66,7 +79,7 @@ struct nilfs_bmap_operations { int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *); int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); - /* The following functions are internal use only. */ + /* private: internal use only */ int (*bop_check_insert)(const struct nilfs_bmap *, __u64); int (*bop_check_delete)(struct nilfs_bmap *, __u64); int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); @@ -74,9 +87,8 @@ struct nilfs_bmap_operations { #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) -#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) -#define NILFS_BMAP_NEW_PTR_INIT \ - (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1)) +#define NILFS_BMAP_KEY_BIT BITS_PER_LONG +#define NILFS_BMAP_NEW_PTR_INIT (1UL << (BITS_PER_LONG - 1)) static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) { diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 0131d83b912d..568367129092 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -35,6 +35,7 @@ void nilfs_init_btnc_inode(struct inode *btnc_inode) ii->i_flags = 0; memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); + btnc_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; } void nilfs_btnode_cache_clear(struct address_space *btnc) @@ -51,15 +52,23 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); if (unlikely(!bh)) - return NULL; + return ERR_PTR(-ENOMEM); if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || buffer_dirty(bh))) { - brelse(bh); - BUG(); + /* + * The block buffer at the specified new address was already + * in use. This can happen if it is a virtual block number + * and has been reallocated due to corruption of the bitmap + * used to manage its allocation state (if not, the buffer + * clearing of an abandoned b-tree node is missing somewhere). 
+ */ + nilfs_error(inode->i_sb, + "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)", + (unsigned long long)blocknr, inode->i_ino); + goto failed; } memset(bh->b_data, 0, i_blocksize(inode)); - bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = blocknr; set_buffer_mapped(bh); set_buffer_uptodate(bh); @@ -67,6 +76,12 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) folio_unlock(bh->b_folio); folio_put(bh->b_folio); return bh; + +failed: + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); + brelse(bh); + return ERR_PTR(-EIO); } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, @@ -118,7 +133,6 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, goto found; } set_buffer_mapped(bh); - bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); @@ -164,11 +178,33 @@ void nilfs_btnode_delete(struct buffer_head *bh) } /** - * nilfs_btnode_prepare_change_key - * prepare to move contents of the block for old key to one of new key. - * the old buffer will not be removed, but might be reused for new buffer. - * it might return -ENOMEM because of memory allocation errors, - * and might return -EIO because of disk read errors. + * nilfs_btnode_prepare_change_key - prepare to change the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_prepare_change_key() prepares to move the contents of the + * b-tree node block of the old key given in the "oldkey" member of @ctxt to + * the position of the new key given in the "newkey" member of @ctxt in the + * page cache @btnc. Here, the key of the block is an index in units of + * blocks, and if the page and block sizes match, it matches the page index + * in the page cache. + * + * If the page size and block size match, this function attempts to move the + * entire folio, and in preparation for this, inserts the original folio into + * the new index of the cache. If this insertion fails or if the page size + * and block size are different, it falls back to a copy preparation using + * nilfs_btnode_create_block(), inserts a new block at the position + * corresponding to "newkey", and stores the buffer head pointer in the + * "newbh" member of @ctxt. + * + * Note that the current implementation does not support folio sizes larger + * than the page size. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -217,8 +253,8 @@ retry: } nbh = nilfs_btnode_create_block(btnc, newkey); - if (!nbh) - return -ENOMEM; + if (IS_ERR(nbh)) + return PTR_ERR(nbh); BUG_ON(nbh == obh); ctxt->newbh = nbh; @@ -230,8 +266,21 @@ retry: } /** - * nilfs_btnode_commit_change_key - * commit the change_key operation prepared by prepare_change_key(). 
+ * nilfs_btnode_commit_change_key - commit the change of the search key of + * a b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_commit_change_key() executes the key change based on the + * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid + * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move), + * this function removes the folio from the old index and completes the move. + * Otherwise, it copies the block data and inherited flag states of "oldbh" + * to "newbh" and clears the "oldbh" from the cache. In either case, the + * relocated buffer is marked as dirty. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_commit_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -270,8 +319,19 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, } /** - * nilfs_btnode_abort_change_key - * abort the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_abort_change_key - abort the change of the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_abort_change_key() cancels the key change associated with the + * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs + * any necessary cleanup. If no valid block buffer is prepared in "newbh" of + * @ctxt, this function removes the folio from the destination index and aborts + * the move. Otherwise, it clears "newbh" from the cache. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_abort_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 65659fa0372e..dd0c8e560ef6 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -63,8 +63,8 @@ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); - if (!bh) - return -ENOMEM; + if (IS_ERR(bh)) + return PTR_ERR(bh); set_buffer_nilfs_volatile(bh); *bhp = bh; @@ -334,7 +334,7 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, * @inode: host inode of btree * @blocknr: block number * - * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. + * Return: 0 if normal, 1 if the node is broken. */ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, size_t size, struct inode *inode, @@ -350,7 +350,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || (flags & NILFS_BTREE_NODE_ROOT) || - nchildren < 0 || + nchildren <= 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { nilfs_crit(inode->i_sb, "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", @@ -366,7 +366,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, * @node: btree root node to be examined * @inode: host inode of btree * - * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. + * Return: 0 if normal, 1 if the root node is broken. 
*/ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, struct inode *inode) @@ -381,7 +381,8 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || - nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { + nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX || + (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) { nilfs_crit(inode->i_sb, "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, level, flags, nchildren); @@ -651,8 +652,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, * @minlevel: start level * @nextkey: place to store the next valid key * - * Return Value: If a next key was found, 0 is returned. Otherwise, - * -ENOENT is returned. + * Return: 0 if the next key was found, %-ENOENT if not found. */ static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, @@ -1658,13 +1658,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) int nchildren, ret; root = nilfs_btree_get_root(btree); + nchildren = nilfs_btree_node_get_nchildren(root); + if (unlikely(nchildren == 0)) + return 0; + switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; break; case 3: - nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, @@ -1673,12 +1676,12 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; + nchildren = nilfs_btree_node_get_nchildren(node); break; default: return 0; } - nchildren = nilfs_btree_node_get_nchildren(node); maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? nilfs_btree_node_get_key(node, nchildren - 2) : 0; @@ -1857,13 +1860,22 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, } /** - * nilfs_btree_convert_and_insert - - * @bmap: - * @key: - * @ptr: - * @keys: - * @ptrs: - * @n: + * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree + * @btree: NILFS B-tree structure + * @key: Key of the new entry to be inserted + * @ptr: Pointer (block number) associated with the key to be inserted + * @keys: Array of keys to be inserted in addition to @key + * @ptrs: Array of pointers associated with @keys + * @n: Number of keys and pointers in @keys and @ptrs + * + * This function is used to insert a new entry specified by @key and @ptr, + * along with additional entries specified by @keys and @ptrs arrays, into a + * NILFS B-tree. + * It prepares the necessary changes by allocating the required blocks and any + * necessary intermediate nodes. It converts configurations from other forms of + * block mapping (the one that currently exists is direct mapping) to a B-tree. + * + * Return: 0 on success or a negative error code on failure. 
*/ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, @@ -2090,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { - if (unlikely(ret == -ENOENT)) + if (unlikely(ret == -ENOENT)) { nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); + ret = -EINVAL; + } goto out; } diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 92868e1a48ca..2a220f716c91 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -24,6 +24,7 @@ * @bp_index: index of child node * @bp_oldreq: ptr end request for old ptr * @bp_newreq: ptr alloc request for new ptr + * @bp_ctxt: context information for changing the key of a b-tree node block * @bp_op: rebalance operation */ struct nilfs_btree_path { diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 69a5cced1e84..bcc7d76269ac 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -68,54 +68,41 @@ static inline int nilfs_cpfile_is_in_first(const struct inode *cpfile, static unsigned int nilfs_cpfile_block_add_valid_checkpoints(const struct inode *cpfile, struct buffer_head *bh, - void *kaddr, unsigned int n) { - struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + struct nilfs_checkpoint *cp; unsigned int count; + cp = kmap_local_folio(bh->b_folio, + offset_in_folio(bh->b_folio, bh->b_data)); count = le32_to_cpu(cp->cp_checkpoints_count) + n; cp->cp_checkpoints_count = cpu_to_le32(count); + kunmap_local(cp); return count; } static unsigned int nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile, struct buffer_head *bh, - void *kaddr, unsigned int n) { - struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + struct nilfs_checkpoint *cp; unsigned int count; + cp = kmap_local_folio(bh->b_folio, + offset_in_folio(bh->b_folio, bh->b_data)); WARN_ON(le32_to_cpu(cp->cp_checkpoints_count) < n); count = le32_to_cpu(cp->cp_checkpoints_count) - n; cp->cp_checkpoints_count = cpu_to_le32(count); + kunmap_local(cp); return count; } -static inline struct nilfs_cpfile_header * -nilfs_cpfile_block_get_header(const struct inode *cpfile, - struct buffer_head *bh, - void *kaddr) -{ - return kaddr + bh_offset(bh); -} - -static struct nilfs_checkpoint * -nilfs_cpfile_block_get_checkpoint(const struct inode *cpfile, __u64 cno, - struct buffer_head *bh, - void *kaddr) -{ - return kaddr + bh_offset(bh) + nilfs_cpfile_get_offset(cpfile, cno) * - NILFS_MDT(cpfile)->mi_entry_size; -} - static void nilfs_cpfile_block_init(struct inode *cpfile, struct buffer_head *bh, - void *kaddr) + void *from) { - struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + struct nilfs_checkpoint *cp = from; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; int n = nilfs_cpfile_checkpoints_per_block(cpfile); @@ -125,10 +112,65 @@ static void nilfs_cpfile_block_init(struct inode *cpfile, } } -static inline int nilfs_cpfile_get_header_block(struct inode *cpfile, - struct buffer_head **bhp) +/** + * nilfs_cpfile_checkpoint_offset - calculate the byte offset of a checkpoint + * entry in the folio containing it + * @cpfile: checkpoint file inode + * @cno: checkpoint number + * @bh: buffer head of block containing checkpoint indexed by @cno + * + * Return: Byte offset in the folio of the checkpoint specified by @cno. 
+ */ +static size_t nilfs_cpfile_checkpoint_offset(const struct inode *cpfile, + __u64 cno, + struct buffer_head *bh) +{ + return offset_in_folio(bh->b_folio, bh->b_data) + + nilfs_cpfile_get_offset(cpfile, cno) * + NILFS_MDT(cpfile)->mi_entry_size; +} + +/** + * nilfs_cpfile_cp_snapshot_list_offset - calculate the byte offset of a + * checkpoint snapshot list in the folio + * containing it + * @cpfile: checkpoint file inode + * @cno: checkpoint number + * @bh: buffer head of block containing checkpoint indexed by @cno + * + * Return: Byte offset in the folio of the checkpoint snapshot list specified + * by @cno. + */ +static size_t nilfs_cpfile_cp_snapshot_list_offset(const struct inode *cpfile, + __u64 cno, + struct buffer_head *bh) +{ + return nilfs_cpfile_checkpoint_offset(cpfile, cno, bh) + + offsetof(struct nilfs_checkpoint, cp_snapshot_list); +} + +/** + * nilfs_cpfile_ch_snapshot_list_offset - calculate the byte offset of the + * snapshot list in the header + * + * Return: Byte offset in the folio of the checkpoint snapshot list + */ +static size_t nilfs_cpfile_ch_snapshot_list_offset(void) { - return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + return offsetof(struct nilfs_cpfile_header, ch_snapshot_list); +} + +static int nilfs_cpfile_get_header_block(struct inode *cpfile, + struct buffer_head **bhp) +{ + int err = nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(cpfile->i_sb, + "missing header block in checkpoint metadata"); + err = -EIO; + } + return err; } static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, @@ -149,14 +191,11 @@ static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, * @cnop: place to store the next checkpoint number * @bhp: place to store a pointer to buffer_head struct * - * Return Value: On success, it returns 0. On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EIO - I/O error - * - * %-ENOENT - no block exists in the range. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - no block exists in the range. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_cpfile_find_checkpoint_block(struct inode *cpfile, __u64 start_cno, __u64 end_cno, @@ -197,7 +236,8 @@ static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile, * stores it to the inode file given by @ifile and the nilfs root object * given by @root. * - * Return: 0 on success, or the following negative error code on failure. + * Return: 0 on success, or one of the following negative error codes on + * failure: * * %-EINVAL - Invalid checkpoint. * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error (including metadata corruption). 
@@ -207,7 +247,7 @@ int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno, { struct buffer_head *cp_bh; struct nilfs_checkpoint *cp; - void *kaddr; + size_t offset; int ret; if (cno < 1 || cno > nilfs_mdt_cno(cpfile)) @@ -221,8 +261,8 @@ int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno, goto out_sem; } - kaddr = kmap_local_page(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) { ret = -EINVAL; goto put_cp; @@ -247,7 +287,7 @@ int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno, root->ifile = ifile; put_cp: - kunmap_local(kaddr); + kunmap_local(cp); brelse(cp_bh); out_sem: up_read(&NILFS_MDT(cpfile)->mi_sem); @@ -265,7 +305,8 @@ out_sem: * In either case, the buffer of the block containing the checkpoint entry * and the cpfile inode are made dirty for inclusion in the write log. * - * Return: 0 on success, or the following negative error code on failure. + * Return: 0 on success, or one of the following negative error codes on + * failure: * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error (including metadata corruption). * * %-EROFS - Read only filesystem @@ -275,7 +316,7 @@ int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno) struct buffer_head *header_bh, *cp_bh; struct nilfs_cpfile_header *header; struct nilfs_checkpoint *cp; - void *kaddr; + size_t offset; int ret; if (WARN_ON_ONCE(cno < 1)) @@ -283,36 +324,29 @@ int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno) down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (unlikely(ret < 0)) { - if (ret == -ENOENT) { - nilfs_error(cpfile->i_sb, - "checkpoint creation failed due to metadata corruption."); - ret = -EIO; - } + if (unlikely(ret < 0)) goto out_sem; - } + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 1, &cp_bh); if (unlikely(ret < 0)) goto out_header; - kaddr = kmap_local_page(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) { /* a newly-created checkpoint */ nilfs_checkpoint_clear_invalid(cp); + kunmap_local(cp); if (!nilfs_cpfile_is_in_first(cpfile, cno)) nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, - kaddr, 1); - kunmap_local(kaddr); + 1); - kaddr = kmap_local_page(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, - kaddr); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_ncheckpoints, 1); - kunmap_local(kaddr); + kunmap_local(header); mark_buffer_dirty(header_bh); } else { - kunmap_local(kaddr); + kunmap_local(cp); } /* Force the buffer and the inode to become dirty */ @@ -341,7 +375,8 @@ out_sem: * cpfile with the data given by the arguments @root, @blkinc, @ctime, and * @minor. * - * Return: 0 on success, or the following negative error code on failure. + * Return: 0 on success, or one of the following negative error codes on + * failure: * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error (including metadata corruption). 
*/ @@ -351,7 +386,7 @@ int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno, { struct buffer_head *cp_bh; struct nilfs_checkpoint *cp; - void *kaddr; + size_t offset; int ret; if (WARN_ON_ONCE(cno < 1)) @@ -365,10 +400,10 @@ int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno, goto out_sem; } - kaddr = kmap_local_page(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (unlikely(nilfs_checkpoint_invalid(cp))) { - kunmap_local(kaddr); + kunmap_local(cp); brelse(cp_bh); goto error; } @@ -389,7 +424,7 @@ int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno, nilfs_write_inode_common(root->ifile, &cp->cp_ifile_inode); nilfs_bmap_write(NILFS_I(root->ifile)->i_bmap, &cp->cp_ifile_inode); - kunmap_local(kaddr); + kunmap_local(cp); brelse(cp_bh); out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); @@ -412,14 +447,11 @@ error: * the period from @start to @end, excluding @end itself. The checkpoints * which have been already deleted are ignored. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - invalid checkpoints. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid checkpoints. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, __u64 start, @@ -430,6 +462,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, struct nilfs_checkpoint *cp; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; __u64 cno; + size_t offset; void *kaddr; unsigned long tnicps; int ret, ncps, nicps, nss, count, i; @@ -460,9 +493,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; } - kaddr = kmap_local_page(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint( - cpfile, cno, cp_bh, kaddr); + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kaddr = kmap_local_folio(cp_bh->b_folio, offset); nicps = 0; for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { if (nilfs_checkpoint_snapshot(cp)) { @@ -472,43 +504,42 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, nicps++; } } - if (nicps > 0) { - tnicps += nicps; - mark_buffer_dirty(cp_bh); - nilfs_mdt_mark_dirty(cpfile); - if (!nilfs_cpfile_is_in_first(cpfile, cno)) { - count = - nilfs_cpfile_block_sub_valid_checkpoints( - cpfile, cp_bh, kaddr, nicps); - if (count == 0) { - /* make hole */ - kunmap_local(kaddr); - brelse(cp_bh); - ret = - nilfs_cpfile_delete_checkpoint_block( - cpfile, cno); - if (ret == 0) - continue; - nilfs_err(cpfile->i_sb, - "error %d deleting checkpoint block", - ret); - break; - } - } + kunmap_local(kaddr); + + if (nicps <= 0) { + brelse(cp_bh); + continue; } - kunmap_local(kaddr); + tnicps += nicps; + mark_buffer_dirty(cp_bh); + nilfs_mdt_mark_dirty(cpfile); + if (nilfs_cpfile_is_in_first(cpfile, cno)) { + brelse(cp_bh); + continue; + } + + count = nilfs_cpfile_block_sub_valid_checkpoints(cpfile, cp_bh, + nicps); brelse(cp_bh); + if (count) + continue; + + /* Delete the block if there are no more valid checkpoints */ + ret = nilfs_cpfile_delete_checkpoint_block(cpfile, cno); + if (unlikely(ret)) { + nilfs_err(cpfile->i_sb, + "error %d deleting checkpoint block", ret); + 
break; + } } if (tnicps > 0) { - kaddr = kmap_local_page(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, - kaddr); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); - kunmap_local(kaddr); + kunmap_local(header); } brelse(header_bh); @@ -542,6 +573,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, struct buffer_head *bh; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; __u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop; + size_t offset; void *kaddr; int n, ret; int ncps, i; @@ -560,8 +592,8 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, } ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno); - kaddr = kmap_local_page(bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, bh); + cp = kaddr = kmap_local_folio(bh->b_folio, offset); for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { if (!nilfs_checkpoint_invalid(cp)) { nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, @@ -595,7 +627,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, struct nilfs_cpinfo *ci = buf; __u64 curr = *cnop, next; unsigned long curr_blkoff, next_blkoff; - void *kaddr; + size_t offset; int n = 0, ret; down_read(&NILFS_MDT(cpfile)->mi_sem); @@ -604,10 +636,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out; - kaddr = kmap_local_page(bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); + header = kmap_local_folio(bh->b_folio, 0); curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); - kunmap_local(kaddr); + kunmap_local(header); brelse(bh); if (curr == 0) { ret = 0; @@ -625,9 +656,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = 0; /* No snapshots (started from a hole block) */ goto out; } - kaddr = kmap_local_page(bh->b_page); + offset = nilfs_cpfile_checkpoint_offset(cpfile, curr, bh); + cp = kmap_local_folio(bh->b_folio, offset); while (n < nci) { - cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); curr = ~(__u64)0; /* Terminator */ if (unlikely(nilfs_checkpoint_invalid(cp) || !nilfs_checkpoint_snapshot(cp))) @@ -639,9 +670,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, if (next == 0) break; /* reach end of the snapshot list */ + kunmap_local(cp); next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); if (curr_blkoff != next_blkoff) { - kunmap_local(kaddr); brelse(bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &bh); @@ -649,12 +680,13 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, WARN_ON(ret == -ENOENT); goto out; } - kaddr = kmap_local_page(bh->b_page); } + offset = nilfs_cpfile_checkpoint_offset(cpfile, next, bh); + cp = kmap_local_folio(bh->b_folio, offset); curr = next; curr_blkoff = next_blkoff; } - kunmap_local(kaddr); + kunmap_local(cp); brelse(bh); *cnop = curr; ret = n; @@ -683,7 +715,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, * number to continue searching. * * Return: Count of checkpoint info items stored in the output buffer on - * success, or the following negative error code on failure. 
+ * success, or one of the following negative error codes on failure: * * %-EINVAL - Invalid checkpoint mode. * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error (including metadata corruption). @@ -704,9 +736,16 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, } /** - * nilfs_cpfile_delete_checkpoint - - * @cpfile: - * @cno: + * nilfs_cpfile_delete_checkpoint - delete a checkpoint + * @cpfile: checkpoint file inode + * @cno: checkpoint number to delete + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Checkpoint in use (snapshot specified). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No valid checkpoint found. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) { @@ -725,26 +764,6 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1); } -static struct nilfs_snapshot_list * -nilfs_cpfile_block_get_snapshot_list(const struct inode *cpfile, - __u64 cno, - struct buffer_head *bh, - void *kaddr) -{ - struct nilfs_cpfile_header *header; - struct nilfs_checkpoint *cp; - struct nilfs_snapshot_list *list; - - if (cno != 0) { - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); - list = &cp->cp_snapshot_list; - } else { - header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); - list = &header->ch_snapshot_list; - } - return list; -} - static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) { struct buffer_head *header_bh, *curr_bh, *prev_bh, *cp_bh; @@ -753,94 +772,103 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) struct nilfs_snapshot_list *list; __u64 curr, prev; unsigned long curr_blkoff, prev_blkoff; - void *kaddr; + size_t offset, curr_list_offset, prev_list_offset; int ret; if (cno == 0) return -ENOENT; /* checkpoint number 0 is invalid */ down_write(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); + if (unlikely(ret < 0)) + goto out_sem; + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) - goto out_sem; - kaddr = kmap_local_page(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + goto out_header; + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_local(kaddr); + kunmap_local(cp); goto out_cp; } if (nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_local(kaddr); + kunmap_local(cp); goto out_cp; } - kunmap_local(kaddr); + kunmap_local(cp); - ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (ret < 0) - goto out_cp; - kaddr = kmap_local_page(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + /* + * Find the last snapshot before the checkpoint being changed to + * snapshot mode by going backwards through the snapshot list. + * Set "prev" to its checkpoint number, or 0 if not found. 
+ */ + header = kmap_local_folio(header_bh->b_folio, 0); list = &header->ch_snapshot_list; curr_bh = header_bh; get_bh(curr_bh); curr = 0; curr_blkoff = 0; + curr_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); prev = le64_to_cpu(list->ssl_prev); while (prev > cno) { prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); curr = prev; + kunmap_local(list); if (curr_blkoff != prev_blkoff) { - kunmap_local(kaddr); brelse(curr_bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &curr_bh); - if (ret < 0) - goto out_header; - kaddr = kmap_local_page(curr_bh->b_page); + if (unlikely(ret < 0)) + goto out_cp; } + curr_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, curr, curr_bh); + list = kmap_local_folio(curr_bh->b_folio, curr_list_offset); curr_blkoff = prev_blkoff; - cp = nilfs_cpfile_block_get_checkpoint( - cpfile, curr, curr_bh, kaddr); - list = &cp->cp_snapshot_list; prev = le64_to_cpu(list->ssl_prev); } - kunmap_local(kaddr); + kunmap_local(list); if (prev != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, &prev_bh); if (ret < 0) goto out_curr; + + prev_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, prev, prev_bh); } else { prev_bh = header_bh; get_bh(prev_bh); + prev_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); } - kaddr = kmap_local_page(curr_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, curr, curr_bh, kaddr); + /* Update the list entry for the next snapshot */ + list = kmap_local_folio(curr_bh->b_folio, curr_list_offset); list->ssl_prev = cpu_to_le64(cno); - kunmap_local(kaddr); + kunmap_local(list); - kaddr = kmap_local_page(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + /* Update the checkpoint being changed to a snapshot */ + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); nilfs_checkpoint_set_snapshot(cp); - kunmap_local(kaddr); + kunmap_local(cp); - kaddr = kmap_local_page(prev_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, prev, prev_bh, kaddr); + /* Update the list entry for the previous snapshot */ + list = kmap_local_folio(prev_bh->b_folio, prev_list_offset); list->ssl_next = cpu_to_le64(cno); - kunmap_local(kaddr); + kunmap_local(list); - kaddr = kmap_local_page(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + /* Update the statistics in the header */ + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_nsnapshots, 1); - kunmap_local(kaddr); + kunmap_local(header); mark_buffer_dirty(prev_bh); mark_buffer_dirty(curr_bh); @@ -853,12 +881,12 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) out_curr: brelse(curr_bh); - out_header: - brelse(header_bh); - out_cp: brelse(cp_bh); + out_header: + brelse(header_bh); + out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); return ret; @@ -871,79 +899,87 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) struct nilfs_checkpoint *cp; struct nilfs_snapshot_list *list; __u64 next, prev; - void *kaddr; + size_t offset, next_list_offset, prev_list_offset; int ret; if (cno == 0) return -ENOENT; /* checkpoint number 0 is invalid */ down_write(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); + if (unlikely(ret < 0)) + goto out_sem; + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, 
&cp_bh); if (ret < 0) - goto out_sem; - kaddr = kmap_local_page(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + goto out_header; + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_local(kaddr); + kunmap_local(cp); goto out_cp; } if (!nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_local(kaddr); + kunmap_local(cp); goto out_cp; } list = &cp->cp_snapshot_list; next = le64_to_cpu(list->ssl_next); prev = le64_to_cpu(list->ssl_prev); - kunmap_local(kaddr); + kunmap_local(cp); - ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (ret < 0) - goto out_cp; if (next != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &next_bh); if (ret < 0) - goto out_header; + goto out_cp; + + next_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, next, next_bh); } else { next_bh = header_bh; get_bh(next_bh); + next_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); } if (prev != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, &prev_bh); if (ret < 0) goto out_next; + + prev_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, prev, prev_bh); } else { prev_bh = header_bh; get_bh(prev_bh); + prev_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); } - kaddr = kmap_local_page(next_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, next, next_bh, kaddr); + /* Update the list entry for the next snapshot */ + list = kmap_local_folio(next_bh->b_folio, next_list_offset); list->ssl_prev = cpu_to_le64(prev); - kunmap_local(kaddr); + kunmap_local(list); - kaddr = kmap_local_page(prev_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, prev, prev_bh, kaddr); + /* Update the list entry for the previous snapshot */ + list = kmap_local_folio(prev_bh->b_folio, prev_list_offset); list->ssl_next = cpu_to_le64(next); - kunmap_local(kaddr); + kunmap_local(list); - kaddr = kmap_local_page(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + /* Update the snapshot being changed back to a plain checkpoint */ + cp = kmap_local_folio(cp_bh->b_folio, offset); cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); nilfs_checkpoint_clear_snapshot(cp); - kunmap_local(kaddr); + kunmap_local(cp); - kaddr = kmap_local_page(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + /* Update the statistics in the header */ + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_nsnapshots, -1); - kunmap_local(kaddr); + kunmap_local(header); mark_buffer_dirty(next_bh); mark_buffer_dirty(prev_bh); @@ -956,39 +992,33 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) out_next: brelse(next_bh); - out_header: - brelse(header_bh); - out_cp: brelse(cp_bh); + out_header: + brelse(header_bh); + out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); return ret; } /** - * nilfs_cpfile_is_snapshot - + * nilfs_cpfile_is_snapshot - determine if checkpoint is a snapshot * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * - * Description: - * - * Return Value: On success, 1 is returned if the checkpoint specified by - * @cno is a snapshot, or 0 if not. On error, one of the following negative - * error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. 
+ * @cno: checkpoint number * - * %-ENOENT - No such checkpoint. + * Return: 1 if the checkpoint specified by @cno is a snapshot, 0 if not, or + * one of the following negative error codes on failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No such checkpoint. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) { struct buffer_head *bh; struct nilfs_checkpoint *cp; - void *kaddr; + size_t offset; int ret; /* @@ -1002,13 +1032,14 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); if (ret < 0) goto out; - kaddr = kmap_local_page(bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, bh); + cp = kmap_local_folio(bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) ret = -ENOENT; else ret = nilfs_checkpoint_snapshot(cp); - kunmap_local(kaddr); + kunmap_local(cp); brelse(bh); out: @@ -1025,14 +1056,11 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) * Description: nilfs_change_cpmode() changes the mode of the checkpoint * specified by @cno. The mode @mode is NILFS_CHECKPOINT or NILFS_SNAPSHOT. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - No such checkpoint. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No such checkpoint. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) { @@ -1064,20 +1092,17 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) * @cpstat: pointer to a structure of checkpoint statistics * * Description: nilfs_cpfile_get_stat() returns information about checkpoints. + * The checkpoint statistics are stored in the location pointed to by @cpstat. * - * Return Value: On success, 0 is returned, and checkpoints information is - * stored in the place pointed by @cpstat. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
*/ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) { struct buffer_head *bh; struct nilfs_cpfile_header *header; - void *kaddr; int ret; down_read(&NILFS_MDT(cpfile)->mi_sem); @@ -1085,12 +1110,11 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out_sem; - kaddr = kmap_local_page(bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); + header = kmap_local_folio(bh->b_folio, 0); cpstat->cs_cno = nilfs_mdt_cno(cpfile); cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); - kunmap_local(kaddr); + kunmap_local(header); brelse(bh); out_sem: @@ -1104,6 +1128,8 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) * @cpsize: size of a checkpoint entry * @raw_inode: on-disk cpfile inode * @inodep: buffer to store the inode + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, struct nilfs_inode *raw_inode, struct inode **inodep) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 180fc8d36213..c664daba56ae 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -75,7 +75,7 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) { int ret; - ret = nilfs_palloc_prepare_alloc_entry(dat, req); + ret = nilfs_palloc_prepare_alloc_entry(dat, req, true); if (ret < 0) return ret; @@ -89,15 +89,15 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; - kaddr = kmap_local_page(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MAX); entry->de_blocknr = cpu_to_le64(0); - kunmap_local(kaddr); + kunmap_local(entry); nilfs_palloc_commit_alloc_entry(dat, req); nilfs_dat_commit_entry(dat, req); @@ -113,15 +113,15 @@ static void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; - kaddr = kmap_local_page(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MIN); entry->de_blocknr = cpu_to_le64(0); - kunmap_local(kaddr); + kunmap_local(entry); nilfs_dat_commit_entry(dat, req); @@ -143,14 +143,14 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, sector_t blocknr) { struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; - kaddr = kmap_local_page(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_local(kaddr); + 
kunmap_local(entry); nilfs_dat_commit_entry(dat, req); } @@ -160,19 +160,19 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) struct nilfs_dat_entry *entry; __u64 start; sector_t blocknr; - void *kaddr; + size_t offset; int ret; ret = nilfs_dat_prepare_entry(dat, req, 0); if (ret < 0) return ret; - kaddr = kmap_local_page(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_local(kaddr); + kunmap_local(entry); if (blocknr == 0) { ret = nilfs_palloc_prepare_free_entry(dat, req); @@ -200,11 +200,11 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, struct nilfs_dat_entry *entry; __u64 start, end; sector_t blocknr; - void *kaddr; + size_t offset; - kaddr = kmap_local_page(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); end = start = le64_to_cpu(entry->de_start); if (!dead) { end = nilfs_mdt_cno(dat); @@ -212,7 +212,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, } entry->de_end = cpu_to_le64(end); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_local(kaddr); + kunmap_local(entry); if (blocknr == 0) nilfs_dat_commit_free(dat, req); @@ -225,14 +225,14 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) struct nilfs_dat_entry *entry; __u64 start; sector_t blocknr; - void *kaddr; + size_t offset; - kaddr = kmap_local_page(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_local(kaddr); + kunmap_local(entry); if (start == nilfs_mdt_cno(dat) && blocknr == 0) nilfs_palloc_abort_free_entry(dat, req); @@ -271,18 +271,16 @@ void nilfs_dat_abort_update(struct inode *dat, } /** - * nilfs_dat_mark_dirty - - * @dat: DAT file inode + * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified + * virtual block address entry as dirty + * @dat: DAT file inode * @vblocknr: virtual block number * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid DAT entry (internal code). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) { @@ -305,14 +303,11 @@ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) * Description: nilfs_dat_freev() frees the virtual block numbers specified by * @vblocknrs and @nitems. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. 
- * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The virtual block number have not been allocated. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - The virtual block number have not been allocated. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) { @@ -328,18 +323,16 @@ int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) * Description: nilfs_dat_move() changes the block number associated with * @vblocknr to @blocknr. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) { struct buffer_head *entry_bh; struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; int ret; ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); @@ -362,21 +355,21 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) } } - kaddr = kmap_local_page(entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh); + entry = kmap_local_folio(entry_bh->b_folio, offset); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { nilfs_crit(dat->i_sb, "%s: invalid vblocknr = %llu, [%llu, %llu)", __func__, (unsigned long long)vblocknr, (unsigned long long)le64_to_cpu(entry->de_start), (unsigned long long)le64_to_cpu(entry->de_end)); - kunmap_local(kaddr); + kunmap_local(entry); brelse(entry_bh); return -EINVAL; } WARN_ON(blocknr == 0); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_local(kaddr); + kunmap_local(entry); mark_buffer_dirty(entry_bh); nilfs_mdt_mark_dirty(dat); @@ -393,24 +386,21 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) * @blocknrp: pointer to a block number * * Description: nilfs_dat_translate() maps the virtual block number @vblocknr - * to the corresponding block number. - * - * Return Value: On success, 0 is returned and the block number associated - * with @vblocknr is stored in the place pointed by @blocknrp. On error, one - * of the following negative error codes is returned. + * to the corresponding block number. The block number associated with + * @vblocknr is stored in the place pointed to by @blocknrp. * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - A block number associated with @vblocknr does not exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - A block number associated with @vblocknr does not exist. + * * %-ENOMEM - Insufficient memory available. 
*/ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) { struct buffer_head *entry_bh, *bh; struct nilfs_dat_entry *entry; sector_t blocknr; - void *kaddr; + size_t offset; int ret; ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); @@ -426,8 +416,8 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) } } - kaddr = kmap_local_page(entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh); + entry = kmap_local_folio(entry_bh->b_folio, offset); blocknr = le64_to_cpu(entry->de_blocknr); if (blocknr == 0) { ret = -ENOENT; @@ -436,7 +426,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) *blocknrp = blocknr; out: - kunmap_local(kaddr); + kunmap_local(entry); brelse(entry_bh); return ret; } @@ -445,11 +435,12 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, size_t nvi) { struct buffer_head *entry_bh; - struct nilfs_dat_entry *entry; + struct nilfs_dat_entry *entry, *first_entry; struct nilfs_vinfo *vinfo = buf; __u64 first, last; - void *kaddr; + size_t offset; unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; + unsigned int entry_size = NILFS_MDT(dat)->mi_entry_size; int i, j, n, ret; for (i = 0; i < nvi; i += n) { @@ -457,23 +448,28 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, 0, &entry_bh); if (ret < 0) return ret; - kaddr = kmap_local_page(entry_bh->b_page); - /* last virtual block number in this block */ + first = vinfo->vi_vblocknr; first = div64_ul(first, entries_per_block); first *= entries_per_block; + /* first virtual block number in this block */ + last = first + entries_per_block - 1; + /* last virtual block number in this block */ + + offset = nilfs_palloc_entry_offset(dat, first, entry_bh); + first_entry = kmap_local_folio(entry_bh->b_folio, offset); for (j = i, n = 0; j < nvi && vinfo->vi_vblocknr >= first && vinfo->vi_vblocknr <= last; j++, n++, vinfo = (void *)vinfo + visz) { - entry = nilfs_palloc_block_get_entry( - dat, vinfo->vi_vblocknr, entry_bh, kaddr); + entry = (void *)first_entry + + (vinfo->vi_vblocknr - first) * entry_size; vinfo->vi_start = le64_to_cpu(entry->de_start); vinfo->vi_end = le64_to_cpu(entry->de_end); vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); } - kunmap_local(kaddr); + kunmap_local(first_entry); brelse(entry_bh); } @@ -486,6 +482,8 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, * @entry_size: size of a dat entry * @raw_inode: on-disk dat inode * @inodep: buffer to store the inode + * + * Return: 0 on success, or a negative error code on failure. 
*/ int nilfs_dat_read(struct super_block *sb, size_t entry_size, struct nilfs_inode *raw_inode, struct inode **inodep) diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index bc846b904b68..9b7f8e9655a2 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -70,7 +70,7 @@ static inline unsigned int nilfs_chunk_size(struct inode *inode) */ static unsigned int nilfs_last_byte(struct inode *inode, unsigned long page_nr) { - unsigned int last_byte = inode->i_size; + u64 last_byte = inode->i_size; last_byte -= page_nr << PAGE_SHIFT; if (last_byte > PAGE_SIZE) @@ -83,7 +83,7 @@ static int nilfs_prepare_chunk(struct folio *folio, unsigned int from, { loff_t pos = folio_pos(folio) + from; - return __block_write_begin(&folio->page, pos, to - from, nilfs_get_block); + return __block_write_begin(folio, pos, to - from, nilfs_get_block); } static void nilfs_commit_chunk(struct folio *folio, @@ -95,8 +95,8 @@ static void nilfs_commit_chunk(struct folio *folio, unsigned int nr_dirty; int err; - nr_dirty = nilfs_page_count_clean_buffers(&folio->page, from, to); - copied = block_write_end(NULL, mapping, pos, len, len, &folio->page, NULL); + nr_dirty = nilfs_page_count_clean_buffers(folio, from, to); + copied = block_write_end(NULL, mapping, pos, len, len, folio, NULL); if (pos + copied > dir->i_size) i_size_write(dir, pos + copied); if (IS_DIRSYNC(dir)) @@ -135,6 +135,9 @@ static bool nilfs_check_folio(struct folio *folio, char *kaddr) goto Enamelen; if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) goto Espan; + if (unlikely(p->inode && + NILFS_PRIVATE_INODE(le64_to_cpu(p->inode)))) + goto Einumber; } if (offs != limit) goto Eend; @@ -160,6 +163,9 @@ Enamelen: goto bad_entry; Espan: error = "directory entry across blocks"; + goto bad_entry; +Einumber: + error = "disallowed inode number"; bad_entry: nilfs_error(sb, "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d", @@ -174,7 +180,6 @@ Eend: dir->i_ino, (folio->index << PAGE_SHIFT) + offs, (unsigned long)le64_to_cpu(p->inode)); fail: - folio_set_error(folio); return false; } @@ -226,37 +231,6 @@ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) nilfs_rec_len_from_disk(p->rec_len)); } -static unsigned char -nilfs_filetype_table[NILFS_FT_MAX] = { - [NILFS_FT_UNKNOWN] = DT_UNKNOWN, - [NILFS_FT_REG_FILE] = DT_REG, - [NILFS_FT_DIR] = DT_DIR, - [NILFS_FT_CHRDEV] = DT_CHR, - [NILFS_FT_BLKDEV] = DT_BLK, - [NILFS_FT_FIFO] = DT_FIFO, - [NILFS_FT_SOCK] = DT_SOCK, - [NILFS_FT_SYMLINK] = DT_LNK, -}; - -#define S_SHIFT 12 -static unsigned char -nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK, -}; - -static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) -{ - umode_t mode = inode->i_mode; - - de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - static int nilfs_readdir(struct file *file, struct dir_context *ctx) { loff_t pos = ctx->pos; @@ -292,10 +266,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) if (de->inode) { unsigned char t; - if (de->file_type < NILFS_FT_MAX) - t = nilfs_filetype_table[de->file_type]; - else - t = DT_UNKNOWN; + t = fs_ftype_to_dtype(de->file_type); if (!dir_emit(ctx, de->name, de->name_len, le64_to_cpu(de->inode), t)) { @@ -318,7 +289,7 @@ static 
int nilfs_readdir(struct file *file, struct dir_context *ctx) * The folio is mapped and unlocked. When the caller is finished with * the entry, it should call folio_release_kmap(). * - * On failure, returns NULL and the caller should ignore foliop. + * On failure, returns an error pointer and the caller should ignore foliop. */ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, const struct qstr *qstr, struct folio **foliop) @@ -341,22 +312,24 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, do { char *kaddr = nilfs_get_folio(dir, n, foliop); - if (!IS_ERR(kaddr)) { - de = (struct nilfs_dir_entry *)kaddr; - kaddr += nilfs_last_byte(dir, n) - reclen; - while ((char *) de <= kaddr) { - if (de->rec_len == 0) { - nilfs_error(dir->i_sb, - "zero-length directory entry"); - folio_release_kmap(*foliop, kaddr); - goto out; - } - if (nilfs_match(namelen, name, de)) - goto found; - de = nilfs_next_entry(de); + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); + + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(dir, n) - reclen; + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + nilfs_error(dir->i_sb, + "zero-length directory entry"); + folio_release_kmap(*foliop, kaddr); + goto out; } - folio_release_kmap(*foliop, kaddr); + if (nilfs_match(namelen, name, de)) + goto found; + de = nilfs_next_entry(de); } + folio_release_kmap(*foliop, kaddr); + if (++n >= npages) n = 0; /* next folio is past the blocks we've got */ @@ -369,7 +342,7 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, } } while (n != start); out: - return NULL; + return ERR_PTR(-ENOENT); found: ei->i_dir_start_lookup = n; @@ -378,28 +351,56 @@ found: struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop) { - struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop); + struct folio *folio; + struct nilfs_dir_entry *de, *next_de; + size_t limit; + char *msg; + de = nilfs_get_folio(dir, 0, &folio); if (IS_ERR(de)) return NULL; - return nilfs_next_entry(de); + + limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */ + if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino || + !nilfs_match(1, ".", de))) { + msg = "missing '.'"; + goto fail; + } + + next_de = nilfs_next_entry(de); + /* + * If "next_de" has not reached the end of the chunk, there is + * at least one more record. Check whether it matches "..". 
+ */ + if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) || + !nilfs_match(2, "..", next_de))) { + msg = "missing '..'"; + goto fail; + } + *foliop = folio; + return next_de; + +fail: + nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg); + folio_release_kmap(folio, de); + return NULL; } -ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino) { - ino_t res = 0; struct nilfs_dir_entry *de; struct folio *folio; de = nilfs_find_entry(dir, qstr, &folio); - if (de) { - res = le64_to_cpu(de->inode); - folio_release_kmap(folio, de); - } - return res; + if (IS_ERR(de)) + return PTR_ERR(de); + + *ino = le64_to_cpu(de->inode); + folio_release_kmap(folio, de); + return 0; } -void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, +int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, struct folio *folio, struct inode *inode) { size_t from = offset_in_folio(folio, de); @@ -409,11 +410,15 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, folio_lock(folio); err = nilfs_prepare_chunk(folio, from, to); - BUG_ON(err); + if (unlikely(err)) { + folio_unlock(folio); + return err; + } de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); nilfs_commit_chunk(folio, mapping, from, to); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); + return 0; } /* @@ -498,7 +503,7 @@ got_it: de->name_len = namelen; memcpy(de->name, name, namelen); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); nilfs_commit_chunk(folio, folio->mapping, from, to); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); nilfs_mark_inode_dirty(dir); @@ -542,7 +547,10 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct folio *folio) from = (char *)pde - kaddr; folio_lock(folio); err = nilfs_prepare_chunk(folio, from, to); - BUG_ON(err); + if (unlikely(err)) { + folio_unlock(folio); + goto out; + } if (pde) pde->rec_len = nilfs_rec_len_to_disk(to - from); dir->inode = 0; @@ -579,14 +587,14 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1)); memcpy(de->name, ".\0\0", 4); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); de->name_len = 2; de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1)); de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); kunmap_local(kaddr); nilfs_commit_chunk(folio, mapping, 0, chunk_size); fail: @@ -608,7 +616,7 @@ int nilfs_empty_dir(struct inode *inode) kaddr = nilfs_get_folio(inode, i, &folio); if (IS_ERR(kaddr)) - continue; + return 0; de = (struct nilfs_dir_entry *)kaddr; kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1); diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 893ab36824cc..2d8dc6b35b54 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap, dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); ptr = nilfs_direct_get_ptr(bmap, key); + if (ptr == NILFS_BMAP_INVALID_PTR) + return -EINVAL; + if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; 
newreq.pr_entry_nr = ptr; diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index bf9a11d58817..561c220799c7 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -46,14 +46,11 @@ * specified by @pbn to the GC pagecache with the key @blkoff. * This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer. * - * Return Value: On success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The block specified with @pbn does not exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - The block specified with @pbn does not exist. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, sector_t pbn, __u64 vbn, @@ -83,10 +80,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, goto out; } - if (!buffer_mapped(bh)) { - bh->b_bdev = inode->i_sb->s_bdev; + if (!buffer_mapped(bh)) set_buffer_mapped(bh); - } bh->b_blocknr = pbn; bh->b_end_io = end_buffer_read_sync; get_bh(bh); @@ -116,12 +111,11 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, * specified by @vbn to the GC pagecache. @pbn can be supplied by the * caller to avoid translation of the disk block address. * - * Return Value: On success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Invalid virtual block address. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) @@ -165,7 +159,7 @@ int nilfs_init_gcinode(struct inode *inode) inode->i_mode = S_IFREG; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); - inode->i_mapping->a_ops = &empty_aops; + inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; ii->i_flags = 0; nilfs_bmap_init_gc(ii->i_bmap); @@ -175,6 +169,7 @@ int nilfs_init_gcinode(struct inode *inode) /** * nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes + * @nilfs: NILFS filesystem instance */ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) { diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 612e609158b5..c4cd4a4dedd0 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -38,17 +38,16 @@ static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile) * @out_ino: pointer to a variable to store inode number * @out_bh: buffer_head contains newly allocated disk inode * - * Return Value: On success, 0 is returned and the newly allocated inode - * number is stored in the place pointed by @ino, and buffer_head pointer - * that contains newly allocated disk inode structure is stored in the - * place pointed by @out_bh - * On error, one of the following negative error codes is returned. + * nilfs_ifile_create_inode() allocates a new inode in the ifile metadata + * file and stores the inode number in the variable pointed to by @out_ino, + * as well as storing the ifile's buffer with the disk inode in the location + * pointed to by @out_bh. * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. 
- * - * %-ENOSPC - No inode left. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No inode left. */ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, struct buffer_head **out_bh) @@ -56,13 +55,10 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, struct nilfs_palloc_req req; int ret; - req.pr_entry_nr = 0; /* - * 0 says find free inode from beginning - * of a group. dull code!! - */ + req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb); req.pr_entry_bh = NULL; - ret = nilfs_palloc_prepare_alloc_entry(ifile, &req); + ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false); if (!ret) { ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1, &req.pr_entry_bh); @@ -86,14 +82,11 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, * @ifile: ifile inode * @ino: inode number * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The inode number @ino have not been allocated. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Inode number unallocated. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) { @@ -101,7 +94,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) .pr_entry_nr = ino, .pr_entry_bh = NULL }; struct nilfs_inode *raw_inode; - void *kaddr; + size_t offset; int ret; ret = nilfs_palloc_prepare_free_entry(ifile, &req); @@ -116,11 +109,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) return ret; } - kaddr = kmap_local_page(req.pr_entry_bh->b_page); - raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, - req.pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(ifile, req.pr_entry_nr, + req.pr_entry_bh); + raw_inode = kmap_local_folio(req.pr_entry_bh->b_folio, offset); raw_inode->i_flags = 0; - kunmap_local(kaddr); + kunmap_local(raw_inode); mark_buffer_dirty(req.pr_entry_bh); brelse(req.pr_entry_bh); @@ -153,6 +146,8 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, * @ifile: ifile inode * @nmaxinodes: current maximum of available inodes count [out] * @nfreeinodes: free inodes count [out] + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_ifile_count_free_inodes(struct inode *ifile, u64 *nmaxinodes, u64 *nfreeinodes) @@ -177,7 +172,8 @@ int nilfs_ifile_count_free_inodes(struct inode *ifile, * @cno: number of checkpoint entry to read * @inode_size: size of an inode * - * Return: 0 on success, or the following negative error code on failure. + * Return: 0 on success, or one of the following negative error codes on + * failure: * * %-EINVAL - Invalid checkpoint. * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error (including metadata corruption). 
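[Editor's note - illustrative sketch, not part of the patch] The cpfile.c, dat.c and ifile.c hunks above repeat one conversion: instead of mapping a whole page with kmap_local_page() and having a helper such as nilfs_palloc_block_get_entry() locate the entry inside the mapped page, the new code computes the entry's byte offset within the folio once (via nilfs_palloc_entry_offset() or nilfs_cpfile_checkpoint_offset()) and maps it directly with kmap_local_folio(); kunmap_local() is then passed the entry pointer itself, which works because any address inside the local mapping identifies it. The sketch below shows that before/after shape for an ifile entry, assuming the helpers named in the diff; it is a simplified illustration, not code taken from the patch, and the function names/flag update are hypothetical examples.

/* Illustrative sketch only -- not part of this patch. */
#include <linux/buffer_head.h>
#include <linux/highmem.h>
#include "nilfs.h"	/* struct nilfs_inode and palloc helpers (assumed) */

/* Old pattern: map the page, let a helper find the entry within it. */
static void example_clear_inode_flags_old(struct inode *ifile, __u64 ino,
					  struct buffer_head *bh)
{
	struct nilfs_inode *raw_inode;
	void *kaddr;

	kaddr = kmap_local_page(bh->b_page);
	raw_inode = nilfs_palloc_block_get_entry(ifile, ino, bh, kaddr);
	raw_inode->i_flags = 0;			/* example modification */
	kunmap_local(kaddr);
}

/* New pattern: compute the folio offset once, map the entry directly. */
static void example_clear_inode_flags_new(struct inode *ifile, __u64 ino,
					  struct buffer_head *bh)
{
	struct nilfs_inode *raw_inode;
	size_t offset;

	offset = nilfs_palloc_entry_offset(ifile, ino, bh);
	raw_inode = kmap_local_folio(bh->b_folio, offset);
	raw_inode->i_flags = 0;			/* example modification */
	kunmap_local(raw_inode);		/* unmap via the entry pointer */
}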
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 625545cc2a98..5d116a566d9e 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -21,9 +21,9 @@ static inline struct nilfs_inode * nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh) { - void *kaddr = kmap_local_page(ibh->b_page); + size_t __offset_in_folio = nilfs_palloc_entry_offset(ifile, ino, ibh); - return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr); + return kmap_local_folio(ibh->b_folio, __offset_in_folio); } static inline void nilfs_ifile_unmap_inode(struct nilfs_inode *raw_inode) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7340a01d80e1..6613b8fcceb0 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -15,6 +15,7 @@ #include <linux/writeback.h> #include <linux/uio.h> #include <linux/fiemap.h> +#include <linux/random.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" @@ -28,17 +29,13 @@ * @ino: inode number * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) - * @for_gc: inode for GC flag - * @for_btnc: inode for B-tree node cache flag - * @for_shadow: inode for shadowed page cache flag + * @type: inode type */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - bool for_gc; - bool for_btnc; - bool for_shadow; + unsigned int type; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -71,6 +68,8 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n) * * This function does not issue actual read request of the specified data * block. It is done by VFS. + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct buffer_head *bh_result, int create) @@ -144,6 +143,8 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, * address_space_operations. * @file: file struct of the file to be read * @folio: the folio to be read + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_read_folio(struct file *file, struct folio *folio) { @@ -162,7 +163,7 @@ static int nilfs_writepages(struct address_space *mapping, int err = 0; if (sb_rdonly(inode->i_sb)) { - nilfs_clear_dirty_pages(mapping, false); + nilfs_clear_dirty_pages(mapping); return -EROFS; } @@ -173,37 +174,6 @@ static int nilfs_writepages(struct address_space *mapping, return err; } -static int nilfs_writepage(struct page *page, struct writeback_control *wbc) -{ - struct folio *folio = page_folio(page); - struct inode *inode = folio->mapping->host; - int err; - - if (sb_rdonly(inode->i_sb)) { - /* - * It means that filesystem was remounted in read-only - * mode because of error or metadata corruption. But we - * have dirty pages that try to be flushed in background. - * So, here we simply discard this dirty page. 
- */ - nilfs_clear_folio_dirty(folio, false); - folio_unlock(folio); - return -EROFS; - } - - folio_redirty_for_writepage(wbc, folio); - folio_unlock(folio); - - if (wbc->sync_mode == WB_SYNC_ALL) { - err = nilfs_construct_segment(inode->i_sb); - if (unlikely(err)) - return err; - } else if (wbc->for_reclaim) - nilfs_flush_segment(inode->i_sb, inode->i_ino); - - return 0; -} - static bool nilfs_dirty_folio(struct address_space *mapping, struct folio *folio) { @@ -250,7 +220,7 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to) static int nilfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata) + struct folio **foliop, void **fsdata) { struct inode *inode = mapping->host; @@ -259,7 +229,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) return err; - err = block_write_begin(mapping, pos, len, pagep, nilfs_get_block); + err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block); if (unlikely(err)) { nilfs_write_failed(mapping, pos + len); nilfs_transaction_abort(inode->i_sb); @@ -269,16 +239,16 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, static int nilfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { struct inode *inode = mapping->host; unsigned int start = pos & (PAGE_SIZE - 1); unsigned int nr_dirty; int err; - nr_dirty = nilfs_page_count_clean_buffers(page, start, + nr_dirty = nilfs_page_count_clean_buffers(folio, start, start + copied); - copied = generic_write_end(file, mapping, pos, len, copied, page, + copied = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); nilfs_set_file_dirty(inode, nr_dirty); err = nilfs_transaction_commit(inode->i_sb); @@ -298,7 +268,6 @@ nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } const struct address_space_operations nilfs_aops = { - .writepage = nilfs_writepage, .read_folio = nilfs_read_folio, .writepages = nilfs_writepages, .dirty_folio = nilfs_dirty_folio, @@ -307,16 +276,20 @@ const struct address_space_operations nilfs_aops = { .write_end = nilfs_write_end, .invalidate_folio = block_invalidate_folio, .direct_IO = nilfs_direct_IO, + .migrate_folio = buffer_migrate_folio_norefs, .is_partially_uptodate = block_is_partially_uptodate, }; +const struct address_space_operations nilfs_buffer_cache_aops = { + .invalidate_folio = block_invalidate_folio, +}; + static int nilfs_insert_inode_locked(struct inode *inode, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -325,7 +298,6 @@ static int nilfs_insert_inode_locked(struct inode *inode, struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; - struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; @@ -343,25 +315,13 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); ii->i_state = BIT(NILFS_I_NEW); + ii->i_type = NILFS_I_TYPE_NORMAL; ii->i_root = root; err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); if (unlikely(err)) goto 
failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - - if (unlikely(ino < NILFS_USER_INO)) { - nilfs_warn(sb, - "inode bitmap is inconsistent for reserved inodes"); - do { - brelse(bh); - err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); - if (unlikely(err)) - goto failed_ifile_create_inode; - } while (ino < NILFS_USER_INO); - - nilfs_info(sb, "repaired inode bitmap for reserved inodes"); - } ii->i_bh = bh; atomic64_inc(&root->inodes_count); @@ -385,9 +345,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; nilfs_set_inode_flags(inode); - spin_lock(&nilfs->ns_next_gen_lock); - inode->i_generation = nilfs->ns_next_generation++; - spin_unlock(&nilfs->ns_next_gen_lock); + inode->i_generation = get_random_u32(); if (nilfs_insert_inode_locked(inode, root, ino) < 0) { err = -EIO; goto failed_after_creation; @@ -546,23 +504,10 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); - if (test_bit(NILFS_I_BTNC, &ii->i_state)) { - if (!args->for_btnc) - return 0; - } else if (args->for_btnc) { - return 0; - } - if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { - if (!args->for_shadow) - return 0; - } else if (args->for_shadow) { + if (ii->i_type != args->type) return 0; - } - - if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) - return !args->for_gc; - return args->for_gc && args->cno == ii->i_cno; + return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno; } static int nilfs_iget_set(struct inode *inode, void *opaque) @@ -572,15 +517,9 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) inode->i_ino = args->ino; NILFS_I(inode)->i_cno = args->cno; NILFS_I(inode)->i_root = args->root; + NILFS_I(inode)->i_type = args->type; if (args->root && args->ino == NILFS_ROOT_INO) nilfs_get_root(args->root); - - if (args->for_gc) - NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); - if (args->for_btnc) - NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); - if (args->for_shadow) - NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); return 0; } @@ -588,8 +527,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -599,8 +537,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -615,8 +552,14 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, inode = nilfs_iget_locked(sb, root, ino); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) + + if (!(inode->i_state & I_NEW)) { + if (!inode->i_nlink) { + iput(inode); + return ERR_PTR(-ESTALE); + } return inode; + } err = __nilfs_read_inode(sb, root, ino, inode); if (unlikely(err)) { @@ -631,8 +574,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = true, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = NULL, .cno = 
cno, .type = NILFS_I_TYPE_GC }; struct inode *inode; int err; @@ -660,10 +602,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, * or does nothing if the inode already has it. This function allocates * an additional inode to maintain page cache of B-tree nodes one-on-one. * - * Return Value: On success, 0 is returned. On errors, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or %-ENOMEM if memory is insufficient. */ int nilfs_attach_btree_node_cache(struct inode *inode) { @@ -677,9 +616,7 @@ int nilfs_attach_btree_node_cache(struct inode *inode) args.ino = inode->i_ino; args.root = ii->i_root; args.cno = ii->i_cno; - args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; - args.for_btnc = true; - args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; + args.type = ii->i_type | NILFS_I_TYPE_BTNC; btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -724,17 +661,14 @@ void nilfs_detach_btree_node_cache(struct inode *inode) * in one inode and the one for b-tree node pages is set up in the * other inode, which is attached to the former inode. * - * Return Value: On success, a pointer to the inode for data pages is - * returned. On errors, one of the following negative error code is returned - * in a pointer type. - * - * %-ENOMEM - Insufficient memory available. + * Return: a pointer to the inode for data pages on success, or %-ENOMEM + * if memory is insufficient. */ struct inode *nilfs_iget_for_shadow(struct inode *inode) { struct nilfs_iget_args args = { - .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = true + .ino = inode->i_ino, .root = NULL, .cno = 0, + .type = NILFS_I_TYPE_SHADOW }; struct inode *s_inode; int err; @@ -749,6 +683,7 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode) NILFS_I(s_inode)->i_flags = 0; memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); + s_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; err = nilfs_attach_btree_node_cache(s_inode); if (unlikely(err)) { @@ -900,7 +835,7 @@ static void nilfs_clear_inode(struct inode *inode) if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - if (!test_bit(NILFS_I_BTNC, &ii->i_state)) + if (!(ii->i_type & NILFS_I_TYPE_BTNC)) nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) @@ -1251,7 +1186,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (size) { if (phys && blkphy << blkbits == phys + size) { /* The current extent goes on */ - size += n << blkbits; + size += (u64)n << blkbits; } else { /* Terminate the current extent */ ret = fiemap_fill_next_extent( @@ -1264,14 +1199,14 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } } else { /* Start a new extent */ flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } blkoff += n; } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index f1a01c191cf5..a66d62a51f77 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -17,6 +17,7 @@ #include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */ #include <linux/buffer_head.h> #include 
<linux/fileattr.h> +#include <linux/string.h> #include "nilfs.h" #include "segment.h" #include "bmap.h" @@ -32,17 +33,14 @@ * @dofunc: concrete function of get/set metadata info * * Description: nilfs_ioctl_wrap_copy() gets/sets metadata info by means of - * calling dofunc() function on the basis of @argv argument. - * - * Return Value: On success, 0 is returned and requested metadata info - * is copied into userspace. On error, one of the following - * negative error codes is returned. - * - * %-EINVAL - Invalid arguments from userspace. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EFAULT - Failure during execution of requested operation. + * calling dofunc() function on the basis of @argv argument. If successful, + * the requested metadata information is copied to userspace memory. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during execution of requested operation. + * * %-EINVAL - Invalid arguments from userspace. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, struct nilfs_argv *argv, int dir, @@ -60,7 +58,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_nmembs == 0) return 0; - if (argv->v_size > PAGE_SIZE) + if ((size_t)argv->v_size > PAGE_SIZE) return -EINVAL; /* @@ -114,7 +112,11 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, } /** - * nilfs_fileattr_get - ioctl to support lsattr + * nilfs_fileattr_get - retrieve miscellaneous file attributes + * @dentry: the object to retrieve from + * @fa: fileattr pointer + * + * Return: always 0 as success. */ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) { @@ -126,7 +128,12 @@ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) } /** - * nilfs_fileattr_set - ioctl to support chattr + * nilfs_fileattr_set - change miscellaneous file attributes + * @idmap: idmap of the mount + * @dentry: the object to change + * @fa: fileattr pointer + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) @@ -159,6 +166,10 @@ int nilfs_fileattr_set(struct mnt_idmap *idmap, /** * nilfs_ioctl_getversion - get info about a file's version (generation number) + * @inode: inode object + * @argp: userspace memory where the generation number of @inode is stored + * + * Return: 0 on success, or %-EFAULT on error. */ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) { @@ -176,13 +187,10 @@ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) * given checkpoint between checkpoint and snapshot state. This ioctl * is used in chcp and mkcp utilities. * - * Return Value: On success, 0 is returned and mode of a checkpoint is - * changed. On error, one of the following negative error codes - * is returned. - * - * %-EPERM - Operation not permitted. - * - * %-EFAULT - Failure during checkpoint mode changing. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * %-EFAULT - Failure during checkpoint mode changing. + * %-EPERM - Operation not permitted. */ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -230,13 +238,10 @@ out: * checkpoint from NILFS2 file system. This ioctl is used in rmcp * utility. * - * Return Value: On success, 0 is returned and a checkpoint is - * removed. 
On error, one of the following negative error codes - * is returned. - * - * %-EPERM - Operation not permitted. - * - * %-EFAULT - Failure during checkpoint removing. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * %-EFAULT - Failure during checkpoint removing. + * %-EPERM - Operation not permitted. */ static int nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, @@ -282,7 +287,7 @@ out: * requested checkpoints. The NILFS_IOCTL_GET_CPINFO ioctl is used in * lscp utility and by nilfs_cleanerd daemon. * - * Return value: count of nilfs_cpinfo structures in output buffer. + * Return: Count of nilfs_cpinfo structures in output buffer. */ static ssize_t nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, @@ -306,17 +311,14 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, * * Description: nilfs_ioctl_get_cpstat() returns information about checkpoints. * The NILFS_IOCTL_GET_CPSTAT ioctl is used by lscp, rmcp utilities - * and by nilfs_cleanerd daemon. - * - * Return Value: On success, 0 is returned, and checkpoints information is - * copied into userspace pointer @argp. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EFAULT - Failure during getting checkpoints statistics. + * and by nilfs_cleanerd daemon. The checkpoint statistics are copied to + * the userspace memory pointed to by @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during getting checkpoints statistics. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -349,7 +351,8 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, * info about requested segments. The NILFS_IOCTL_GET_SUINFO ioctl is used * in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon. * - * Return value: count of nilfs_suinfo structures in output buffer. + * Return: Count of nilfs_suinfo structures in output buffer on success, + * or a negative error code on failure. */ static ssize_t nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, @@ -373,17 +376,14 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, * * Description: nilfs_ioctl_get_sustat() returns segment usage statistics. * The NILFS_IOCTL_GET_SUSTAT ioctl is used in lssu, nilfs_resize utilities - * and by nilfs_cleanerd daemon. - * - * Return Value: On success, 0 is returned, and segment usage information is - * copied into userspace pointer @argp. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EFAULT - Failure during getting segment usage statistics. + * and by nilfs_cleanerd daemon. The requested segment usage information is + * copied to the userspace memory pointed to by @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during getting segment usage statistics. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. 
*/ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -416,7 +416,8 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, * on virtual block addresses. The NILFS_IOCTL_GET_VINFO ioctl is used * by nilfs_cleanerd daemon. * - * Return value: count of nilfs_vinfo structures in output buffer. + * Return: Count of nilfs_vinfo structures in output buffer on success, or + * a negative error code on failure. */ static ssize_t nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, @@ -443,7 +444,8 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl * is used by nilfs_cleanerd daemon. * - * Return value: count of nilfs_bdescs structures in output buffer. + * Return: Count of nilfs_bdescs structures in output buffer on success, or + * a negative error code on failure. */ static ssize_t nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, @@ -480,19 +482,15 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, * * Description: nilfs_ioctl_do_get_bdescs() function returns information * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl - * is used by nilfs_cleanerd daemon. - * - * Return Value: On success, 0 is returned, and disk block descriptors are - * copied into userspace pointer @argp. On error, one of the following - * negative error codes is returned. - * - * %-EINVAL - Invalid arguments from userspace. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EFAULT - Failure during getting disk block descriptors. + * is used by nilfs_cleanerd daemon. If successful, disk block descriptors + * are copied to userspace pointer @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during getting disk block descriptors. + * * %-EINVAL - Invalid arguments from userspace. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -526,16 +524,12 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, * Description: nilfs_ioctl_move_inode_block() function registers data/node * buffer in the GC pagecache and submit read request. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - Requested block doesn't exist. - * - * %-EEXIST - Blocks conflict is detected. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EEXIST - Block conflict detected. + * * %-EIO - I/O error. + * * %-ENOENT - Requested block doesn't exist. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_move_inode_block(struct inode *inode, struct nilfs_vdesc *vdesc, @@ -590,8 +584,8 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, * blocks that garbage collector specified with the array of nilfs_vdesc * structures and stores them into page caches of GC inodes. * - * Return Value: Number of processed nilfs_vdesc structures or - * error code, otherwise. + * Return: Number of processed nilfs_vdesc structures on success, or + * a negative error code on failure. 
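/*
 * Illustrative userspace sketch, not part of this patch: the checkpoint and
 * segment-usage ioctls documented here all pass a struct nilfs_argv that
 * describes a userspace array, which nilfs_ioctl_wrap_copy() then fills in
 * batches.  This assumes the UAPI definitions in <linux/nilfs2_api.h>; the
 * mount point path and batch size are placeholders, and treating v_nmembs
 * as updated to the number of entries actually returned is an assumption
 * about the wrap-copy helper, not something stated in the comments above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nilfs2_api.h>

int main(void)
{
	struct nilfs_cpinfo cps[16];
	struct nilfs_argv argv = {
		.v_base   = (unsigned long)cps,
		.v_nmembs = 16,				/* room for 16 entries */
		.v_size   = sizeof(struct nilfs_cpinfo),
		.v_index  = 1,				/* start at checkpoint #1 */
		.v_flags  = NILFS_CHECKPOINT,		/* list checkpoints, not snapshots */
	};
	int fd = open("/mnt/nilfs", O_RDONLY);		/* hypothetical NILFS2 mount point */
	unsigned int i;

	if (fd < 0 || ioctl(fd, NILFS_IOCTL_GET_CPINFO, &argv) < 0) {
		perror("NILFS_IOCTL_GET_CPINFO");
		return 1;
	}
	for (i = 0; i < argv.v_nmembs; i++)
		printf("checkpoint %llu\n", (unsigned long long)cps[i].ci_cno);
	close(fd);
	return 0;
}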
*/ static int nilfs_ioctl_move_blocks(struct super_block *sb, struct nilfs_argv *argv, void *buf) @@ -668,14 +662,11 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, * in the period from p_start to p_end, excluding p_end itself. The checkpoints * which have been already deleted are ignored. * - * Return Value: Number of processed nilfs_period structures or - * error code, otherwise. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - invalid checkpoints. + * Return: Number of processed nilfs_period structures on success, or one of + * the following negative error codes on failure: + * * %-EINVAL - invalid checkpoints. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) @@ -703,14 +694,11 @@ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, * Description: nilfs_ioctl_free_vblocknrs() function frees * the virtual block numbers specified by @buf and @argv->v_nmembs. * - * Return Value: Number of processed virtual block numbers or - * error code, otherwise. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The virtual block number have not been allocated. + * Return: Number of processed virtual block numbers on success, or one of the + * following negative error codes on failure: + * * %-EIO - I/O error. + * * %-ENOENT - Unallocated virtual block number. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) @@ -732,14 +720,11 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, * Description: nilfs_ioctl_mark_blocks_dirty() function marks * metadata file or data blocks as dirty. * - * Return Value: Number of processed block descriptors or - * error code, otherwise. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EIO - I/O error - * - * %-ENOENT - the specified block does not exist (hole block) + * Return: Number of processed block descriptors on success, or one of the + * following negative error codes on failure: + * * %-EIO - I/O error. + * * %-ENOENT - Non-existent block (hole block). + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) @@ -838,7 +823,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, * from userspace. The NILFS_IOCTL_CLEAN_SEGMENTS ioctl is used by * nilfs_cleanerd daemon. * - * Return Value: On success, 0 is returned or error code, otherwise. + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -962,20 +947,14 @@ out: * and metadata are written out to the device when it successfully * returned. * - * Return Value: On success, 0 is retured. On errors, one of the following - * negative error code is returned. - * - * %-EROFS - Read only filesystem. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EFAULT - Failure during execution of requested operation. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during execution of requested operation. 
+ * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. + * * %-EROFS - Read only filesystem. */ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -1009,7 +988,7 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, * @filp: file object * @argp: pointer on argument from userspace * - * Return Value: On success, 0 is returned or error code, otherwise. + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp, void __user *argp) @@ -1045,7 +1024,7 @@ out: * checks the arguments from userspace and calls nilfs_sufile_trim_fs, which * performs the actual trim operation. * - * Return Value: On success, 0 is returned or negative error code, otherwise. + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp) { @@ -1087,7 +1066,7 @@ static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp) * of segments in bytes and upper limit of segments in bytes. * The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility. * - * Return Value: On success, 0 is returned or error code, otherwise. + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) { @@ -1138,17 +1117,15 @@ out: * @dofunc: concrete function of getting metadata info * * Description: nilfs_ioctl_get_info() gets metadata info by means of - * calling dofunc() function. - * - * Return Value: On success, 0 is returned and requested metadata info - * is copied into userspace. On error, one of the following - * negative error codes is returned. - * - * %-EINVAL - Invalid arguments from userspace. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EFAULT - Failure during execution of requested operation. + * calling dofunc() function. The requested metadata information is copied + * to userspace memory @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during execution of requested operation. + * * %-EINVAL - Invalid arguments from userspace. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, @@ -1188,18 +1165,14 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, * encapsulated in nilfs_argv and updates the segment usage info * according to the flags in nilfs_suinfo_update. * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EPERM - Not enough permissions - * - * %-EFAULT - Error copying input data - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid values in input (segment number, flags or nblocks) + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EEXIST - Block conflict detected. + * * %-EFAULT - Error copying input data. + * * %-EINVAL - Invalid values in input (segment number, flags or nblocks). + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-EPERM - Not enough permissions. 
*/ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -1266,6 +1239,92 @@ out: return ret; } +/** + * nilfs_ioctl_get_fslabel - get the volume name of the file system + * @sb: super block instance + * @argp: pointer to userspace memory where the volume name should be stored + * + * Return: 0 on success, %-EFAULT if copying to userspace memory fails. + */ +static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + char label[NILFS_MAX_VOLUME_NAME + 1]; + + BUILD_BUG_ON(NILFS_MAX_VOLUME_NAME >= FSLABEL_MAX); + + down_read(&nilfs->ns_sem); + memtostr_pad(label, nilfs->ns_sbp[0]->s_volume_name); + up_read(&nilfs->ns_sem); + + if (copy_to_user(argp, label, sizeof(label))) + return -EFAULT; + return 0; +} + +/** + * nilfs_ioctl_set_fslabel - set the volume name of the file system + * @sb: super block instance + * @filp: file object + * @argp: pointer to userspace memory that contains the volume name + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Error copying input data. + * * %-EINVAL - Label length exceeds record size in superblock. + * * %-EIO - I/O error. + * * %-EPERM - Operation not permitted (insufficient permissions). + * * %-EROFS - Read only file system. + */ +static int nilfs_ioctl_set_fslabel(struct super_block *sb, struct file *filp, + void __user *argp) +{ + char label[NILFS_MAX_VOLUME_NAME + 1]; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + size_t len; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (copy_from_user(label, argp, NILFS_MAX_VOLUME_NAME + 1)) { + ret = -EFAULT; + goto out_drop_write; + } + + len = strnlen(label, NILFS_MAX_VOLUME_NAME + 1); + if (len > NILFS_MAX_VOLUME_NAME) { + nilfs_err(sb, "unable to set label with more than %zu bytes", + NILFS_MAX_VOLUME_NAME); + ret = -EINVAL; + goto out_drop_write; + } + + down_write(&nilfs->ns_sem); + sbp = nilfs_prepare_super(sb, false); + if (unlikely(!sbp)) { + ret = -EIO; + goto out_unlock; + } + + strtomem_pad(sbp[0]->s_volume_name, label, 0); + if (sbp[1]) + strtomem_pad(sbp[1]->s_volume_name, label, 0); + + ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); + +out_unlock: + up_write(&nilfs->ns_sem); +out_drop_write: + mnt_drop_write_file(filp); + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1308,6 +1367,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_set_alloc_range(inode, argp); case FITRIM: return nilfs_ioctl_trim_fs(inode, argp); + case FS_IOC_GETFSLABEL: + return nilfs_ioctl_get_fslabel(inode->i_sb, argp); + case FS_IOC_SETFSLABEL: + return nilfs_ioctl_set_fslabel(inode->i_sb, filp, argp); default: return -ENOTTY; } @@ -1334,6 +1397,8 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: case FITRIM: + case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: break; default: return -ENOIOCTLCMD; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 4f792a0ad0f0..946b0d3534a5 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -33,7 +33,8 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, struct buffer_head *, void *)) { struct nilfs_inode_info *ii = NILFS_I(inode); - void *kaddr; 
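/*
 * Illustrative userspace sketch, not part of this patch: exercising the
 * generic FS_IOC_GETFSLABEL/FS_IOC_SETFSLABEL ioctls wired up in ioctl.c
 * above.  Only the definitions in <linux/fs.h> (FSLABEL_MAX) are assumed;
 * the mount point and the new label are placeholders, and setting the label
 * needs CAP_SYS_ADMIN on a writable mount, as the handler above enforces.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(void)
{
	char label[FSLABEL_MAX] = "";
	int fd = open("/mnt/nilfs", O_RDONLY);	/* hypothetical mount point */

	if (fd < 0)
		return 1;

	if (ioctl(fd, FS_IOC_GETFSLABEL, label) == 0)
		printf("current label: %s\n", label);

	strncpy(label, "backup-vol", sizeof(label) - 1);	/* placeholder label */
	if (ioctl(fd, FS_IOC_SETFSLABEL, label) != 0)
		perror("FS_IOC_SETFSLABEL");

	close(fd);
	return 0;
}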
+ struct folio *folio = bh->b_folio; + void *from; int ret; /* Caller exclude read accesses using page lock */ @@ -47,12 +48,14 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_mapped(bh); - kaddr = kmap_local_page(bh->b_page); - memset(kaddr + bh_offset(bh), 0, i_blocksize(inode)); + /* Initialize block (block size > PAGE_SIZE not yet supported) */ + from = kmap_local_folio(folio, offset_in_folio(folio, bh->b_data)); + memset(from, 0, bh->b_size); if (init_block) - init_block(inode, bh, kaddr); - flush_dcache_page(bh->b_page); - kunmap_local(kaddr); + init_block(inode, bh, from); + kunmap_local(from); + + flush_dcache_folio(folio); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -89,7 +92,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, if (buffer_uptodate(bh)) goto failed_bh; - bh->b_bdev = sb->s_bdev; err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); if (likely(!err)) { get_bh(bh); @@ -224,20 +226,21 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, * @out_bh: output of a pointer to the buffer_head * * nilfs_mdt_get_block() looks up the specified buffer and tries to create - * a new buffer if @create is not zero. On success, the returned buffer is - * assured to be either existing or formatted using a buffer lock on success. - * @out_bh is substituted only when zero is returned. - * - * Return Value: On success, it returns 0. On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * a new buffer if @create is not zero. If (and only if) this function + * succeeds, it stores a pointer to the retrieved buffer head in the location + * pointed to by @out_bh. * - * %-EIO - I/O error + * The retrieved buffer may be either an existing one or a newly allocated one. + * For a newly created buffer, if the callback function argument @init_block + * is non-NULL, the callback will be called with the buffer locked to format + * the block. * - * %-ENOENT - the specified block does not exist (hole block) - * - * %-EROFS - Read only filesystem (for create mode) + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - The specified block does not exist (hole block). + * * %-ENOMEM - Insufficient memory available. + * * %-EROFS - Read only filesystem (for create mode). */ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, void (*init_block)(struct inode *, @@ -273,14 +276,11 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, * @out_bh, and block offset to @blkoff, respectively. @out_bh and * @blkoff are substituted only when zero is returned. * - * Return Value: On success, it returns 0. On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EIO - I/O error - * - * %-ENOENT - no block was found in the range + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No block was found in the range. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_mdt_find_block(struct inode *inode, unsigned long start, unsigned long end, unsigned long *blkoff, @@ -319,12 +319,11 @@ out: * @inode: inode of the meta data file * @block: block offset * - * Return Value: On success, zero is returned. 
- * On error, one of the following negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EIO - I/O error + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Non-existent block. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) { @@ -347,12 +346,10 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and * tries to release the page including the buffer from a page cache. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-EBUSY - page has an active buffer. - * - * %-ENOENT - page cache has no page addressed by the offset. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Page has an active buffer. + * * %-ENOENT - Page cache has no page addressed by the offset. */ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) { @@ -396,10 +393,9 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) return test_bit(NILFS_I_DIRTY, &ii->i_state); } -static int -nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) +static int nilfs_mdt_write_folio(struct folio *folio, + struct writeback_control *wbc) { - struct folio *folio = page_folio(page); struct inode *inode = folio->mapping->host; struct super_block *sb; int err = 0; @@ -411,7 +407,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) * have dirty folios that try to be flushed in background. * So, here we simply discard this dirty folio. */ - nilfs_clear_folio_dirty(folio, false); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); return -EROFS; } @@ -426,17 +422,27 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); - else if (wbc->for_reclaim) - nilfs_flush_segment(sb, inode->i_ino); return err; } +static int nilfs_mdt_writeback(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct folio *folio = NULL; + int error; + + while ((folio = writeback_iter(mapping, wbc, folio, &error))) + error = nilfs_mdt_write_folio(folio, wbc); + + return error; +} static const struct address_space_operations def_mdt_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, - .writepage = nilfs_mdt_write_page, + .writepages = nilfs_mdt_writeback, + .migrate_folio = buffer_migrate_folio_norefs, }; static const struct inode_operations def_mdt_iops; @@ -511,6 +517,8 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size, * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file * @inode: inode of the metadata file * @shadow: shadow mapping + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow) @@ -532,6 +540,8 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, /** * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map * @inode: inode of the metadata file + * + * Return: 0 on success, or a negative error code on failure. 
*/ int nilfs_mdt_save_to_shadow_map(struct inode *inode) { @@ -571,7 +581,8 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) if (!bh_frozen) bh_frozen = create_empty_buffers(folio, 1 << blkbits, 0); - bh_frozen = get_nth_bh(bh_frozen, bh_offset(bh) >> blkbits); + bh_frozen = get_nth_bh(bh_frozen, + offset_in_folio(folio, bh->b_data) >> blkbits); if (!buffer_uptodate(bh_frozen)) nilfs_copy_buffer(bh_frozen, bh); @@ -601,7 +612,8 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) if (!IS_ERR(folio)) { bh_frozen = folio_buffers(folio); if (bh_frozen) { - n = bh_offset(bh) >> inode->i_blkbits; + n = offset_in_folio(folio, bh->b_data) >> + inode->i_blkbits; bh_frozen = get_nth_bh(bh_frozen, n); } folio_unlock(folio); @@ -638,10 +650,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); - nilfs_clear_dirty_pages(inode->i_mapping, true); + nilfs_clear_dirty_pages(inode->i_mapping); nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); - nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping); nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index c950139db6ef..40f4b1a28705 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -55,12 +55,25 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; + int res; if (dentry->d_name.len > NILFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = nilfs_inode_by_name(dir, &dentry->d_name); - inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; + res = nilfs_inode_by_name(dir, &dentry->d_name, &ino); + if (res) { + if (res != -ENOENT) + return ERR_PTR(res); + inode = NULL; + } else { + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); + if (inode == ERR_PTR(-ESTALE)) { + nilfs_error(dir->i_sb, + "deleted inode referenced: %lu", ino); + return ERR_PTR(-EIO); + } + } + return d_splice_alias(inode, dentry); } @@ -149,6 +162,9 @@ static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir, /* slow symlink */ inode->i_op = &nilfs_symlink_inode_operations; inode_nohighmem(inode); + mapping_set_gfp_mask(inode->i_mapping, + mapping_gfp_constraint(inode->i_mapping, + ~__GFP_FS)); inode->i_mapping->a_ops = &nilfs_aops; err = page_symlink(inode, symname, l); if (err) @@ -202,8 +218,8 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, return err; } -static int nilfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) +static struct dentry *nilfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct inode *inode; struct nilfs_transaction_info ti; @@ -211,7 +227,7 @@ static int nilfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) - return err; + return ERR_PTR(err); inc_nlink(dir); @@ -242,7 +258,7 @@ out: else nilfs_transaction_abort(dir->i_sb); - return err; + return ERR_PTR(err); out_fail: drop_nlink(inode); @@ -263,10 +279,11 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) struct folio *folio; int err; - err = -ENOENT; de = nilfs_find_entry(dir, &dentry->d_name, &folio); - if (!de) + if (IS_ERR(de)) { + err = PTR_ERR(de); goto out; + } inode = d_inode(dentry); err = -EIO; @@ -353,6 +370,7 @@ static 
int nilfs_rename(struct mnt_idmap *idmap, struct folio *old_folio; struct nilfs_dir_entry *old_de; struct nilfs_transaction_info ti; + bool old_is_dir = S_ISDIR(old_inode->i_mode); int err; if (flags & ~RENAME_NOREPLACE) @@ -362,12 +380,13 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (unlikely(err)) return err; - err = -ENOENT; old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio); - if (!old_de) + if (IS_ERR(old_de)) { + err = PTR_ERR(old_de); goto out; + } - if (S_ISDIR(old_inode->i_mode)) { + if (old_is_dir && old_dir != new_dir) { err = -EIO; dir_de = nilfs_dotdot(old_inode, &dir_folio); if (!dir_de) @@ -379,18 +398,22 @@ static int nilfs_rename(struct mnt_idmap *idmap, struct nilfs_dir_entry *new_de; err = -ENOTEMPTY; - if (dir_de && !nilfs_empty_dir(new_inode)) + if (old_is_dir && !nilfs_empty_dir(new_inode)) goto out_dir; - err = -ENOENT; - new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_folio); - if (!new_de) + new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, + &new_folio); + if (IS_ERR(new_de)) { + err = PTR_ERR(new_de); goto out_dir; - nilfs_set_link(new_dir, new_de, new_folio, old_inode); + } + err = nilfs_set_link(new_dir, new_de, new_folio, old_inode); folio_release_kmap(new_folio, new_de); + if (unlikely(err)) + goto out_dir; nilfs_mark_inode_dirty(new_dir); inode_set_ctime_current(new_inode); - if (dir_de) + if (old_is_dir) drop_nlink(new_inode); drop_nlink(new_inode); nilfs_mark_inode_dirty(new_inode); @@ -398,7 +421,7 @@ static int nilfs_rename(struct mnt_idmap *idmap, err = nilfs_add_link(new_dentry, old_inode); if (err) goto out_dir; - if (dir_de) { + if (old_is_dir) { inc_nlink(new_dir); nilfs_mark_inode_dirty(new_dir); } @@ -410,28 +433,28 @@ static int nilfs_rename(struct mnt_idmap *idmap, */ inode_set_ctime_current(old_inode); - nilfs_delete_entry(old_de, old_folio); - - if (dir_de) { - nilfs_set_link(old_inode, dir_de, dir_folio, new_dir); - folio_release_kmap(dir_folio, dir_de); - drop_nlink(old_dir); + err = nilfs_delete_entry(old_de, old_folio); + if (likely(!err)) { + if (old_is_dir) { + if (old_dir != new_dir) + err = nilfs_set_link(old_inode, dir_de, + dir_folio, new_dir); + drop_nlink(old_dir); + } + nilfs_mark_inode_dirty(old_dir); } - folio_release_kmap(old_folio, old_de); - - nilfs_mark_inode_dirty(old_dir); nilfs_mark_inode_dirty(old_inode); - err = nilfs_transaction_commit(old_dir->i_sb); - return err; - out_dir: if (dir_de) folio_release_kmap(dir_folio, dir_de); out_old: folio_release_kmap(old_folio, old_de); out: - nilfs_transaction_abort(old_dir->i_sb); + if (likely(!err)) + err = nilfs_transaction_commit(old_dir->i_sb); + else + nilfs_transaction_abort(old_dir->i_sb); return err; } @@ -440,12 +463,13 @@ out: */ static struct dentry *nilfs_get_parent(struct dentry *child) { - unsigned long ino; + ino_t ino; + int res; struct nilfs_root *root; - ino = nilfs_inode_by_name(d_inode(child), &dotdot_name); - if (!ino) - return ERR_PTR(-ENOENT); + res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino); + if (res) + return ERR_PTR(res); root = NILFS_I(d_inode(child))->i_root; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 2e29b98ba8ba..cb6ed54accd7 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -22,6 +22,7 @@ /** * struct nilfs_inode_info - nilfs inode data in memory * @i_flags: inode flags + * @i_type: inode type (combination of flags that indicate usage) * @i_state: dynamic state flags * @i_bmap: pointer on i_bmap_data * @i_bmap_data: raw block mapping @@ -37,6 +38,7 @@ struct 
nilfs_inode_info { __u32 i_flags; + unsigned int i_type; unsigned long i_state; /* Dynamic state flags */ struct nilfs_bmap *i_bmap; struct nilfs_bmap i_bmap_data; @@ -90,9 +92,16 @@ enum { NILFS_I_UPDATED, /* The file has been written back */ NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ - NILFS_I_GCINODE, /* inode for GC, on memory only */ - NILFS_I_BTNC, /* inode for btree node cache */ - NILFS_I_SHADOW, /* inode for shadowed page cache */ +}; + +/* + * Flags to identify the usage of on-memory inodes (i_type) + */ +enum { + NILFS_I_TYPE_NORMAL = 0, + NILFS_I_TYPE_GC = 0x0001, /* For data caching during GC */ + NILFS_I_TYPE_BTNC = 0x0002, /* For btree node cache */ + NILFS_I_TYPE_SHADOW = 0x0004, /* For shadowed page cache */ }; /* @@ -103,6 +112,18 @@ enum { NILFS_SB_COMMIT_ALL /* Commit both super blocks */ }; +/** + * define NILFS_MAX_VOLUME_NAME - maximum number of characters (bytes) in a + * file system volume name + * + * Defined by the size of the volume name field in the on-disk superblocks. + * This volume name does not include the terminating NULL byte if the string + * length matches the field size, so use (NILFS_MAX_VOLUME_NAME + 1) for the + * size of the buffer that requires a NULL byte termination. + */ +#define NILFS_MAX_VOLUME_NAME \ + sizeof_field(struct nilfs_super_block, s_volume_name) + /* * Macros to check inode numbers */ @@ -116,9 +137,15 @@ enum { #define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) #define NILFS_MDT_INODE(sb, ino) \ - ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & BIT(ino))) + ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino))) #define NILFS_VALID_INODE(sb, ino) \ - ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & BIT(ino))) + ((ino) >= NILFS_FIRST_INO(sb) || \ + ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino)))) + +#define NILFS_PRIVATE_INODE(ino) ({ \ + ino_t __ino = (ino); \ + ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \ + (__ino) != NILFS_SKETCH_INO); }) /** * struct nilfs_transaction_info: context information for synchronization @@ -227,15 +254,15 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) /* dir.c */ int nilfs_add_link(struct dentry *, struct inode *); -ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino); int nilfs_make_empty(struct inode *, struct inode *); struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *, struct folio **); int nilfs_delete_entry(struct nilfs_dir_entry *, struct folio *); int nilfs_empty_dir(struct inode *); struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct folio **); -void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, - struct folio *, struct inode *); +int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, + struct folio *folio, struct inode *inode); /* file.c */ extern int nilfs_sync_file(struct file *, loff_t, loff_t, int); @@ -335,8 +362,8 @@ void __nilfs_error(struct super_block *sb, const char *function, extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); -extern int nilfs_store_magic_and_option(struct super_block *, - struct nilfs_super_block *, char *); +extern int nilfs_store_magic(struct super_block *sb, + struct nilfs_super_block *sbp); extern int nilfs_check_feature_compatibility(struct super_block *, struct nilfs_super_block *); extern void 
nilfs_set_log_cursor(struct nilfs_super_block *, @@ -374,6 +401,7 @@ extern const struct file_operations nilfs_dir_operations; extern const struct inode_operations nilfs_file_inode_operations; extern const struct file_operations nilfs_file_operations; extern const struct address_space_operations nilfs_aops; +extern const struct address_space_operations nilfs_buffer_cache_aops; extern const struct inode_operations nilfs_dir_inode_operations; extern const struct inode_operations nilfs_special_inode_operations; extern const struct inode_operations nilfs_symlink_inode_operations; diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 14e470fb8870..806b056d2260 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -39,7 +39,6 @@ static struct buffer_head *__nilfs_get_folio_block(struct folio *folio, first_block = (unsigned long)index << (PAGE_SHIFT - blkbits); bh = get_nth_bh(bh, block - first_block); - touch_buffer(bh); wait_on_buffer(bh); return bh; } @@ -64,6 +63,7 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, folio_put(folio); return NULL; } + bh->b_bdev = inode->i_sb->s_bdev; return bh; } @@ -77,7 +77,8 @@ void nilfs_forget_buffer(struct buffer_head *bh) const unsigned long clear_bits = (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | - BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected)); + BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | + BIT(BH_Delay)); lock_buffer(bh); set_mask_bits(&bh->b_state, clear_bits, 0); @@ -98,16 +99,16 @@ void nilfs_forget_buffer(struct buffer_head *bh) */ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) { - void *kaddr0, *kaddr1; + void *saddr, *daddr; unsigned long bits; - struct page *spage = sbh->b_page, *dpage = dbh->b_page; + struct folio *sfolio = sbh->b_folio, *dfolio = dbh->b_folio; struct buffer_head *bh; - kaddr0 = kmap_local_page(spage); - kaddr1 = kmap_local_page(dpage); - memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); - kunmap_local(kaddr1); - kunmap_local(kaddr0); + saddr = kmap_local_folio(sfolio, bh_offset(sbh)); + daddr = kmap_local_folio(dfolio, bh_offset(dbh)); + memcpy(daddr, saddr, sbh->b_size); + kunmap_local(daddr); + kunmap_local(saddr); dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; dbh->b_blocknr = sbh->b_blocknr; @@ -121,21 +122,20 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) unlock_buffer(bh); } if (bits & BIT(BH_Uptodate)) - SetPageUptodate(dpage); + folio_mark_uptodate(dfolio); else - ClearPageUptodate(dpage); + folio_clear_uptodate(dfolio); if (bits & BIT(BH_Mapped)) - SetPageMappedToDisk(dpage); + folio_set_mappedtodisk(dfolio); else - ClearPageMappedToDisk(dpage); + folio_clear_mappedtodisk(dfolio); } /** * nilfs_folio_buffers_clean - Check if a folio has dirty buffers or not. * @folio: Folio to be checked. * - * nilfs_folio_buffers_clean() returns false if the folio has dirty buffers. - * Otherwise, it returns true. + * Return: false if the folio has dirty buffers, true otherwise. 
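/*
 * Illustrative sketch, not part of this patch: how the new i_type bitmask
 * (see the enum added to nilfs.h above) distinguishes the on-memory inode
 * variants that were previously tracked with separate state bits.  The
 * values mirror that enum; the helper below is hypothetical and only
 * restates the matching rule used by nilfs_iget_test(): the whole type must
 * match, and for GC inodes the checkpoint number must match as well.
 */
#include <stdbool.h>

enum {
	NILFS_I_TYPE_NORMAL = 0,
	NILFS_I_TYPE_GC     = 0x0001,	/* data caching during GC */
	NILFS_I_TYPE_BTNC   = 0x0002,	/* btree node cache */
	NILFS_I_TYPE_SHADOW = 0x0004,	/* shadowed page cache */
};

static bool type_matches(unsigned int i_type, unsigned long long i_cno,
			 unsigned int want_type, unsigned long long want_cno)
{
	if (i_type != want_type)
		return false;
	return !(want_type & NILFS_I_TYPE_GC) || i_cno == want_cno;
}

/*
 * Example: the btree-node-cache companion of a shadow inode carries
 * (NILFS_I_TYPE_SHADOW | NILFS_I_TYPE_BTNC), so type_matches() keeps it
 * from colliding with the plain shadow inode during an icache lookup.
 */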
*/ bool nilfs_folio_buffers_clean(struct folio *folio) { @@ -262,7 +262,7 @@ repeat: NILFS_FOLIO_BUG(folio, "inconsistent dirty state"); dfolio = filemap_grab_folio(dmap, folio->index); - if (unlikely(IS_ERR(dfolio))) { + if (IS_ERR(dfolio)) { /* No empty page is added to the page cache */ folio_unlock(folio); err = PTR_ERR(dfolio); @@ -357,9 +357,8 @@ repeat: /** * nilfs_clear_dirty_pages - discard dirty pages in address space * @mapping: address space with dirty pages for discarding - * @silent: suppress [true] or print [false] warning messages */ -void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) +void nilfs_clear_dirty_pages(struct address_space *mapping) { struct folio_batch fbatch; unsigned int i; @@ -380,7 +379,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) * was acquired. Skip processing in that case. */ if (likely(folio->mapping == mapping)) - nilfs_clear_folio_dirty(folio, silent); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); } @@ -392,54 +391,67 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) /** * nilfs_clear_folio_dirty - discard dirty folio * @folio: dirty folio that will be discarded - * @silent: suppress [true] or print [false] warning messages + * + * nilfs_clear_folio_dirty() clears working states including dirty state for + * the folio and its buffers. If the folio has buffers, clear only if it is + * confirmed that none of the buffer heads are busy (none have valid + * references and none are locked). */ -void nilfs_clear_folio_dirty(struct folio *folio, bool silent) +void nilfs_clear_folio_dirty(struct folio *folio) { - struct inode *inode = folio->mapping->host; - struct super_block *sb = inode->i_sb; struct buffer_head *bh, *head; BUG_ON(!folio_test_locked(folio)); - if (!silent) - nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu", - folio_pos(folio), inode->i_ino); - - folio_clear_uptodate(folio); - folio_clear_mappedtodisk(folio); - head = folio_buffers(folio); if (head) { const unsigned long clear_bits = (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | - BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected)); + BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | + BIT(BH_Delay)); + bool busy, invalidated = false; +recheck_buffers: + busy = false; bh = head; do { - lock_buffer(bh); - if (!silent) - nilfs_warn(sb, - "discard dirty block: blocknr=%llu, size=%zu", - (u64)bh->b_blocknr, bh->b_size); + if (atomic_read(&bh->b_count) | buffer_locked(bh)) { + busy = true; + break; + } + } while (bh = bh->b_this_page, bh != head); + if (busy) { + if (invalidated) + return; + invalidate_bh_lrus(); + invalidated = true; + goto recheck_buffers; + } + + bh = head; + do { + lock_buffer(bh); set_mask_bits(&bh->b_state, clear_bits, 0); unlock_buffer(bh); } while (bh = bh->b_this_page, bh != head); } + folio_clear_uptodate(folio); + folio_clear_mappedtodisk(folio); + folio_clear_checked(folio); __nilfs_clear_folio_dirty(folio); } -unsigned int nilfs_page_count_clean_buffers(struct page *page, +unsigned int nilfs_page_count_clean_buffers(struct folio *folio, unsigned int from, unsigned int to) { unsigned int block_start, block_end; struct buffer_head *bh, *head; unsigned int nc = 0; - for (bh = head = page_buffers(page), block_start = 0; + for (bh = head = folio_buffers(folio), block_start = 0; bh != head || !block_start; block_start = block_end, bh = bh->b_this_page) { block_end = block_start + bh->b_size; @@ -487,8 +499,9 @@ void 
__nilfs_clear_folio_dirty(struct folio *folio) * This function searches an extent of buffers marked "delayed" which * starts from a block offset equal to or larger than @start_blk. If * such an extent was found, this will store the start offset in - * @blkoff and return its length in blocks. Otherwise, zero is - * returned. + * @blkoff and return its length in blocks. + * + * Return: Length in blocks of found extent, 0 otherwise. */ unsigned long nilfs_find_uncommitted_extent(struct inode *inode, sector_t start_blk, diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 7e1a2c455a10..136cd1c143c9 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -41,10 +41,10 @@ void nilfs_folio_bug(struct folio *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_folio_dirty(struct folio *, bool); -void nilfs_clear_dirty_pages(struct address_space *, bool); -unsigned int nilfs_page_count_clean_buffers(struct page *, unsigned int, - unsigned int); +void nilfs_clear_folio_dirty(struct folio *folio); +void nilfs_clear_dirty_pages(struct address_space *mapping); +unsigned int nilfs_page_count_clean_buffers(struct folio *folio, + unsigned int from, unsigned int to); unsigned long nilfs_find_uncommitted_extent(struct inode *inode, sector_t start_blk, sector_t *blkoff); diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 49a70c68bf3c..22aecf6e2344 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -88,6 +88,8 @@ static int nilfs_warn_segment_error(struct super_block *sb, int err) * @check_bytes: number of bytes to be checked * @start: DBN of start block * @nblock: number of blocks to be checked + * + * Return: 0 on success, or %-EIO if an I/O error occurs. */ static int nilfs_compute_checksum(struct the_nilfs *nilfs, struct buffer_head *bhs, u32 *sum, @@ -126,6 +128,11 @@ static int nilfs_compute_checksum(struct the_nilfs *nilfs, * @sr_block: disk block number of the super root block * @pbh: address of a buffer_head pointer to return super root buffer * @check: CRC check flag + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Super root block corrupted. + * * %-EIO - I/O error. */ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, struct buffer_head **pbh, int check) @@ -176,6 +183,8 @@ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, * @nilfs: nilfs object * @start_blocknr: start block number of the log * @sum: pointer to return segment summary structure + * + * Return: Buffer head pointer, or NULL if an I/O error occurs. */ static struct buffer_head * nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr, @@ -195,6 +204,13 @@ nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr, * @seg_seq: sequence number of segment * @bh_sum: buffer head of summary block * @sum: segment summary struct + * + * Return: 0 on success, or one of the following internal codes on failure: + * * %NILFS_SEG_FAIL_MAGIC - Magic number mismatch. + * * %NILFS_SEG_FAIL_SEQ - Sequence number mismatch. + * * %NILFS_SEG_FAIL_CONSISTENCY - Block count out of range. + * * %NILFS_SEG_FAIL_IO - I/O error. + * * %NILFS_SEG_FAIL_CHECKSUM_FULL - Full log checksum verification failed. 
*/ static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq, struct buffer_head *bh_sum, @@ -238,6 +254,9 @@ out: * @pbh: the current buffer head on summary blocks [in, out] * @offset: the current byte offset on summary blocks [in, out] * @bytes: byte size of the item to be read + * + * Return: Kernel space address of current segment summary entry, or + * NULL if an I/O error occurs. */ static void *nilfs_read_summary_info(struct the_nilfs *nilfs, struct buffer_head **pbh, @@ -300,6 +319,11 @@ static void nilfs_skip_summary_info(struct the_nilfs *nilfs, * @start_blocknr: start block number of the log * @sum: log summary information * @head: list head to add nilfs_recovery_block struct + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr, struct nilfs_segment_summary *sum, @@ -433,8 +457,17 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, * The next segment is invalidated by this recovery. */ err = nilfs_sufile_free(sufile, segnum[1]); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "checkpoint log inconsistency at block %llu (segment %llu): next segment %llu is unallocated", + (unsigned long long)nilfs->ns_last_pseg, + (unsigned long long)nilfs->ns_segnum, + (unsigned long long)segnum[1]); + err = -EINVAL; + } goto failed; + } for (i = 1; i < 4; i++) { err = nilfs_segment_list_add(head, segnum[i]); @@ -472,19 +505,16 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, struct nilfs_recovery_block *rb, - loff_t pos, struct page *page) + loff_t pos, struct folio *folio) { struct buffer_head *bh_org; - size_t from = pos & ~PAGE_MASK; - void *kaddr; + size_t from = offset_in_folio(folio, pos); bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); if (unlikely(!bh_org)) return -EIO; - kaddr = kmap_local_page(page); - memcpy(kaddr + from, bh_org->b_data, bh_org->b_size); - kunmap_local(kaddr); + memcpy_to_folio(folio, from, bh_org->b_data, bh_org->b_size); brelse(bh_org); return 0; } @@ -498,7 +528,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, struct inode *inode; struct nilfs_recovery_block *rb, *n; unsigned int blocksize = nilfs->ns_blocksize; - struct page *page; + struct folio *folio; loff_t pos; int err = 0, err2 = 0; @@ -512,7 +542,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, pos = rb->blkoff << inode->i_blkbits; err = block_write_begin(inode->i_mapping, pos, blocksize, - &page, nilfs_get_block); + &folio, nilfs_get_block); if (unlikely(err)) { loff_t isize = inode->i_size; @@ -522,26 +552,26 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, goto failed_inode; } - err = nilfs_recovery_copy_block(nilfs, rb, pos, page); + err = nilfs_recovery_copy_block(nilfs, rb, pos, folio); if (unlikely(err)) - goto failed_page; + goto failed_folio; err = nilfs_set_file_dirty(inode, 1); if (unlikely(err)) - goto failed_page; + goto failed_folio; block_write_end(NULL, inode->i_mapping, pos, blocksize, - blocksize, page, NULL); + blocksize, folio, NULL); - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); (*nr_salvaged_blocks)++; goto next; - failed_page: - unlock_page(page); - put_page(page); + failed_folio: + folio_unlock(folio); + folio_put(folio); 
failed_inode: nilfs_warn(sb, @@ -563,7 +593,14 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, * checkpoint * @nilfs: nilfs object * @sb: super block instance + * @root: NILFS root instance * @ri: pointer to a nilfs_recovery_info + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Log format error. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, struct super_block *sb, @@ -698,9 +735,15 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, return; bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize); - BUG_ON(!bh); + if (WARN_ON(!bh)) + return; /* should never happen */ + + lock_buffer(bh); memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); set_buffer_dirty(bh); + unlock_buffer(bh); + err = sync_dirty_buffer(bh); if (unlikely(err)) nilfs_warn(nilfs->ns_sb, @@ -709,23 +752,45 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, } /** + * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery + * @nilfs: nilfs object + */ +static void nilfs_abort_roll_forward(struct the_nilfs *nilfs) +{ + struct nilfs_inode_info *ii, *n; + LIST_HEAD(head); + + /* Abandon inodes that have read recovery data */ + spin_lock(&nilfs->ns_inode_lock); + list_splice_init(&nilfs->ns_dirty_files, &head); + spin_unlock(&nilfs->ns_inode_lock); + if (list_empty(&head)) + return; + + set_nilfs_purging(nilfs); + list_for_each_entry_safe(ii, n, &head, i_dirty) { + spin_lock(&nilfs->ns_inode_lock); + list_del_init(&ii->i_dirty); + spin_unlock(&nilfs->ns_inode_lock); + + iput(&ii->vfs_inode); + } + clear_nilfs_purging(nilfs); +} + +/** * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint * @nilfs: nilfs object * @sb: super block instance * @ri: pointer to a nilfs_recovery_info struct to store search results. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-EINVAL - Inconsistent filesystem state. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Inconsistent filesystem state. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. */ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb, @@ -766,15 +831,19 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, if (unlikely(err)) { nilfs_err(sb, "error %d writing segment for recovery", err); - goto failed; + goto put_root; } nilfs_finish_roll_forward(nilfs, ri); } - failed: +put_root: nilfs_put_root(root); return err; + +failed: + nilfs_abort_roll_forward(nilfs); + goto put_root; } /** @@ -786,14 +855,11 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, * segment pointed by the superblock. It sets up struct the_nilfs through * this search. It fills nilfs_recovery_info (ri) required for recovery. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-EINVAL - No valid segment found - * - * %-EIO - I/O error - * - * %-ENOMEM - Insufficient memory available. 
+ * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - No valid segment found. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_recovery_info *ri) diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index dc431b4c34c9..a8bdf3d318ea 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -205,7 +205,6 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, { struct buffer_head *bh; struct nilfs_segment_summary *raw_sum; - void *kaddr; u32 crc; bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head, @@ -220,9 +219,13 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, crc = crc32_le(crc, bh->b_data, bh->b_size); } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - kaddr = kmap_local_page(bh->b_page); - crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); - kunmap_local(kaddr); + size_t offset = offset_in_folio(bh->b_folio, bh->b_data); + unsigned char *from; + + /* Do not support block sizes larger than PAGE_SIZE */ + from = kmap_local_folio(bh->b_folio, offset); + crc = crc32_le(crc, from, bh->b_size); + kunmap_local(from); } raw_sum->ss_datasum = cpu_to_le32(crc); } @@ -374,7 +377,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, struct nilfs_write_info *wi, struct buffer_head *bh) { - int len, err; + int err; BUG_ON(wi->nr_vecs <= 0); repeat: @@ -385,8 +388,8 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, (wi->nilfs->ns_blocksize_bits - 9); } - len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh)); - if (len == bh->b_size) { + if (bio_add_folio(wi->bio, bh->b_folio, bh->b_size, + offset_in_folio(bh->b_folio, bh->b_data))) { wi->end++; return 0; } @@ -403,12 +406,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, * @segbuf: buffer storing a log to be written * @nilfs: nilfs object * - * Return Value: On Success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error - * - * %-ENOMEM - Insufficient memory available. + * Return: Always 0. */ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, struct the_nilfs *nilfs) @@ -449,10 +447,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * nilfs_segbuf_wait - wait for completion of requested BIOs * @segbuf: segment buffer * - * Return Value: On Success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error + * Return: 0 on success, or %-EIO if I/O error is detected. */ static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf) { diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index aa5290cb7467..61a4141f8d6b 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -136,7 +136,7 @@ static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int); #define nilfs_cnt32_ge(a, b) \ (typecheck(__u32, a) && typecheck(__u32, b) && \ - ((__s32)(a) - (__s32)(b) >= 0)) + ((__s32)((a) - (b)) >= 0)) static int nilfs_prepare_segment_lock(struct super_block *sb, struct nilfs_transaction_info *ti) @@ -191,12 +191,10 @@ static int nilfs_prepare_segment_lock(struct super_block *sb, * When @vacancy_check flag is set, this function will check the amount of * free space, and will wait for the GC to reclaim disk space if low capacity. 
* - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-ENOSPC - No space left on device + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (if checking free space). */ int nilfs_transaction_begin(struct super_block *sb, struct nilfs_transaction_info *ti, @@ -252,6 +250,8 @@ int nilfs_transaction_begin(struct super_block *sb, * nilfs_transaction_commit() sets a timer to start the segment * constructor. If a sync flag is set, it starts construction * directly. + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_transaction_commit(struct super_block *sb) { @@ -407,6 +407,8 @@ static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, /** * nilfs_segctor_reset_segment_buffer - reset the current segment buffer * @sci: nilfs_sc_info + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) { @@ -519,7 +521,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, ii = NILFS_I(inode); - if (test_bit(NILFS_I_GCINODE, &ii->i_state)) + if (ii->i_type & NILFS_I_TYPE_GC) cno = ii->i_cno; else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) cno = 0; @@ -734,7 +736,6 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, if (!head) head = create_empty_buffers(folio, i_blocksize(inode), 0); - folio_unlock(folio); bh = head; do { @@ -744,11 +745,14 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, list_add_tail(&bh->b_assoc_buffers, listp); ndirties++; if (unlikely(ndirties >= nlimit)) { + folio_unlock(folio); folio_batch_release(&fbatch); cond_resched(); return ndirties; } } while (bh = bh->b_this_page, bh != head); + + folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); @@ -1102,12 +1106,65 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, return err; } +/** + * nilfs_free_segments - free the segments given by an array of segment numbers + * @nilfs: nilfs object + * @segnumv: array of segment numbers to be freed + * @nsegs: number of segments to be freed in @segnumv + * + * nilfs_free_segments() wraps nilfs_sufile_freev() and + * nilfs_sufile_cancel_freev(), and edits the segment usage metadata file + * (sufile) to free all segments given by @segnumv and @nsegs at once. If + * it fails midway, it cancels the changes so that none of the segments are + * freed. If @nsegs is 0, this function does nothing. + * + * The freeing of segments is not finalized until the writing of a log with + * a super root block containing this sufile change is complete, and it can + * be canceled with nilfs_sufile_cancel_freev() until then. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid segment number. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
+ */ +static int nilfs_free_segments(struct the_nilfs *nilfs, __u64 *segnumv, + size_t nsegs) +{ + size_t ndone; + int ret; + + if (!nsegs) + return 0; + + ret = nilfs_sufile_freev(nilfs->ns_sufile, segnumv, nsegs, &ndone); + if (unlikely(ret)) { + nilfs_sufile_cancel_freev(nilfs->ns_sufile, segnumv, ndone, + NULL); + /* + * If a segment usage of the segments to be freed is in a + * hole block, nilfs_sufile_freev() will return -ENOENT. + * In this case, -EINVAL should be returned to the caller + * since there is something wrong with the given segment + * number array. This error can only occur during GC, so + * there is no need to worry about it propagating to other + * callers (such as fsync). + */ + if (ret == -ENOENT) { + nilfs_err(nilfs->ns_sb, + "The segment usage entry %llu to be freed is invalid (in a hole)", + (unsigned long long)segnumv[ndone]); + ret = -EINVAL; + } + } + return ret; +} + static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) { struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct list_head *head; struct nilfs_inode_info *ii; - size_t ndone; int err = 0; switch (nilfs_sc_cstage_get(sci)) { @@ -1201,14 +1258,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) nilfs_sc_cstage_inc(sci); fallthrough; case NILFS_ST_SUFILE: - err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, - sci->sc_nfreesegs, &ndone); - if (unlikely(err)) { - nilfs_sufile_cancel_freev(nilfs->ns_sufile, - sci->sc_freesegs, ndone, - NULL); + err = nilfs_free_segments(nilfs, sci->sc_freesegs, + sci->sc_nfreesegs); + if (unlikely(err)) break; - } sci->sc_stage.flags |= NILFS_CF_SUFREED; err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, @@ -1267,6 +1320,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) * nilfs_segctor_begin_construction - setup segment buffer to make a new log * @sci: nilfs_sc_info * @nilfs: nilfs object + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) @@ -1639,39 +1694,30 @@ static void nilfs_begin_folio_io(struct folio *folio) folio_unlock(folio); } -static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) +/** + * nilfs_prepare_write_logs - prepare to write logs + * @logs: logs to prepare for writing + * @seed: checksum seed value + * + * nilfs_prepare_write_logs() adds checksums and prepares the block + * buffers/folios for writing logs. In order to stabilize folios of + * memory-mapped file blocks by putting them in writeback state before + * calculating the checksums, first prepare to write payload blocks other + * than segment summary and super root blocks in which the checksums will + * be embedded. 
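(Editorial aside, not part of the patch.) The checksums that nilfs_add_checksums_on_logs() embeds below, and that the segbuf.c hunk earlier computes in nilfs_segbuf_fill_in_data_crc(), are accumulated by chaining crc32_le() across the summary and payload buffers of a log. The chaining works because feeding a CRC the blocks one at a time yields the same value as feeding it the concatenated bytes. A stand-alone demonstration of that property, using zlib's crc32() purely for illustration (the kernel uses crc32_le() with the filesystem's CRC seed, not zlib; buffer contents here are made up):

/* build: cc crcdemo.c -lz */
#include <stdio.h>
#include <string.h>
#include <zlib.h>

int main(void)
{
	const unsigned char blk1[] = "segment summary bytes";
	const unsigned char blk2[] = "payload block bytes";
	unsigned char all[64];
	size_t len1 = sizeof(blk1) - 1, len2 = sizeof(blk2) - 1;
	uLong chained, whole;

	/* chained: feed the blocks one at a time, like the buffer_head loop */
	chained = crc32(0L, Z_NULL, 0);
	chained = crc32(chained, blk1, len1);
	chained = crc32(chained, blk2, len2);

	/* whole: the same bytes in one contiguous buffer */
	memcpy(all, blk1, len1);
	memcpy(all + len1, blk2, len2);
	whole = crc32(crc32(0L, Z_NULL, 0), all, len1 + len2);

	printf("chained=%08lx whole=%08lx -> %s\n", chained, whole,
	       chained == whole ? "equal" : "different");
	return 0;
}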
+ */ +static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) { struct nilfs_segment_buffer *segbuf; struct folio *bd_folio = NULL, *fs_folio = NULL; + struct buffer_head *bh; - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; - - list_for_each_entry(bh, &segbuf->sb_segsum_buffers, - b_assoc_buffers) { - if (bh->b_folio != bd_folio) { - if (bd_folio) { - folio_lock(bd_folio); - folio_clear_dirty_for_io(bd_folio); - folio_start_writeback(bd_folio); - folio_unlock(bd_folio); - } - bd_folio = bh->b_folio; - } - } - + /* Prepare to write payload blocks */ + list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh == segbuf->sb_super_root) { - if (bh->b_folio != bd_folio) { - folio_lock(bd_folio); - folio_clear_dirty_for_io(bd_folio); - folio_start_writeback(bd_folio); - folio_unlock(bd_folio); - bd_folio = bh->b_folio; - } + if (bh == segbuf->sb_super_root) break; - } set_buffer_async_write(bh); if (bh->b_folio != fs_folio) { nilfs_begin_folio_io(fs_folio); @@ -1679,13 +1725,49 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } } } + nilfs_begin_folio_io(fs_folio); + + nilfs_add_checksums_on_logs(logs, seed); + + /* Prepare to write segment summary blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + mark_buffer_dirty(bh); + if (bh->b_folio == bd_folio) + continue; + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + } + bd_folio = bh->b_folio; + } + } + + /* Prepare to write super root block */ + bh = NILFS_LAST_SEGBUF(logs)->sb_super_root; + if (bh) { + mark_buffer_dirty(bh); + if (bh->b_folio != bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + bd_folio = bh->b_folio; + } + } + if (bd_folio) { folio_lock(bd_folio); + folio_wait_writeback(bd_folio); folio_clear_dirty_for_io(bd_folio); folio_start_writeback(bd_folio); folio_unlock(bd_folio); } - nilfs_begin_folio_io(fs_folio); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, @@ -1725,14 +1807,8 @@ static void nilfs_end_folio_io(struct folio *folio, int err) return; } - if (!err) { - if (!nilfs_folio_buffers_clean(folio)) - filemap_dirty_folio(folio->mapping, folio); - folio_clear_error(folio); - } else { + if (err || !nilfs_folio_buffers_clean(folio)) filemap_dirty_folio(folio->mapping, folio); - folio_set_error(folio); - } folio_end_writeback(folio); } @@ -1791,6 +1867,9 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, nilfs_abort_logs(&logs, ret ? 
: err); list_splice_tail_init(&sci->sc_segbufs, &logs); + if (list_empty(&logs)) + return; /* if the first segment buffer preparation failed */ + nilfs_cancel_segusage(&logs, nilfs->ns_sufile); nilfs_free_incomplete_logs(&logs, nilfs); @@ -2035,7 +2114,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) err = nilfs_segctor_begin_construction(sci, nilfs); if (unlikely(err)) - goto out; + goto failed; /* Update time stamp */ sci->sc_seg_ctime = ktime_get_real_seconds(); @@ -2073,10 +2152,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - nilfs_segctor_prepare_write(sci); - - nilfs_add_checksums_on_logs(&sci->sc_segbufs, - nilfs->ns_crc_seed); + nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) @@ -2102,10 +2178,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) return err; failed_to_write: - if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) - nilfs_redirty_inodes(&sci->sc_dirty_files); - failed: + if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE) + nilfs_redirty_inodes(&sci->sc_dirty_files); if (nilfs_doing_gc()) nilfs_redirty_inodes(&sci->sc_gc_inodes); nilfs_segctor_abort_construction(sci, nilfs, err); @@ -2124,8 +2199,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) { spin_lock(&sci->sc_state_lock); if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { - sci->sc_timer.expires = jiffies + sci->sc_interval; - add_timer(&sci->sc_timer); + if (sci->sc_task) { + sci->sc_timer.expires = jiffies + sci->sc_interval; + add_timer(&sci->sc_timer); + } sci->sc_state |= NILFS_SEGCTOR_COMMIT; } spin_unlock(&sci->sc_state_lock); @@ -2144,22 +2221,6 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) spin_unlock(&sci->sc_state_lock); } -/** - * nilfs_flush_segment - trigger a segment construction for resource control - * @sb: super block - * @ino: inode number of the file to be flushed out. - */ -void nilfs_flush_segment(struct super_block *sb, ino_t ino) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - struct nilfs_sc_info *sci = nilfs->ns_writer; - - if (!sci || nilfs_doing_construction()) - return; - nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0); - /* assign bit 0 to data files */ -} - struct nilfs_segctor_wait_request { wait_queue_entry_t wq; __u32 seq; @@ -2172,19 +2233,36 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) struct nilfs_segctor_wait_request wait_req; int err = 0; - spin_lock(&sci->sc_state_lock); init_wait(&wait_req.wq); wait_req.err = 0; atomic_set(&wait_req.done, 0); + init_waitqueue_entry(&wait_req.wq, current); + + /* + * To prevent a race issue where completion notifications from the + * log writer thread are missed, increment the request sequence count + * "sc_seq_request" and insert a wait queue entry using the current + * sequence number into the "sc_wait_request" queue at the same time + * within the lock section of "sc_state_lock". 
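(Editorial aside, not part of the patch.) The comment above describes the classic lost-wakeup defence: the waiter must take its ticket (the incremented sc_seq_request) and become visible on the wait queue inside the same sc_state_lock section, so that a completion posted by the log writer cannot slip in between the two steps. A compact user-space sketch of the same idiom with pthreads; every name here is made up for illustration:

/* build: cc waitdemo.c -pthread */
#include <pthread.h>
#include <stdio.h>
#include <stdint.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done_cv = PTHREAD_COND_INITIALIZER;
static uint32_t seq_request, seq_done;

/* "log writer" side: complete everything requested so far */
static void *writer(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	seq_done = seq_request;		/* pretend the logs were written */
	pthread_cond_broadcast(&done_cv);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	uint32_t my_seq;

	pthread_mutex_lock(&lock);
	my_seq = ++seq_request;		/* take a ticket ... */
	pthread_create(&t, NULL, writer, NULL);
	/*
	 * ... and wait while still holding the lock: pthread_cond_wait()
	 * releases it atomically, so the writer's broadcast cannot fall
	 * between taking the ticket and starting to wait (no lost wakeup).
	 */
	while ((int32_t)(seq_done - my_seq) < 0)
		pthread_cond_wait(&done_cv, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	printf("request %u completed\n", (unsigned int)my_seq);
	return 0;
}

In the kernel version the condition variable is replaced by the sc_wait_request wait queue entry added under sc_state_lock, and the completion test is the wrap-safe nilfs_cnt32_ge() comparison, but the register-before-release discipline is the same.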
+ */ + spin_lock(&sci->sc_state_lock); wait_req.seq = ++sci->sc_seq_request; + add_wait_queue(&sci->sc_wait_request, &wait_req.wq); spin_unlock(&sci->sc_state_lock); - init_waitqueue_entry(&wait_req.wq, current); - add_wait_queue(&sci->sc_wait_request, &wait_req.wq); - set_current_state(TASK_INTERRUPTIBLE); wake_up(&sci->sc_wait_daemon); for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + /* + * Synchronize only while the log writer thread is alive. + * Leave flushing out after the log writer thread exits to + * the cleanup work in nilfs_segctor_destroy(). + */ + if (!sci->sc_task) + break; + if (atomic_read(&wait_req.done)) { err = wait_req.err; break; @@ -2200,7 +2278,7 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) return err; } -static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) +static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force) { struct nilfs_segctor_wait_request *wrq, *n; unsigned long flags; @@ -2208,7 +2286,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) spin_lock_irqsave(&sci->sc_wait_request.lock, flags); list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) { if (!atomic_read(&wrq->done) && - nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { + (force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) { wrq->err = err; atomic_set(&wrq->done, 1); } @@ -2225,18 +2303,13 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) * nilfs_construct_segment - construct a logical segment * @sb: super block * - * Return Value: On success, 0 is returned. On errors, one of the following - * negative error code is returned. - * - * %-EROFS - Read only filesystem. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. + * * %-EROFS - Read only filesystem. */ int nilfs_construct_segment(struct super_block *sb) { @@ -2260,18 +2333,13 @@ int nilfs_construct_segment(struct super_block *sb) * @start: start byte offset * @end: end byte offset (inclusive) * - * Return Value: On success, 0 is returned. On errors, one of the following - * negative error code is returned. - * - * %-EROFS - Read only filesystem. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. + * * %-EROFS - Read only filesystem. 
*/ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, loff_t start, loff_t end) @@ -2326,10 +2394,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, */ static void nilfs_segctor_accept(struct nilfs_sc_info *sci) { + bool thread_is_alive; + spin_lock(&sci->sc_state_lock); sci->sc_seq_accepted = sci->sc_seq_request; + thread_is_alive = (bool)sci->sc_task; spin_unlock(&sci->sc_state_lock); - del_timer_sync(&sci->sc_timer); + + /* + * This function does not race with the log writer thread's + * termination. Therefore, deleting sc_timer, which should not be + * done after the log writer thread exits, can be done safely outside + * the area protected by sc_state_lock. + */ + if (thread_is_alive) + timer_delete_sync(&sci->sc_timer); } /** @@ -2346,7 +2425,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) if (mode == SC_LSEG_SR) { sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; sci->sc_seq_done = sci->sc_seq_accepted; - nilfs_segctor_wakeup(sci, err); + nilfs_segctor_wakeup(sci, err, false); sci->sc_flush_request = 0; } else { if (mode == SC_FLUSH_FILE) @@ -2355,7 +2434,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) sci->sc_flush_request &= ~FLUSH_DAT_BIT; /* re-enable timer if checkpoint creation was not done */ - if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task && time_before(jiffies, sci->sc_timer.expires)) add_timer(&sci->sc_timer); } @@ -2366,6 +2445,8 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) * nilfs_segctor_construct - form logs and write them to disk * @sci: segment constructor object * @mode: mode of log forming + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) { @@ -2406,7 +2487,7 @@ static void nilfs_construction_timeout(struct timer_list *t) { struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer); - wake_up_process(sci->sc_timer_task); + wake_up_process(sci->sc_task); } static void @@ -2532,121 +2613,85 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) } /** - * nilfs_segctor_thread - main loop of the segment constructor thread. + * nilfs_log_write_required - determine whether log writing is required + * @sci: nilfs_sc_info struct + * @modep: location for storing log writing mode + * + * Return: true if log writing is required, false otherwise. If log writing + * is required, the mode is stored in the location pointed to by @modep. + */ +static bool nilfs_log_write_required(struct nilfs_sc_info *sci, int *modep) +{ + bool timedout, ret = true; + + spin_lock(&sci->sc_state_lock); + timedout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + time_after_eq(jiffies, sci->sc_timer.expires)); + if (timedout || sci->sc_seq_request != sci->sc_seq_done) + *modep = SC_LSEG_SR; + else if (sci->sc_flush_request) + *modep = nilfs_segctor_flush_mode(sci); + else + ret = false; + + spin_unlock(&sci->sc_state_lock); + return ret; +} + +/** + * nilfs_segctor_thread - main loop of the log writer thread * @arg: pointer to a struct nilfs_sc_info. * - * nilfs_segctor_thread() initializes a timer and serves as a daemon - * to execute segment constructions. + * nilfs_segctor_thread() is the main loop function of the log writer kernel + * thread, which determines whether log writing is necessary, and if so, + * performs the log write in the background, or waits if not. 
It is also + * used to decide the background writeback of the superblock. + * + * Return: Always 0. */ static int nilfs_segctor_thread(void *arg) { struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - int timeout = 0; - - sci->sc_timer_task = current; - /* start sync. */ - sci->sc_task = current; - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ nilfs_info(sci->sc_super, "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds", sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); set_freezable(); - spin_lock(&sci->sc_state_lock); - loop: - for (;;) { - int mode; - if (sci->sc_state & NILFS_SEGCTOR_QUIT) - goto end_thread; - - if (timeout || sci->sc_seq_request != sci->sc_seq_done) - mode = SC_LSEG_SR; - else if (sci->sc_flush_request) - mode = nilfs_segctor_flush_mode(sci); - else - break; - - spin_unlock(&sci->sc_state_lock); - nilfs_segctor_thread_construct(sci, mode); - spin_lock(&sci->sc_state_lock); - timeout = 0; - } - - - if (freezing(current)) { - spin_unlock(&sci->sc_state_lock); - try_to_freeze(); - spin_lock(&sci->sc_state_lock); - } else { + while (!kthread_should_stop()) { DEFINE_WAIT(wait); - int should_sleep = 1; + bool should_write; + int mode; + + if (freezing(current)) { + try_to_freeze(); + continue; + } prepare_to_wait(&sci->sc_wait_daemon, &wait, TASK_INTERRUPTIBLE); - - if (sci->sc_seq_request != sci->sc_seq_done) - should_sleep = 0; - else if (sci->sc_flush_request) - should_sleep = 0; - else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) - should_sleep = time_before(jiffies, - sci->sc_timer.expires); - - if (should_sleep) { - spin_unlock(&sci->sc_state_lock); + should_write = nilfs_log_write_required(sci, &mode); + if (!should_write) schedule(); - spin_lock(&sci->sc_state_lock); - } finish_wait(&sci->sc_wait_daemon, &wait); - timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && - time_after_eq(jiffies, sci->sc_timer.expires)); if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) set_nilfs_discontinued(nilfs); + + if (should_write) + nilfs_segctor_thread_construct(sci, mode); } - goto loop; - end_thread: /* end sync. 
*/ + spin_lock(&sci->sc_state_lock); sci->sc_task = NULL; - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ + timer_shutdown_sync(&sci->sc_timer); spin_unlock(&sci->sc_state_lock); return 0; } -static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) -{ - struct task_struct *t; - - t = kthread_run(nilfs_segctor_thread, sci, "segctord"); - if (IS_ERR(t)) { - int err = PTR_ERR(t); - - nilfs_err(sci->sc_super, "error %d creating segctord thread", - err); - return err; - } - wait_event(sci->sc_wait_task, sci->sc_task != NULL); - return 0; -} - -static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) - __acquires(&sci->sc_state_lock) - __releases(&sci->sc_state_lock) -{ - sci->sc_state |= NILFS_SEGCTOR_QUIT; - - while (sci->sc_task) { - wake_up(&sci->sc_wait_daemon); - spin_unlock(&sci->sc_state_lock); - wait_event(sci->sc_wait_task, sci->sc_task == NULL); - spin_lock(&sci->sc_state_lock); - } -} - /* * Setup & clean-up functions */ @@ -2667,7 +2712,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, init_waitqueue_head(&sci->sc_wait_request); init_waitqueue_head(&sci->sc_wait_daemon); - init_waitqueue_head(&sci->sc_wait_task); spin_lock_init(&sci->sc_state_lock); INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); @@ -2675,7 +2719,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, INIT_LIST_HEAD(&sci->sc_gc_inodes); INIT_LIST_HEAD(&sci->sc_iput_queue); INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); - timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; @@ -2723,12 +2766,23 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) up_write(&nilfs->ns_segctor_sem); + if (sci->sc_task) { + wake_up(&sci->sc_wait_daemon); + kthread_stop(sci->sc_task); + } + spin_lock(&sci->sc_state_lock); - nilfs_segctor_kill_thread(sci); flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); + /* + * Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can + * be called from delayed iput() via nilfs_evict_inode() and can race + * with the above log writer thread termination. + */ + nilfs_segctor_wakeup(sci, 0, true); + if (flush_work(&sci->sc_iput_work)) flag = true; @@ -2754,7 +2808,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) down_write(&nilfs->ns_segctor_sem); - timer_shutdown_sync(&sci->sc_timer); kfree(sci); } @@ -2766,14 +2819,16 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * This allocates a log writer object, initializes it, and starts the * log writer. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINTR - Log writer thread creation failed due to interruption. + * * %-ENOMEM - Insufficient memory available. 
*/ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) { struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci; + struct task_struct *t; int err; if (nilfs->ns_writer) { @@ -2786,17 +2841,23 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) return 0; } - nilfs->ns_writer = nilfs_segctor_new(sb, root); - if (!nilfs->ns_writer) + sci = nilfs_segctor_new(sb, root); + if (unlikely(!sci)) return -ENOMEM; - inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); - - err = nilfs_segctor_start_thread(nilfs->ns_writer); - if (unlikely(err)) + nilfs->ns_writer = sci; + t = kthread_create(nilfs_segctor_thread, sci, "segctord"); + if (IS_ERR(t)) { + err = PTR_ERR(t); + nilfs_err(sb, "error %d creating segctord thread", err); nilfs_detach_log_writer(sb); + return err; + } + sci->sc_task = t; + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); - return err; + wake_up_process(sci->sc_task); + return 0; } /** diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 1060f72ebf5a..4b39ed43ae72 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -22,10 +22,10 @@ struct nilfs_root; * struct nilfs_recovery_info - Recovery information * @ri_need_recovery: Recovery status * @ri_super_root: Block number of the last super root - * @ri_ri_cno: Number of the last checkpoint + * @ri_cno: Number of the last checkpoint * @ri_lsegs_start: Region for roll-forwarding (start block number) * @ri_lsegs_end: Region for roll-forwarding (end block number) - * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start + * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start * @ri_used_segments: List of segments to be mark active * @ri_pseg_start: Block number of the last partial segment * @ri_seq: Sequence number on the last partial segment @@ -105,9 +105,8 @@ struct nilfs_segsum_pointer { * @sc_flush_request: inode bitmap of metadata files to be flushed * @sc_wait_request: Client request queue * @sc_wait_daemon: Daemon wait queue - * @sc_wait_task: Start/end wait queue to control segctord task * @sc_seq_request: Request counter - * @sc_seq_accept: Accepted request count + * @sc_seq_accepted: Accepted request count * @sc_seq_done: Completion counter * @sc_sync: Request of explicit sync operation * @sc_interval: Timeout value of background construction @@ -158,7 +157,6 @@ struct nilfs_sc_info { wait_queue_head_t sc_wait_request; wait_queue_head_t sc_wait_daemon; - wait_queue_head_t sc_wait_task; __u32 sc_seq_request; __u32 sc_seq_accepted; @@ -171,7 +169,6 @@ struct nilfs_sc_info { unsigned long sc_watermark; struct timer_list sc_timer; - struct task_struct *sc_timer_task; struct task_struct *sc_task; }; @@ -192,7 +189,6 @@ enum { }; /* sc_state */ -#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */ #define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */ /* @@ -230,7 +226,6 @@ extern void nilfs_relax_pressure_in_lock(struct super_block *); extern int nilfs_construct_segment(struct super_block *); extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, loff_t, loff_t); -extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 6748218be7c5..330f269abedf 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -70,19 +70,35 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, max - curr + 1); 
} -static struct nilfs_segment_usage * -nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, - struct buffer_head *bh, void *kaddr) +/** + * nilfs_sufile_segment_usage_offset - calculate the byte offset of a segment + * usage entry in the folio containing it + * @sufile: segment usage file inode + * @segnum: number of segment usage + * @bh: buffer head of block containing segment usage indexed by @segnum + * + * Return: Byte offset in the folio of the segment usage entry. + */ +static size_t nilfs_sufile_segment_usage_offset(const struct inode *sufile, + __u64 segnum, + struct buffer_head *bh) { - return kaddr + bh_offset(bh) + + return offset_in_folio(bh->b_folio, bh->b_data) + nilfs_sufile_get_offset(sufile, segnum) * NILFS_MDT(sufile)->mi_entry_size; } -static inline int nilfs_sufile_get_header_block(struct inode *sufile, - struct buffer_head **bhp) +static int nilfs_sufile_get_header_block(struct inode *sufile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(sufile->i_sb, + "missing header block in segment usage metadata"); + err = -EIO; + } + return err; } static inline int @@ -105,13 +121,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, u64 ncleanadd, u64 ndirtyadd) { struct nilfs_sufile_header *header; - void *kaddr; - kaddr = kmap_local_page(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->sh_ncleansegs, ncleanadd); le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); - kunmap_local(kaddr); + kunmap_local(header); mark_buffer_dirty(header_bh); } @@ -119,6 +133,8 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, /** * nilfs_sufile_get_ncleansegs - return the number of clean segments * @sufile: inode of segment usage file + * + * Return: Number of clean segments. */ unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) { @@ -141,17 +157,13 @@ unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) * of successfully modified segments from the head is stored in the * place @ndone points to. * - * Return Value: On success, zero is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - Given segment usage is in hole block (may be returned if - * @create is zero) - * - * %-EINVAL - Invalid segment usage number + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid segment usage number + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Given segment usage is in hole block (may be returned if + * @create is zero) + * * %-ENOMEM - Insufficient memory available. */ int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, int create, size_t *ndone, @@ -258,10 +270,7 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, * @start: minimum segment number of allocatable region (inclusive) * @end: maximum segment number of allocatable region (inclusive) * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-ERANGE - invalid segment region + * Return: 0 on success, or %-ERANGE if segment range is invalid. 
*/ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) { @@ -286,17 +295,14 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) * @sufile: inode of segment usage file * @segnump: pointer to segment number * - * Description: nilfs_sufile_alloc() allocates a clean segment. - * - * Return Value: On success, 0 is returned and the segment number of the - * allocated segment is stored in the place pointed by @segnump. On error, one - * of the following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Description: nilfs_sufile_alloc() allocates a clean segment, and stores + * its segment number in the place pointed to by @segnump. * - * %-ENOSPC - No clean segment left. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No clean segment left. */ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) { @@ -306,6 +312,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) struct nilfs_sufile_info *sui = NILFS_SUI(sufile); size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; + size_t offset; void *kaddr; unsigned long nsegments, nsus, cnt; int ret, j; @@ -315,10 +322,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; - kaddr = kmap_local_page(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); last_alloc = le64_to_cpu(header->sh_last_alloc); - kunmap_local(kaddr); + kunmap_local(header); nsegments = nilfs_sufile_get_nsegments(sufile); maxsegnum = sui->allocmax; @@ -352,9 +358,10 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) &su_bh); if (ret < 0) goto out_header; - kaddr = kmap_local_page(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, offset); nsus = nilfs_sufile_segment_usages_in_block( sufile, segnum, maxsegnum); @@ -365,12 +372,11 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) nilfs_segment_usage_set_dirty(su); kunmap_local(kaddr); - kaddr = kmap_local_page(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); - kunmap_local(kaddr); + kunmap_local(header); sui->ncleansegs--; mark_buffer_dirty(header_bh); @@ -404,18 +410,18 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; - kaddr = kmap_local_page(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (unlikely(!nilfs_segment_usage_clean(su))) { nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean", __func__, (unsigned long long)segnum); - kunmap_local(kaddr); + kunmap_local(su); return; } nilfs_segment_usage_set_dirty(su); - kunmap_local(kaddr); + kunmap_local(su); nilfs_sufile_mod_counter(header_bh, 
-1, 1); NILFS_SUI(sufile)->ncleansegs--; @@ -429,14 +435,14 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int clean, dirty; - kaddr = kmap_local_page(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (su->su_flags == cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)) && su->su_nblocks == cpu_to_le32(0)) { - kunmap_local(kaddr); + kunmap_local(su); return; } clean = nilfs_segment_usage_clean(su); @@ -446,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, su->su_lastmod = cpu_to_le64(0); su->su_nblocks = cpu_to_le32(0); su->su_flags = cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)); - kunmap_local(kaddr); + kunmap_local(su); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); NILFS_SUI(sufile)->ncleansegs -= clean; @@ -460,15 +466,15 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int sudirty; - kaddr = kmap_local_page(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (nilfs_segment_usage_clean(su)) { nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean", __func__, (unsigned long long)segnum); - kunmap_local(kaddr); + kunmap_local(su); return; } if (unlikely(nilfs_segment_usage_error(su))) @@ -481,7 +487,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, (unsigned long long)segnum); nilfs_segment_usage_set_clean(su); - kunmap_local(kaddr); + kunmap_local(su); mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); @@ -496,25 +502,34 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty * @sufile: inode of segment usage file * @segnum: segment number + * + * Return: 0 on success, or a negative error code on failure. 
*/ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; - void *kaddr; + size_t offset; struct nilfs_segment_usage *su; int ret; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); - if (ret) + if (unlikely(ret)) { + if (ret == -ENOENT) { + nilfs_error(sufile->i_sb, + "segment usage for segment %llu is unreadable due to a hole block", + (unsigned long long)segnum); + ret = -EIO; + } goto out_sem; + } - kaddr = kmap_local_page(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh); + su = kmap_local_folio(bh->b_folio, offset); if (unlikely(nilfs_segment_usage_error(su))) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; - kunmap_local(kaddr); + kunmap_local(su); brelse(bh); if (nilfs_segment_is_active(nilfs, segnum)) { nilfs_error(sufile->i_sb, @@ -532,7 +547,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) ret = -EIO; } else { nilfs_segment_usage_set_dirty(su); - kunmap_local(kaddr); + kunmap_local(su); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); @@ -548,13 +563,15 @@ out_sem: * @segnum: segment number * @nblocks: number of live blocks in the segment * @modtime: modification time (option) + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, unsigned long nblocks, time64_t modtime) { struct buffer_head *bh; struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int ret; down_write(&NILFS_MDT(sufile)->mi_sem); @@ -562,8 +579,8 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, if (ret < 0) goto out_sem; - kaddr = kmap_local_page(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh); + su = kmap_local_folio(bh->b_folio, offset); if (modtime) { /* * Check segusage error and set su_lastmod only when updating @@ -573,7 +590,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, su->su_lastmod = cpu_to_le64(modtime); } su->su_nblocks = cpu_to_le32(nblocks); - kunmap_local(kaddr); + kunmap_local(su); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); @@ -589,23 +606,19 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, * @sufile: inode of segment usage file * @sustat: pointer to a structure of segment usage statistics * - * Description: nilfs_sufile_get_stat() returns information about segment - * usage. - * - * Return Value: On success, 0 is returned, and segment usage information is - * stored in the place pointed by @sustat. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. + * Description: nilfs_sufile_get_stat() retrieves segment usage statistics + * and stores them in the location pointed to by @sustat. * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
*/ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; - void *kaddr; int ret; down_read(&NILFS_MDT(sufile)->mi_sem); @@ -614,8 +627,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) if (ret < 0) goto out_sem; - kaddr = kmap_local_page(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); @@ -624,7 +636,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) spin_lock(&nilfs->ns_last_segment_lock); sustat->ss_prot_seq = nilfs->ns_prot_seq; spin_unlock(&nilfs->ns_last_segment_lock); - kunmap_local(kaddr); + kunmap_local(header); brelse(header_bh); out_sem: @@ -637,18 +649,18 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int suclean; - kaddr = kmap_local_page(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (nilfs_segment_usage_error(su)) { - kunmap_local(kaddr); + kunmap_local(su); return; } suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); - kunmap_local(kaddr); + kunmap_local(su); if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); @@ -664,16 +676,12 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, * @start: start segment number (inclusive) * @end: end segment number (inclusive) * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid number of segments specified - * - * %-EBUSY - Dirty or active segments are present in the range + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Dirty or active segments are present in the range. + * * %-EINVAL - Invalid number of segments specified. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
*/ static int nilfs_sufile_truncate_range(struct inode *sufile, __u64 start, __u64 end) @@ -686,7 +694,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, unsigned long segusages_per_block; unsigned long nsegs, ncleaned; __u64 segnum; - void *kaddr; + size_t offset; ssize_t n, nc; int ret; int j; @@ -717,16 +725,16 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, /* hole */ continue; } - kaddr = kmap_local_page(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); su2 = su; for (j = 0; j < n; j++, su = (void *)su + susz) { if ((le32_to_cpu(su->su_flags) & ~BIT(NILFS_SEGMENT_USAGE_ERROR)) || nilfs_segment_is_active(nilfs, segnum + j)) { ret = -EBUSY; - kunmap_local(kaddr); + kunmap_local(su2); brelse(su_bh); goto out_header; } @@ -738,7 +746,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, nc++; } } - kunmap_local(kaddr); + kunmap_local(su2); if (nc > 0) { mark_buffer_dirty(su_bh); ncleaned += nc; @@ -768,16 +776,12 @@ out: * @sufile: inode of segment usage file * @newnsegs: new number of segments * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOSPC - Enough free space is not left for shrinking - * - * %-EBUSY - Dirty or active segments exist in the region to be truncated + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Dirty or active segments exist in the region to be truncated. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - Enough free space is not left for shrinking. */ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) { @@ -785,7 +789,6 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) struct buffer_head *header_bh; struct nilfs_sufile_header *header; struct nilfs_sufile_info *sui = NILFS_SUI(sufile); - void *kaddr; unsigned long nsegs, nrsvsegs; int ret = 0; @@ -823,10 +826,9 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) sui->allocmin = 0; } - kaddr = kmap_local_page(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); - kunmap_local(kaddr); + kunmap_local(header); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(sufile); @@ -840,21 +842,17 @@ out: } /** - * nilfs_sufile_get_suinfo - + * nilfs_sufile_get_suinfo - get segment usage information * @sufile: inode of segment usage file * @segnum: segment number to start looking - * @buf: array of suinfo - * @sisz: byte size of suinfo - * @nsi: size of suinfo array - * - * Description: + * @buf: array of suinfo + * @sisz: byte size of suinfo + * @nsi: size of suinfo array * - * Return Value: On success, 0 is returned and .... On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: Count of segment usage info items stored in the output buffer on + * success, or one of the following negative error codes on failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
*/ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, unsigned int sisz, size_t nsi) @@ -864,6 +862,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + size_t offset; void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; @@ -891,9 +890,9 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, continue; } - kaddr = kmap_local_page(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, offset); for (j = 0; j < n; j++, su = (void *)su + susz, si = (void *)si + sisz) { si->sui_lastmod = le64_to_cpu(su->su_lastmod); @@ -925,14 +924,11 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, * segment usage accordingly. Only the fields indicated by the sup_flags * are updated. * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid values in input (segment number, flags or nblocks) + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid values in input (segment number, flags or nblocks). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, unsigned int supsz, size_t nsup) @@ -941,7 +937,7 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, struct buffer_head *header_bh, *bh; struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup; struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; unsigned long blkoff, prev_blkoff; int cleansi, cleansu, dirtysi, dirtysu; long ncleaned = 0, ndirtied = 0; @@ -973,9 +969,9 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, goto out_header; for (;;) { - kaddr = kmap_local_page(bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, sup->sup_segnum, bh, kaddr); + offset = nilfs_sufile_segment_usage_offset( + sufile, sup->sup_segnum, bh); + su = kmap_local_folio(bh->b_folio, offset); if (nilfs_suinfo_update_lastmod(sup)) su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod); @@ -1010,7 +1006,7 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags); } - kunmap_local(kaddr); + kunmap_local(su); sup = (void *)sup + supsz; if (sup >= supend) @@ -1059,13 +1055,14 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, * and start+len is rounded down. For each clean segment blkdev_issue_discard * function is invoked. * - * Return Value: On success, 0 is returned or negative error code, otherwise. + * Return: 0 on success, or a negative error code on failure. 
*/ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; struct buffer_head *su_bh; struct nilfs_segment_usage *su; + size_t offset; void *kaddr; size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size; sector_t seg_start, seg_end, start_block, end_block; @@ -1115,9 +1112,9 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) continue; } - kaddr = kmap_local_page(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, - su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, offset); for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) { if (!nilfs_segment_usage_clean(su)) continue; @@ -1157,9 +1154,10 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) } ndiscarded += nblocks; - kaddr = kmap_local_page(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset( + sufile, segnum, su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, + offset); } /* start new extent */ @@ -1203,6 +1201,8 @@ out_sem: * @susize: size of a segment usage entry * @raw_inode: on-disk sufile inode * @inodep: buffer to store the inode + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_inode *raw_inode, struct inode **inodep) @@ -1211,7 +1211,6 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_sufile_info *sui; struct buffer_head *header_bh; struct nilfs_sufile_header *header; - void *kaddr; int err; if (susize > sb->s_blocksize) { @@ -1241,15 +1240,20 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, if (err) goto failed; - err = nilfs_sufile_get_header_block(sufile, &header_bh); - if (err) + err = nilfs_mdt_get_block(sufile, 0, 0, NULL, &header_bh); + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "missing header block in segment usage metadata"); + err = -EINVAL; + } goto failed; + } sui = NILFS_SUI(sufile); - kaddr = kmap_local_page(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); - kunmap_local(kaddr); + kunmap_local(header); brelse(header_bh); sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 8e8a1a5a0402..cd6f28ab3521 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -58,6 +58,8 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range); * nilfs_sufile_scrap - make a segment garbage * @sufile: inode of segment usage file * @segnum: segment number to be freed + * + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum) { @@ -68,6 +70,8 @@ static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum) * nilfs_sufile_free - free segment * @sufile: inode of segment usage file * @segnum: segment number to be freed + * + * Return: 0 on success, or a negative error code on failure. 
*/ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) { @@ -80,6 +84,8 @@ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) * @segnumv: array of segment numbers * @nsegs: size of @segnumv array * @ndone: place to store the number of freed segments + * + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv, size_t nsegs, size_t *ndone) @@ -95,8 +101,7 @@ static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv, * @nsegs: size of @segnumv array * @ndone: place to store the number of cancelled segments * - * Return Value: On success, 0 is returned. On error, a negative error codes - * is returned. + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_cancel_freev(struct inode *sufile, __u64 *segnumv, size_t nsegs, @@ -114,14 +119,11 @@ static inline int nilfs_sufile_cancel_freev(struct inode *sufile, * Description: nilfs_sufile_set_error() marks the segment specified by * @segnum as erroneous. The error segment will never be used again. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid segment usage number. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid segment usage number. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) { diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index ac24ed109ce9..badc2cbc895e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -29,13 +29,13 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> -#include <linux/parser.h> #include <linux/crc32.h> #include <linux/vfs.h> #include <linux/writeback.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/fs_context.h> +#include <linux/fs_parser.h> #include "nilfs.h" #include "export.h" #include "mdt.h" @@ -61,7 +61,6 @@ struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; static int nilfs_setup_super(struct super_block *sb, int is_mount); -static int nilfs_remount(struct super_block *sb, int *flags, char *data); void __nilfs_msg(struct super_block *sb, const char *fmt, ...) { @@ -106,6 +105,10 @@ static void nilfs_set_error(struct super_block *sb) /** * __nilfs_error() - report failure condition on a filesystem + * @sb: super block instance + * @function: name of calling function + * @fmt: format string for message to be output + * @...: optional arguments to @fmt * * __nilfs_error() sets an ERROR_FS flag on the superblock as well as * reporting an error message. This function should be called when @@ -157,6 +160,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) return NULL; ii->i_bh = NULL; ii->i_state = 0; + ii->i_type = 0; ii->i_cno = 0; ii->i_assoc_inode = NULL; ii->i_bmap = &ii->i_bmap_data; @@ -305,6 +309,8 @@ int nilfs_commit_super(struct super_block *sb, int flag) * This function restores state flags in the on-disk super block. * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the * filesystem was not clean previously. + * + * Return: 0 on success, %-EIO if I/O error or superblock is corrupted. 
*/ int nilfs_cleanup_super(struct super_block *sb) { @@ -335,6 +341,8 @@ int nilfs_cleanup_super(struct super_block *sb) * nilfs_move_2nd_super - relocate secondary super block * @sb: super block instance * @sb2off: new offset of the secondary super block (in bytes) + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) { @@ -416,6 +424,8 @@ out: * nilfs_resize_fs - resize the filesystem * @sb: super block instance * @newsize: new size of the filesystem (in bytes) + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) { @@ -702,105 +712,98 @@ static const struct super_operations nilfs_sops = { .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, .statfs = nilfs_statfs, - .remount_fs = nilfs_remount, .show_options = nilfs_show_options }; enum { - Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, - Opt_discard, Opt_nodiscard, Opt_err, + Opt_err, Opt_barrier, Opt_snapshot, Opt_order, Opt_norecovery, + Opt_discard, }; -static match_table_t tokens = { - {Opt_err_cont, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_snapshot, "cp=%u"}, - {Opt_order, "order=%s"}, - {Opt_norecovery, "norecovery"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_err, NULL} +static const struct constant_table nilfs_param_err[] = { + {"continue", NILFS_MOUNT_ERRORS_CONT}, + {"panic", NILFS_MOUNT_ERRORS_PANIC}, + {"remount-ro", NILFS_MOUNT_ERRORS_RO}, + {} }; -static int parse_options(char *options, struct super_block *sb, int is_remount) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - char *p; - substring_t args[MAX_OPT_ARGS]; - - if (!options) - return 1; - - while ((p = strsep(&options, ",")) != NULL) { - int token; +static const struct fs_parameter_spec nilfs_param_spec[] = { + fsparam_enum ("errors", Opt_err, nilfs_param_err), + fsparam_flag_no ("barrier", Opt_barrier), + fsparam_u64 ("cp", Opt_snapshot), + fsparam_string ("order", Opt_order), + fsparam_flag ("norecovery", Opt_norecovery), + fsparam_flag_no ("discard", Opt_discard), + {} +}; - if (!*p) - continue; +struct nilfs_fs_context { + unsigned long ns_mount_opt; + __u64 cno; +}; - token = match_token(p, tokens, args); - switch (token) { - case Opt_barrier: - nilfs_set_opt(nilfs, BARRIER); - break; - case Opt_nobarrier: +static int nilfs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct nilfs_fs_context *nilfs = fc->fs_private; + int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, nilfs_param_spec, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_barrier: + if (result.negated) nilfs_clear_opt(nilfs, BARRIER); - break; - case Opt_order: - if (strcmp(args[0].from, "relaxed") == 0) - /* Ordered data semantics */ - nilfs_clear_opt(nilfs, STRICT_ORDER); - else if (strcmp(args[0].from, "strict") == 0) - /* Strict in-order semantics */ - nilfs_set_opt(nilfs, STRICT_ORDER); - else - return 0; - break; - case Opt_err_panic: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC); - break; - case Opt_err_ro: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO); - break; - case Opt_err_cont: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT); - break; - case Opt_snapshot: - if (is_remount) { - 
nilfs_err(sb, - "\"%s\" option is invalid for remount", - p); - return 0; - } - break; - case Opt_norecovery: - nilfs_set_opt(nilfs, NORECOVERY); - break; - case Opt_discard: - nilfs_set_opt(nilfs, DISCARD); - break; - case Opt_nodiscard: - nilfs_clear_opt(nilfs, DISCARD); - break; - default: - nilfs_err(sb, "unrecognized mount option \"%s\"", p); - return 0; + else + nilfs_set_opt(nilfs, BARRIER); + break; + case Opt_order: + if (strcmp(param->string, "relaxed") == 0) + /* Ordered data semantics */ + nilfs_clear_opt(nilfs, STRICT_ORDER); + else if (strcmp(param->string, "strict") == 0) + /* Strict in-order semantics */ + nilfs_set_opt(nilfs, STRICT_ORDER); + else + return -EINVAL; + break; + case Opt_err: + nilfs->ns_mount_opt &= ~NILFS_MOUNT_ERROR_MODE; + nilfs->ns_mount_opt |= result.uint_32; + break; + case Opt_snapshot: + if (is_remount) { + struct super_block *sb = fc->root->d_sb; + + nilfs_err(sb, + "\"%s\" option is invalid for remount", + param->key); + return -EINVAL; + } + if (result.uint_64 == 0) { + nilfs_err(NULL, + "invalid option \"cp=0\": invalid checkpoint number 0"); + return -EINVAL; } + nilfs->cno = result.uint_64; + break; + case Opt_norecovery: + nilfs_set_opt(nilfs, NORECOVERY); + break; + case Opt_discard: + if (result.negated) + nilfs_clear_opt(nilfs, DISCARD); + else + nilfs_set_opt(nilfs, DISCARD); + break; + default: + return -EINVAL; } - return 1; -} - -static inline void -nilfs_set_default_options(struct super_block *sb, - struct nilfs_super_block *sbp) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - nilfs->ns_mount_opt = - NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; + return 0; } static int nilfs_setup_super(struct super_block *sb, int is_mount) @@ -857,9 +860,8 @@ struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); } -int nilfs_store_magic_and_option(struct super_block *sb, - struct nilfs_super_block *sbp, - char *data) +int nilfs_store_magic(struct super_block *sb, + struct nilfs_super_block *sbp) { struct the_nilfs *nilfs = sb->s_fs_info; @@ -870,14 +872,12 @@ int nilfs_store_magic_and_option(struct super_block *sb, sb->s_flags |= SB_NOATIME; #endif - nilfs_set_default_options(sb, sbp); - nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid); nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid); nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval); nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max); - return !parse_options(data, sb, 0) ? -EINVAL : 0; + return 0; } int nilfs_check_feature_compatibility(struct super_block *sb, @@ -993,7 +993,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint * @root_dentry: root dentry of the tree to be shrunk * - * This function returns true if the tree was in-use. + * Return: true if the tree was in-use, false otherwise. */ static bool nilfs_tree_is_busy(struct dentry *root_dentry) { @@ -1035,17 +1035,19 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) /** * nilfs_fill_super() - initialize a super block instance * @sb: super_block - * @data: mount options - * @silent: silent mode flag + * @fc: filesystem context * * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. + * + * Return: 0 on success, or a negative error code on failure. 
*/ static int -nilfs_fill_super(struct super_block *sb, void *data, int silent) +nilfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct the_nilfs *nilfs; struct nilfs_root *fsroot; + struct nilfs_fs_context *ctx = fc->fs_private; __u64 cno; int err; @@ -1055,10 +1057,13 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = nilfs; - err = init_nilfs(nilfs, sb, (char *)data); + err = init_nilfs(nilfs, sb); if (err) goto failed_nilfs; + /* Copy in parsed mount options */ + nilfs->ns_mount_opt = ctx->ns_mount_opt; + sb->s_op = &nilfs_sops; sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; @@ -1071,6 +1076,10 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto failed_nilfs; + super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, + sizeof(nilfs->ns_sbp[0]->s_uuid)); + super_set_sysfs_name_bdev(sb); + cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { @@ -1117,34 +1126,25 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) return err; } -static int nilfs_remount(struct super_block *sb, int *flags, char *data) +static int nilfs_reconfigure(struct fs_context *fc) { + struct nilfs_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; - unsigned long old_sb_flags; - unsigned long old_mount_opt; int err; sync_filesystem(sb); - old_sb_flags = sb->s_flags; - old_mount_opt = nilfs->ns_mount_opt; - - if (!parse_options(data, sb, 1)) { - err = -EINVAL; - goto restore_opts; - } - sb->s_flags = (sb->s_flags & ~SB_POSIXACL); err = -EINVAL; if (!nilfs_valid_fs(nilfs)) { nilfs_warn(sb, "couldn't remount because the filesystem is in an incomplete recovery state"); - goto restore_opts; + goto ignore_opts; } - - if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) + if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb)) goto out; - if (*flags & SB_RDONLY) { + if (fc->sb_flags & SB_RDONLY) { sb->s_flags |= SB_RDONLY; /* @@ -1172,138 +1172,67 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) "couldn't remount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); err = -EROFS; - goto restore_opts; + goto ignore_opts; } sb->s_flags &= ~SB_RDONLY; root = NILFS_I(d_inode(sb->s_root))->i_root; err = nilfs_attach_log_writer(sb, root); - if (err) - goto restore_opts; + if (err) { + sb->s_flags |= SB_RDONLY; + goto ignore_opts; + } down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } out: - return 0; - - restore_opts: - sb->s_flags = old_sb_flags; - nilfs->ns_mount_opt = old_mount_opt; - return err; -} - -struct nilfs_super_data { - __u64 cno; - int flags; -}; - -static int nilfs_parse_snapshot_option(const char *option, - const substring_t *arg, - struct nilfs_super_data *sd) -{ - unsigned long long val; - const char *msg = NULL; - int err; - - if (!(sd->flags & SB_RDONLY)) { - msg = "read-only option is not specified"; - goto parse_error; - } - - err = kstrtoull(arg->from, 0, &val); - if (err) { - if (err == -ERANGE) - msg = "too large checkpoint number"; - else - msg = "malformed argument"; - goto parse_error; - } else if (val == 0) { - msg = "invalid checkpoint number 0"; - goto parse_error; - } - sd->cno = val; - return 0; - -parse_error: - nilfs_err(NULL, "invalid option \"%s\": %s", option, msg); - return 1; -} - -/** - * nilfs_identify - pre-read mount options needed to identify mount instance - * @data: mount options - * @sd: 
nilfs_super_data - */ -static int nilfs_identify(char *data, struct nilfs_super_data *sd) -{ - char *p, *options = data; - substring_t args[MAX_OPT_ARGS]; - int token; - int ret = 0; - - do { - p = strsep(&options, ","); - if (p != NULL && *p) { - token = match_token(p, tokens, args); - if (token == Opt_snapshot) - ret = nilfs_parse_snapshot_option(p, &args[0], - sd); - } - if (!options) - break; - BUG_ON(options == data); - *(options - 1) = ','; - } while (!ret); - return ret; -} + sb->s_flags = (sb->s_flags & ~SB_POSIXACL); + /* Copy over parsed remount options */ + nilfs->ns_mount_opt = ctx->ns_mount_opt; -static int nilfs_set_bdev_super(struct super_block *s, void *data) -{ - s->s_dev = *(dev_t *)data; return 0; -} -static int nilfs_test_bdev_super(struct super_block *s, void *data) -{ - return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data; + ignore_opts: + return err; } -static struct dentry * -nilfs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int +nilfs_get_tree(struct fs_context *fc) { - struct nilfs_super_data sd = { .flags = flags }; + struct nilfs_fs_context *ctx = fc->fs_private; struct super_block *s; dev_t dev; int err; - if (nilfs_identify(data, &sd)) - return ERR_PTR(-EINVAL); + if (ctx->cno && !(fc->sb_flags & SB_RDONLY)) { + nilfs_err(NULL, + "invalid option \"cp=%llu\": read-only option is not specified", + ctx->cno); + return -EINVAL; + } - err = lookup_bdev(dev_name, &dev); + err = lookup_bdev(fc->source, &dev); if (err) - return ERR_PTR(err); + return err; - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags, - &dev); + s = sget_dev(fc, dev); if (IS_ERR(s)) - return ERR_CAST(s); + return PTR_ERR(s); if (!s->s_root) { - err = setup_bdev_super(s, flags, NULL); + err = setup_bdev_super(s, fc->sb_flags, fc); if (!err) - err = nilfs_fill_super(s, data, - flags & SB_SILENT ? 1 : 0); + err = nilfs_fill_super(s, fc); if (err) goto failed_super; s->s_flags |= SB_ACTIVE; - } else if (!sd.cno) { + } else if (!ctx->cno) { if (nilfs_tree_is_busy(s->s_root)) { - if ((flags ^ s->s_flags) & SB_RDONLY) { + if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { nilfs_err(s, "the device already has a %s mount.", sb_rdonly(s) ? "read-only" : "read/write"); @@ -1312,37 +1241,75 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } } else { /* - * Try remount to setup mount states if the current + * Try reconfigure to setup mount states if the current * tree is not mounted and only snapshots use this sb. + * + * Since nilfs_reconfigure() requires fc->root to be + * set, set it first and release it on failure. 
*/ - err = nilfs_remount(s, &flags, data); - if (err) + fc->root = dget(s->s_root); + err = nilfs_reconfigure(fc); + if (err) { + dput(fc->root); + fc->root = NULL; /* prevent double release */ goto failed_super; + } + return 0; } } - if (sd.cno) { + if (ctx->cno) { struct dentry *root_dentry; - err = nilfs_attach_snapshot(s, sd.cno, &root_dentry); + err = nilfs_attach_snapshot(s, ctx->cno, &root_dentry); if (err) goto failed_super; - return root_dentry; + fc->root = root_dentry; + return 0; } - return dget(s->s_root); + fc->root = dget(s->s_root); + return 0; failed_super: deactivate_locked_super(s); - return ERR_PTR(err); + return err; +} + +static void nilfs_free_fc(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +static const struct fs_context_operations nilfs_context_ops = { + .parse_param = nilfs_parse_param, + .get_tree = nilfs_get_tree, + .reconfigure = nilfs_reconfigure, + .free = nilfs_free_fc, +}; + +static int nilfs_init_fs_context(struct fs_context *fc) +{ + struct nilfs_fs_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; + fc->fs_private = ctx; + fc->ops = &nilfs_context_ops; + + return 0; } struct file_system_type nilfs_fs_type = { .owner = THIS_MODULE, .name = "nilfs2", - .mount = nilfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, + .init_fs_context = nilfs_init_fs_context, + .parameters = nilfs_param_spec, }; MODULE_ALIAS_FS("nilfs2"); diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 379d22e28ed6..14868a3dd592 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -56,7 +56,7 @@ static void nilfs_##name##_attr_release(struct kobject *kobj) \ sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ -static struct kobj_type nilfs_##name##_ktype = { \ +static const struct kobj_type nilfs_##name##_ktype = { \ .default_groups = nilfs_##name##_groups, \ .sysfs_ops = &nilfs_##name##_attr_ops, \ .release = nilfs_##name##_attr_release, \ @@ -166,7 +166,7 @@ static const struct sysfs_ops nilfs_snapshot_attr_ops = { .store = nilfs_snapshot_attr_store, }; -static struct kobj_type nilfs_snapshot_ktype = { +static const struct kobj_type nilfs_snapshot_ktype = { .default_groups = nilfs_snapshot_groups, .sysfs_ops = &nilfs_snapshot_attr_ops, .release = nilfs_snapshot_attr_release, @@ -836,9 +836,15 @@ ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; - u32 major = le32_to_cpu(sbp[0]->s_rev_level); - u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level); + struct nilfs_super_block *raw_sb; + u32 major; + u16 minor; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + major = le32_to_cpu(raw_sb->s_rev_level); + minor = le16_to_cpu(raw_sb->s_minor_rev_level); + up_read(&nilfs->ns_sem); return sysfs_emit(buf, "%d.%d\n", major, minor); } @@ -856,8 +862,13 @@ ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; - u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size); + struct nilfs_super_block *raw_sb; + u64 dev_size; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + dev_size = le64_to_cpu(raw_sb->s_dev_size); + up_read(&nilfs->ns_sem); return sysfs_emit(buf, "%llu\n", dev_size); } @@ -879,9 +890,15 @@ ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block 
**sbp = nilfs->ns_sbp; + struct nilfs_super_block *raw_sb; + ssize_t len; - return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid); + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid); + up_read(&nilfs->ns_sem); + + return len; } static @@ -889,10 +906,16 @@ ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct nilfs_super_block *raw_sb; + ssize_t len; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n", + raw_sb->s_volume_name); + up_read(&nilfs->ns_sem); - return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n", - sbp[0]->s_volume_name); + return len; } static const char dev_readme_str[] = @@ -967,7 +990,7 @@ static const struct sysfs_ops nilfs_dev_attr_ops = { .store = nilfs_dev_attr_store, }; -static struct kobj_type nilfs_dev_ktype = { +static const struct kobj_type nilfs_dev_ktype = { .default_groups = nilfs_dev_groups, .sysfs_ops = &nilfs_dev_attr_ops, .release = nilfs_dev_attr_release, diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 2ae2c1bbf6d1..d0bcf744c553 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -12,7 +12,6 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> -#include <linux/random.h> #include <linux/log2.h> #include <linux/crc32.h> #include "nilfs.h" @@ -50,8 +49,8 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, * alloc_nilfs - allocate a nilfs object * @sb: super block instance * - * Return Value: On success, pointer to the_nilfs is returned. - * On error, NULL is returned. + * Return: a pointer to the allocated nilfs object on success, or NULL on + * failure. */ struct the_nilfs *alloc_nilfs(struct super_block *sb) { @@ -69,7 +68,6 @@ struct the_nilfs *alloc_nilfs(struct super_block *sb) INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_inode_lock); - spin_lock_init(&nilfs->ns_next_gen_lock); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_cptree = RB_ROOT; spin_lock_init(&nilfs->ns_cptree_lock); @@ -167,6 +165,9 @@ static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri) * containing a super root from a given super block, and initializes * relevant information on the nilfs object preparatory for log * scanning and recovery. + * + * Return: 0 on success, or %-EINVAL if current segment number is out + * of range. */ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) @@ -202,8 +203,7 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, * exponent information written in @sbp and stores it in @blocksize, * or aborts with an error message if it's too large. * - * Return Value: On success, 0 is returned. If the block size is too - * large, -EINVAL is returned. + * Return: 0 on success, or %-EINVAL if the block size is too large. */ static int nilfs_get_blocksize(struct super_block *sb, struct nilfs_super_block *sbp, int *blocksize) @@ -228,6 +228,13 @@ static int nilfs_get_blocksize(struct super_block *sb, * load_nilfs() searches and load the latest super root, * attaches the last segment, and does recovery if needed. * The caller must call this exclusively for simultaneous mounts. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - No valid segment found. + * * %-EIO - I/O error. 
+ * * %-ENOMEM - Insufficient memory available. + * * %-EROFS - Read only device or RO compat mode (if recovery is required) */ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) { @@ -397,6 +404,8 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) * nilfs_nrsvsegs - calculate the number of reserved segments * @nilfs: nilfs object * @nsegs: total number of segments + * + * Return: Number of reserved segments. */ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) { @@ -408,6 +417,8 @@ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) /** * nilfs_max_segment_count - calculate the maximum number of segments * @nilfs: nilfs object + * + * Return: Maximum number of segments */ static u64 nilfs_max_segment_count(struct the_nilfs *nilfs) { @@ -452,6 +463,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, } nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); + if (nilfs->ns_first_ino < NILFS_USER_INO) { + nilfs_err(nilfs->ns_sb, + "too small lower limit for non-reserved inode numbers: %u", + nilfs->ns_first_ino); + return -EINVAL; + } nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { @@ -534,7 +551,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) * area, or if the parameters themselves are not normal, it is * determined to be invalid. * - * Return Value: true if invalid, false if valid. + * Return: true if invalid, false if valid. */ static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) { @@ -592,7 +609,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, struct nilfs_super_block **sbp = nilfs->ns_sbp; struct buffer_head **sbh = nilfs->ns_sbh; u64 sb2off, devsize = bdev_nr_bytes(nilfs->ns_bdev); - int valid[2], swp = 0; + int valid[2], swp = 0, older; if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) { nilfs_err(sb, "device size too small"); @@ -648,9 +665,25 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, if (swp) nilfs_swap_super_block(nilfs); + /* + * Calculate the array index of the older superblock data. + * If one has been dropped, set index 0 pointing to the remaining one, + * otherwise set index 1 pointing to the old one (including if both + * are the same). + * + * Divided case valid[0] valid[1] swp -> older + * ------------------------------------------------------------- + * Both SBs are invalid 0 0 N/A (Error) + * SB1 is invalid 0 1 1 0 + * SB2 is invalid 1 0 0 0 + * SB2 is newer 1 1 1 0 + * SB2 is older or the same 1 1 0 1 + */ + older = valid[1] ^ swp; + nilfs->ns_sbwcount = 0; nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); - nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); + nilfs->ns_prot_seq = le64_to_cpu(sbp[older]->s_last_seq); *sbpp = sbp[0]; return 0; } @@ -659,23 +692,19 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, * init_nilfs - initialize a NILFS instance. * @nilfs: the_nilfs structure * @sb: super block - * @data: mount options * * init_nilfs() performs common initialization per block device (e.g. * reading the super block, getting disk layout information, initializing * shared fields in the_nilfs). * - * Return Value: On success, 0 is returned. On error, a negative error - * code is returned. + * Return: 0 on success, or a negative error code on failure. 
*/ -int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) { struct nilfs_super_block *sbp; int blocksize; int err; - down_write(&nilfs->ns_sem); - blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { nilfs_err(sb, "unable to set blocksize"); @@ -686,7 +715,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) if (err) goto out; - err = nilfs_store_magic_and_option(sb, sbp, data); + err = nilfs_store_magic(sb, sbp); if (err) goto failed_sbh; @@ -733,9 +762,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) nilfs->ns_blocksize_bits = sb->s_blocksize_bits; nilfs->ns_blocksize = blocksize; - get_random_bytes(&nilfs->ns_next_generation, - sizeof(nilfs->ns_next_generation)); - err = nilfs_store_disk_layout(nilfs, sbp); if (err) goto failed_sbh; @@ -751,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) set_nilfs_init(nilfs); err = 0; out: - up_write(&nilfs->ns_sem); return err; failed_sbh: diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index cd4ae1b8ae16..4776a70f01ae 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -71,8 +71,6 @@ enum { * @ns_dirty_files: list of dirty files * @ns_inode_lock: lock protecting @ns_dirty_files * @ns_gc_inodes: dummy inodes to keep live blocks - * @ns_next_generation: next generation number for inodes - * @ns_next_gen_lock: lock protecting @ns_next_generation * @ns_mount_opt: mount options * @ns_resuid: uid for reserved blocks * @ns_resgid: gid for reserved blocks @@ -161,10 +159,6 @@ struct the_nilfs { /* GC inode list */ struct list_head ns_gc_inodes; - /* Inode allocator */ - u32 ns_next_generation; - spinlock_t ns_next_gen_lock; - /* Mount options */ unsigned long ns_mount_opt; @@ -182,7 +176,7 @@ struct the_nilfs { unsigned long ns_nrsvsegs; unsigned long ns_first_data_block; int ns_inode_size; - int ns_first_ino; + unsigned int ns_first_ino; u32 ns_crc_seed; /* /sys/fs/<nilfs>/<device> */ @@ -219,10 +213,6 @@ THE_NILFS_FNS(PURGING, purging) #define nilfs_set_opt(nilfs, opt) \ ((nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt) #define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt) -#define nilfs_write_opt(nilfs, mask, opt) \ - ((nilfs)->ns_mount_opt = \ - (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \ - NILFS_MOUNT_##opt)) \ /** * struct nilfs_root - nilfs root object @@ -276,7 +266,7 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); struct the_nilfs *alloc_nilfs(struct super_block *sb); void destroy_nilfs(struct the_nilfs *nilfs); -int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb); int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs); void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs); |
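The super.c hunks above replace the legacy mount/remount entry points with the fs_context API, so mount parameters such as "cp", "nobarrier", "discard" and "errors=" now flow through nilfs_parse_param() one key at a time. As a minimal illustration of what that enables from userspace, the sketch below mounts a checkpoint as a read-only snapshot via the new-style mount syscalls. This is not part of the patch: it assumes a kernel built with this conversion and headers new enough to provide SYS_fsopen/SYS_fsconfig/SYS_fsmount/SYS_move_mount (Linux 5.2+); the device path /dev/sdb1, the mountpoint /mnt/snapshot and checkpoint number 3 are made up for the example. The "cp" and "nobarrier" keys come from nilfs_param_spec in the patch; "source" and "ro" are handled generically by the VFS. Error checks on the individual SET calls are omitted for brevity.

#include <fcntl.h>          /* AT_FDCWD */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/mount.h>    /* FSOPEN_CLOEXEC, FSCONFIG_*, FSMOUNT_CLOEXEC, MOVE_MOUNT_* */

int main(void)
{
	int sfd, mfd;

	/* Open a new filesystem context for nilfs2 (hits nilfs_init_fs_context()). */
	sfd = syscall(SYS_fsopen, "nilfs2", FSOPEN_CLOEXEC);
	if (sfd < 0) {
		perror("fsopen");
		return 1;
	}

	/* Each key/value pair below is fed to nilfs_parse_param() via fs_parse(). */
	syscall(SYS_fsconfig, sfd, FSCONFIG_SET_STRING, "source", "/dev/sdb1", 0);
	syscall(SYS_fsconfig, sfd, FSCONFIG_SET_FLAG, "ro", NULL, 0);        /* "cp" is rejected without read-only */
	syscall(SYS_fsconfig, sfd, FSCONFIG_SET_STRING, "cp", "3", 0);       /* checkpoint number 3 as a snapshot */
	syscall(SYS_fsconfig, sfd, FSCONFIG_SET_FLAG, "nobarrier", NULL, 0); /* negated form of the "barrier" flag */

	/* FSCONFIG_CMD_CREATE ends up in nilfs_get_tree(). */
	if (syscall(SYS_fsconfig, sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
		perror("fsconfig(FSCONFIG_CMD_CREATE)");
		return 1;
	}

	/* Turn the configured superblock into a detached mount, then attach it. */
	mfd = syscall(SYS_fsmount, sfd, FSMOUNT_CLOEXEC, 0);
	if (mfd < 0) {
		perror("fsmount");
		return 1;
	}
	if (syscall(SYS_move_mount, mfd, "", AT_FDCWD, "/mnt/snapshot",
		    MOVE_MOUNT_F_EMPTY_PATH) < 0) {
		perror("move_mount");
		return 1;
	}

	close(mfd);
	close(sfd);
	return 0;
}

The same options remain usable through plain mount(8) ("mount -t nilfs2 -o ro,cp=3,nobarrier /dev/sdb1 /mnt/snapshot"), since the legacy mount path is routed through the same parameter parser after this conversion.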