diff options
Diffstat (limited to 'fs/nilfs2')
42 files changed, 7379 insertions, 4242 deletions
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig index 80da8eb27393..7dae168e346e 100644 --- a/fs/nilfs2/Kconfig +++ b/fs/nilfs2/Kconfig @@ -1,6 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only config NILFS2_FS tristate "NILFS2 file system support" + select BUFFER_HEAD select CRC32 + select LEGACY_DIRECT_IO help NILFS2 is a log-structured file system (LFS) supporting continuous snapshotting. In addition to versioning capability of the entire diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile index 85c98737a146..43b60b8a4d07 100644 --- a/fs/nilfs2/Makefile +++ b/fs/nilfs2/Makefile @@ -1,5 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_NILFS2_FS) += nilfs2.o nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ btnode.o bmap.o btree.o direct.o dat.o recovery.o \ the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ - ifile.o alloc.o gcinode.o ioctl.o + ifile.o alloc.o gcinode.o ioctl.o sysfs.o diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 741fd02e0444..6b506995818d 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -1,25 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * alloc.c - NILFS dat/inode allocator + * NILFS dat/inode allocator * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Original code was written by Koji Sato <koji@osrg.net>. - * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>, - * Amagai Yoshiji <amagai@osrg.net>. + * Originally written by Koji Sato. + * Two allocators were unified by Ryusuke Konishi and Amagai Yoshiji. */ #include <linux/types.h> @@ -35,17 +21,21 @@ * nilfs_palloc_groups_per_desc_block - get the number of groups that a group * descriptor block can maintain * @inode: inode of metadata file using this allocator + * + * Return: Number of groups that a group descriptor block can maintain. */ static inline unsigned long nilfs_palloc_groups_per_desc_block(const struct inode *inode) { - return (1UL << inode->i_blkbits) / + return i_blocksize(inode) / sizeof(struct nilfs_palloc_group_desc); } /** * nilfs_palloc_groups_count - get maximum number of groups * @inode: inode of metadata file using this allocator + * + * Return: Maximum number of groups. */ static inline unsigned long nilfs_palloc_groups_count(const struct inode *inode) @@ -57,8 +47,10 @@ nilfs_palloc_groups_count(const struct inode *inode) * nilfs_palloc_init_blockgroup - initialize private variables for allocator * @inode: inode of metadata file using this allocator * @entry_size: size of the persistent object + * + * Return: 0 on success, or a negative error code on failure. */ -int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size) +int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned int entry_size) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); @@ -73,13 +65,17 @@ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size) mi->mi_blocks_per_group = DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode), mi->mi_entries_per_block) + 1; - /* Number of blocks in a group including entry blocks and - a bitmap block */ + /* + * Number of blocks in a group including entry blocks + * and a bitmap block + */ mi->mi_blocks_per_desc_block = nilfs_palloc_groups_per_desc_block(inode) * mi->mi_blocks_per_group + 1; - /* Number of blocks per descriptor including the - descriptor block */ + /* + * Number of blocks per descriptor including the + * descriptor block + */ return 0; } @@ -88,6 +84,9 @@ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size) * @inode: inode of metadata file using this allocator * @nr: serial number of the entry (e.g. inode number) * @offset: pointer to store offset number in the group + * + * Return: Number of the group that contains the entry with the index + * specified by @nr. */ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, unsigned long *offset) @@ -103,8 +102,8 @@ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, * @inode: inode of metadata file using this allocator * @group: group number * - * nilfs_palloc_desc_blkoff() returns block offset of the descriptor - * block which contains a descriptor of the specified group. + * Return: Index number in the metadata file of the descriptor block of + * the group specified by @group. */ static unsigned long nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) @@ -121,6 +120,9 @@ nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) * * nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap * block used to allocate/deallocate entries in the specified group. + * + * Return: Index number in the metadata file of the bitmap block of + * the group specified by @group. */ static unsigned long nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) @@ -133,44 +135,52 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) /** * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group - * @inode: inode of metadata file using this allocator - * @group: group number * @desc: pointer to descriptor structure for the group + * @lock: spin lock protecting @desc + * + * Return: Number of free entries written in the group descriptor @desc. */ static unsigned long -nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, - const struct nilfs_palloc_group_desc *desc) +nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc, + spinlock_t *lock) { unsigned long nfree; - spin_lock(nilfs_mdt_bgl_lock(inode, group)); + spin_lock(lock); nfree = le32_to_cpu(desc->pg_nfrees); - spin_unlock(nilfs_mdt_bgl_lock(inode, group)); + spin_unlock(lock); return nfree; } /** * nilfs_palloc_group_desc_add_entries - adjust count of free entries - * @inode: inode of metadata file using this allocator - * @group: group number * @desc: pointer to descriptor structure for the group + * @lock: spin lock protecting @desc * @n: delta to be added + * + * Return: Number of free entries after adjusting the group descriptor + * @desc. */ -static void -nilfs_palloc_group_desc_add_entries(struct inode *inode, - unsigned long group, - struct nilfs_palloc_group_desc *desc, - u32 n) +static u32 +nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc, + spinlock_t *lock, u32 n) { - spin_lock(nilfs_mdt_bgl_lock(inode, group)); + u32 nfree; + + spin_lock(lock); le32_add_cpu(&desc->pg_nfrees, n); - spin_unlock(nilfs_mdt_bgl_lock(inode, group)); + nfree = le32_to_cpu(desc->pg_nfrees); + spin_unlock(lock); + return nfree; } /** * nilfs_palloc_entry_blkoff - get block offset of an entry block * @inode: inode of metadata file using this allocator * @nr: serial number of the entry (e.g. inode number) + * + * Return: Index number in the metadata file of the block containing + * the entry specified by @nr. */ static unsigned long nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) @@ -187,12 +197,14 @@ nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) * nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block * @inode: inode of metadata file * @bh: buffer head of the buffer to be initialized - * @kaddr: kernel address mapped for the page including the buffer + * @from: kernel address mapped for a chunk of the block + * + * This function does not yet support the case where block size > PAGE_SIZE. */ static void nilfs_palloc_desc_block_init(struct inode *inode, - struct buffer_head *bh, void *kaddr) + struct buffer_head *bh, void *from) { - struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh); + struct nilfs_palloc_group_desc *desc = from; unsigned long n = nilfs_palloc_groups_per_desc_block(inode); __le32 nfrees; @@ -215,7 +227,8 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, int ret; spin_lock(lock); - if (prev->bh && blkoff == prev->blkoff) { + if (prev->bh && blkoff == prev->blkoff && + likely(buffer_uptodate(prev->bh))) { get_bh(prev->bh); *bhp = prev->bh; spin_unlock(lock); @@ -240,11 +253,39 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, } /** + * nilfs_palloc_delete_block - delete a block on the persistent allocator file + * @inode: inode of metadata file using this allocator + * @blkoff: block offset + * @prev: nilfs_bh_assoc struct of the last used buffer + * @lock: spin lock protecting @prev + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Non-existent block. + * * %-ENOMEM - Insufficient memory available. + */ +static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff, + struct nilfs_bh_assoc *prev, + spinlock_t *lock) +{ + spin_lock(lock); + if (prev->bh && blkoff == prev->blkoff) { + brelse(prev->bh); + prev->bh = NULL; + } + spin_unlock(lock); + return nilfs_mdt_delete_block(inode, blkoff); +} + +/** * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block * @inode: inode of metadata file using this allocator * @group: group number * @create: create flag * @bhp: pointer to store the resultant buffer head + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_get_desc_block(struct inode *inode, unsigned long group, @@ -264,6 +305,8 @@ static int nilfs_palloc_get_desc_block(struct inode *inode, * @group: group number * @create: create flag * @bhp: pointer to store the resultant buffer head + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_get_bitmap_block(struct inode *inode, unsigned long group, @@ -278,11 +321,31 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode, } /** + * nilfs_palloc_delete_bitmap_block - delete a bitmap block + * @inode: inode of metadata file using this allocator + * @group: group number + * + * Return: 0 on success, or a negative error code on failure. + */ +static int nilfs_palloc_delete_bitmap_block(struct inode *inode, + unsigned long group) +{ + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_delete_block(inode, + nilfs_palloc_bitmap_blkoff(inode, + group), + &cache->prev_bitmap, &cache->lock); +} + +/** * nilfs_palloc_get_entry_block - get buffer head of an entry block * @inode: inode of metadata file using this allocator * @nr: serial number of the entry (e.g. inode number) * @create: create flag * @bhp: pointer to store the resultant buffer head + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, int create, struct buffer_head **bhp) @@ -296,87 +359,115 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, } /** - * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor + * nilfs_palloc_delete_entry_block - delete an entry block + * @inode: inode of metadata file using this allocator + * @nr: serial number of the entry + * + * Return: 0 on success, or a negative error code on failure. + */ +static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr) +{ + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_delete_block(inode, + nilfs_palloc_entry_blkoff(inode, nr), + &cache->prev_entry, &cache->lock); +} + +/** + * nilfs_palloc_group_desc_offset - calculate the byte offset of a group + * descriptor in the folio containing it * @inode: inode of metadata file using this allocator * @group: group number - * @bh: buffer head of the buffer storing the group descriptor block - * @kaddr: kernel address mapped for the page including the buffer + * @bh: buffer head of the group descriptor block + * + * Return: Byte offset in the folio of the group descriptor for @group. */ -static struct nilfs_palloc_group_desc * -nilfs_palloc_block_get_group_desc(const struct inode *inode, - unsigned long group, - const struct buffer_head *bh, void *kaddr) +static size_t nilfs_palloc_group_desc_offset(const struct inode *inode, + unsigned long group, + const struct buffer_head *bh) { - return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) + - group % nilfs_palloc_groups_per_desc_block(inode); + return offset_in_folio(bh->b_folio, bh->b_data) + + sizeof(struct nilfs_palloc_group_desc) * + (group % nilfs_palloc_groups_per_desc_block(inode)); } /** - * nilfs_palloc_block_get_entry - get kernel address of an entry + * nilfs_palloc_bitmap_offset - calculate the byte offset of a bitmap block + * in the folio containing it + * @bh: buffer head of the bitmap block + * + * Return: Byte offset in the folio of the bitmap block for @bh. + */ +static size_t nilfs_palloc_bitmap_offset(const struct buffer_head *bh) +{ + return offset_in_folio(bh->b_folio, bh->b_data); +} + +/** + * nilfs_palloc_entry_offset - calculate the byte offset of an entry in the + * folio containing it * @inode: inode of metadata file using this allocator - * @nr: serial number of the entry (e.g. inode number) - * @bh: buffer head of the buffer storing the entry block - * @kaddr: kernel address mapped for the page including the buffer + * @nr: serial number of the entry (e.g. inode number) + * @bh: buffer head of the entry block + * + * Return: Byte offset in the folio of the entry @nr. */ -void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, - const struct buffer_head *bh, void *kaddr) +size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr, + const struct buffer_head *bh) { - unsigned long entry_offset, group_offset; + unsigned long entry_index_in_group, entry_index_in_block; - nilfs_palloc_group(inode, nr, &group_offset); - entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block; + nilfs_palloc_group(inode, nr, &entry_index_in_group); + entry_index_in_block = entry_index_in_group % + NILFS_MDT(inode)->mi_entries_per_block; - return kaddr + bh_offset(bh) + - entry_offset * NILFS_MDT(inode)->mi_entry_size; + return offset_in_folio(bh->b_folio, bh->b_data) + + entry_index_in_block * NILFS_MDT(inode)->mi_entry_size; } /** * nilfs_palloc_find_available_slot - find available slot in a group - * @inode: inode of metadata file using this allocator - * @group: group number - * @target: offset number of an entry in the group (start point) * @bitmap: bitmap of the group + * @target: offset number of an entry in the group (start point) * @bsize: size in bits + * @lock: spin lock protecting @bitmap + * @wrap: whether to wrap around + * + * Return: Offset number within the group of the found free entry, or + * %-ENOSPC if not found. */ -static int nilfs_palloc_find_available_slot(struct inode *inode, - unsigned long group, +static int nilfs_palloc_find_available_slot(unsigned char *bitmap, unsigned long target, - unsigned char *bitmap, - int bsize) -{ - int curr, pos, end, i; - - if (target > 0) { - end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1); - if (end > bsize) - end = bsize; - pos = nilfs_find_next_zero_bit(bitmap, end, target); - if (pos < end && - !nilfs_set_bit_atomic( - nilfs_mdt_bgl_lock(inode, group), pos, bitmap)) - return pos; - } else - end = 0; - - for (i = 0, curr = end; - i < bsize; - i += BITS_PER_LONG, curr += BITS_PER_LONG) { - /* wrap around */ - if (curr >= bsize) - curr = 0; - while (*((unsigned long *)bitmap + curr / BITS_PER_LONG) - != ~0UL) { - end = curr + BITS_PER_LONG; - if (end > bsize) - end = bsize; - pos = nilfs_find_next_zero_bit(bitmap, end, curr); - if ((pos < end) && - !nilfs_set_bit_atomic( - nilfs_mdt_bgl_lock(inode, group), pos, - bitmap)) + unsigned int bsize, + spinlock_t *lock, bool wrap) +{ + int pos, end = bsize; + + if (likely(target < bsize)) { + pos = target; + do { + pos = nilfs_find_next_zero_bit(bitmap, end, pos); + if (pos >= end) + break; + if (!nilfs_set_bit_atomic(lock, pos, bitmap)) return pos; - } + } while (++pos < end); + + end = target; + } + if (!wrap) + return -ENOSPC; + + /* wrap around */ + for (pos = 0; pos < end; pos++) { + pos = nilfs_find_next_zero_bit(bitmap, end, pos); + if (pos >= end) + break; + if (!nilfs_set_bit_atomic(lock, pos, bitmap)) + return pos; } + return -ENOSPC; } @@ -386,6 +477,9 @@ static int nilfs_palloc_find_available_slot(struct inode *inode, * @inode: inode of metadata file using this allocator * @curr: current group number * @max: maximum number of groups + * + * Return: Number of remaining descriptors (= groups) managed by the descriptor + * block. */ static unsigned long nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, @@ -401,17 +495,20 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, * nilfs_palloc_count_desc_blocks - count descriptor blocks number * @inode: inode of metadata file using this allocator * @desc_blocks: descriptor blocks number [out] + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_palloc_count_desc_blocks(struct inode *inode, unsigned long *desc_blocks) { - unsigned long blknum; + __u64 blknum; int ret; ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum); if (likely(!ret)) *desc_blocks = DIV_ROUND_UP( - blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block); + (unsigned long)blknum, + NILFS_MDT(inode)->mi_blocks_per_desc_block); return ret; } @@ -420,6 +517,8 @@ static int nilfs_palloc_count_desc_blocks(struct inode *inode, * MDT file growing * @inode: inode of metadata file using this allocator * @desc_blocks: known current descriptor blocks count + * + * Return: true if a group can be added in the metadata file, false if not. */ static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, unsigned long desc_blocks) @@ -434,6 +533,12 @@ static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, * @inode: inode of metadata file using this allocator * @nused: current number of used entries * @nmaxp: max number of entries [out] + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ERANGE - Number of entries in use is out of range. */ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) { @@ -464,28 +569,36 @@ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the allocation + * @wrap: whether to wrap around + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - Entries exhausted (No entries available for allocation). + * * %-EROFS - Read only filesystem */ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, - struct nilfs_palloc_req *req) + struct nilfs_palloc_req *req, bool wrap) { struct buffer_head *desc_bh, *bitmap_bh; struct nilfs_palloc_group_desc *desc; unsigned char *bitmap; - void *desc_kaddr, *bitmap_kaddr; + size_t doff, boff; unsigned long group, maxgroup, ngroups; unsigned long group_offset, maxgroup_offset; - unsigned long n, entries_per_group, groups_per_desc_block; + unsigned long n, entries_per_group; unsigned long i, j; + spinlock_t *lock; int pos, ret; ngroups = nilfs_palloc_groups_count(inode); maxgroup = ngroups - 1; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); entries_per_group = nilfs_palloc_entries_per_group(inode); - groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode); for (i = 0; i < ngroups; i += n) { - if (group >= ngroups) { + if (group >= ngroups && wrap) { /* wrap around */ group = 0; maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr, @@ -494,54 +607,64 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh); if (ret < 0) return ret; - desc_kaddr = kmap(desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc( - inode, group, desc_bh, desc_kaddr); + + doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh); + desc = kmap_local_folio(desc_bh->b_folio, doff); n = nilfs_palloc_rest_groups_in_desc_block(inode, group, maxgroup); - for (j = 0; j < n; j++, desc++, group++) { - if (nilfs_palloc_group_desc_nfrees(inode, group, desc) - > 0) { - ret = nilfs_palloc_get_bitmap_block( - inode, group, 1, &bitmap_bh); - if (ret < 0) - goto out_desc; - bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(bitmap_bh); - pos = nilfs_palloc_find_available_slot( - inode, group, group_offset, bitmap, - entries_per_group); - if (pos >= 0) { - /* found a free entry */ - nilfs_palloc_group_desc_add_entries( - inode, group, desc, -1); - req->pr_entry_nr = - entries_per_group * group + pos; - kunmap(desc_bh->b_page); - kunmap(bitmap_bh->b_page); - - req->pr_desc_bh = desc_bh; - req->pr_bitmap_bh = bitmap_bh; - return 0; - } - kunmap(bitmap_bh->b_page); - brelse(bitmap_bh); + for (j = 0; j < n; j++, group++, group_offset = 0) { + lock = nilfs_mdt_bgl_lock(inode, group); + if (nilfs_palloc_group_desc_nfrees(&desc[j], lock) == 0) + continue; + + kunmap_local(desc); + ret = nilfs_palloc_get_bitmap_block(inode, group, 1, + &bitmap_bh); + if (unlikely(ret < 0)) { + brelse(desc_bh); + return ret; } - group_offset = 0; + /* + * Re-kmap the folio containing the first (and + * subsequent) group descriptors. + */ + desc = kmap_local_folio(desc_bh->b_folio, doff); + + boff = nilfs_palloc_bitmap_offset(bitmap_bh); + bitmap = kmap_local_folio(bitmap_bh->b_folio, boff); + pos = nilfs_palloc_find_available_slot( + bitmap, group_offset, entries_per_group, lock, + wrap); + /* + * Since the search for a free slot in the second and + * subsequent bitmap blocks always starts from the + * beginning, the wrap flag only has an effect on the + * first search. + */ + kunmap_local(bitmap); + if (pos >= 0) + goto found; + + brelse(bitmap_bh); } - kunmap(desc_bh->b_page); + kunmap_local(desc); brelse(desc_bh); } /* no entries left */ return -ENOSPC; - out_desc: - kunmap(desc_bh->b_page); - brelse(desc_bh); - return ret; +found: + /* found a free entry */ + nilfs_palloc_group_desc_add_entries(&desc[j], lock, -1); + req->pr_entry_nr = entries_per_group * group + pos; + kunmap_local(desc); + + req->pr_desc_bh = desc_bh; + req->pr_bitmap_bh = bitmap_bh; + return 0; } /** @@ -568,27 +691,30 @@ void nilfs_palloc_commit_alloc_entry(struct inode *inode, void nilfs_palloc_commit_free_entry(struct inode *inode, struct nilfs_palloc_req *req) { - struct nilfs_palloc_group_desc *desc; unsigned long group, group_offset; + size_t doff, boff; + struct nilfs_palloc_group_desc *desc; unsigned char *bitmap; - void *desc_kaddr, *bitmap_kaddr; + spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); - desc_kaddr = kmap(req->pr_desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc(inode, group, - req->pr_desc_bh, desc_kaddr); - bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); - - if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), - group_offset, bitmap)) - printk(KERN_WARNING "%s: entry number %llu already freed\n", - __func__, (unsigned long long)req->pr_entry_nr); + doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh); + desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff); + + boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh); + bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff); + lock = nilfs_mdt_bgl_lock(inode, group); + + if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) + nilfs_warn(inode->i_sb, + "%s (ino=%lu): entry number %llu already freed", + __func__, inode->i_ino, + (unsigned long long)req->pr_entry_nr); else - nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + nilfs_palloc_group_desc_add_entries(desc, lock, 1); - kunmap(req->pr_bitmap_bh->b_page); - kunmap(req->pr_desc_bh->b_page); + kunmap_local(bitmap); + kunmap_local(desc); mark_buffer_dirty(req->pr_desc_bh); mark_buffer_dirty(req->pr_bitmap_bh); @@ -607,25 +733,29 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, struct nilfs_palloc_req *req) { struct nilfs_palloc_group_desc *desc; - void *desc_kaddr, *bitmap_kaddr; + size_t doff, boff; unsigned char *bitmap; unsigned long group, group_offset; + spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); - desc_kaddr = kmap(req->pr_desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc(inode, group, - req->pr_desc_bh, desc_kaddr); - bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); - if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), - group_offset, bitmap)) - printk(KERN_WARNING "%s: entry number %llu already freed\n", - __func__, (unsigned long long)req->pr_entry_nr); + doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh); + desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff); + + boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh); + bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff); + lock = nilfs_mdt_bgl_lock(inode, group); + + if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) + nilfs_warn(inode->i_sb, + "%s (ino=%lu): entry number %llu already freed", + __func__, inode->i_ino, + (unsigned long long)req->pr_entry_nr); else - nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + nilfs_palloc_group_desc_add_entries(desc, lock, 1); - kunmap(req->pr_bitmap_bh->b_page); - kunmap(req->pr_desc_bh->b_page); + kunmap_local(bitmap); + kunmap_local(desc); brelse(req->pr_bitmap_bh); brelse(req->pr_desc_bh); @@ -639,6 +769,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, * nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the removal + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_palloc_prepare_free_entry(struct inode *inode, struct nilfs_palloc_req *req) @@ -679,37 +811,32 @@ void nilfs_palloc_abort_free_entry(struct inode *inode, } /** - * nilfs_palloc_group_is_in - judge if an entry is in a group - * @inode: inode of metadata file using this allocator - * @group: group number - * @nr: serial number of the entry (e.g. inode number) - */ -static int -nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr) -{ - __u64 first, last; - - first = group * nilfs_palloc_entries_per_group(inode); - last = first + nilfs_palloc_entries_per_group(inode) - 1; - return (nr >= first) && (nr <= last); -} - -/** * nilfs_palloc_freev - deallocate a set of persistent objects * @inode: inode of metadata file using this allocator * @entry_nrs: array of entry numbers to be deallocated * @nitems: number of entries stored in @entry_nrs + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) { struct buffer_head *desc_bh, *bitmap_bh; struct nilfs_palloc_group_desc *desc; unsigned char *bitmap; - void *desc_kaddr, *bitmap_kaddr; + size_t doff, boff; unsigned long group, group_offset; - int i, j, n, ret; + __u64 group_min_nr, last_nrs[8]; + const unsigned long epg = nilfs_palloc_entries_per_group(inode); + const unsigned int epb = NILFS_MDT(inode)->mi_entries_per_block; + unsigned int entry_start, end, pos; + spinlock_t *lock; + int i, j, k, ret; + u32 nfree; for (i = 0; i < nitems; i = j) { + int change_group = false; + int nempties = 0, n = 0; + group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); if (ret < 0) @@ -720,38 +847,85 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) brelse(desc_bh); return ret; } - desc_kaddr = kmap(desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc( - inode, group, desc_bh, desc_kaddr); - bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(bitmap_bh); - for (j = i, n = 0; - (j < nitems) && nilfs_palloc_group_is_in(inode, group, - entry_nrs[j]); - j++) { - nilfs_palloc_group(inode, entry_nrs[j], &group_offset); - if (!nilfs_clear_bit_atomic( - nilfs_mdt_bgl_lock(inode, group), - group_offset, bitmap)) { - printk(KERN_WARNING - "%s: entry number %llu already freed\n", - __func__, - (unsigned long long)entry_nrs[j]); + + /* Get the first entry number of the group */ + group_min_nr = (__u64)group * epg; + + boff = nilfs_palloc_bitmap_offset(bitmap_bh); + bitmap = kmap_local_folio(bitmap_bh->b_folio, boff); + lock = nilfs_mdt_bgl_lock(inode, group); + + j = i; + entry_start = rounddown(group_offset, epb); + do { + if (!nilfs_clear_bit_atomic(lock, group_offset, + bitmap)) { + nilfs_warn(inode->i_sb, + "%s (ino=%lu): entry number %llu already freed", + __func__, inode->i_ino, + (unsigned long long)entry_nrs[j]); } else { n++; } - } - nilfs_palloc_group_desc_add_entries(inode, group, desc, n); - kunmap(bitmap_bh->b_page); - kunmap(desc_bh->b_page); + j++; + if (j >= nitems || entry_nrs[j] < group_min_nr || + entry_nrs[j] >= group_min_nr + epg) { + change_group = true; + } else { + group_offset = entry_nrs[j] - group_min_nr; + if (group_offset >= entry_start && + group_offset < entry_start + epb) { + /* This entry is in the same block */ + continue; + } + } + + /* Test if the entry block is empty or not */ + end = entry_start + epb; + pos = nilfs_find_next_bit(bitmap, end, entry_start); + if (pos >= end) { + last_nrs[nempties++] = entry_nrs[j - 1]; + if (nempties >= ARRAY_SIZE(last_nrs)) + break; + } + + if (change_group) + break; - mark_buffer_dirty(desc_bh); - mark_buffer_dirty(bitmap_bh); - nilfs_mdt_mark_dirty(inode); + /* Go on to the next entry block */ + entry_start = rounddown(group_offset, epb); + } while (true); + kunmap_local(bitmap); + mark_buffer_dirty(bitmap_bh); brelse(bitmap_bh); + + for (k = 0; k < nempties; k++) { + ret = nilfs_palloc_delete_entry_block(inode, + last_nrs[k]); + if (ret && ret != -ENOENT) + nilfs_warn(inode->i_sb, + "error %d deleting block that object (entry=%llu, ino=%lu) belongs to", + ret, (unsigned long long)last_nrs[k], + inode->i_ino); + } + + doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh); + desc = kmap_local_folio(desc_bh->b_folio, doff); + nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n); + kunmap_local(desc); + mark_buffer_dirty(desc_bh); + nilfs_mdt_mark_dirty(inode); brelse(desc_bh); + + if (nfree == nilfs_palloc_entries_per_group(inode)) { + ret = nilfs_palloc_delete_bitmap_block(inode, group); + if (ret && ret != -ENOENT) + nilfs_warn(inode->i_sb, + "error %d deleting bitmap block of group=%lu, ino=%lu", + ret, group, inode->i_ino); + } } return 0; } diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 4bd6451b5703..046d876ea3e0 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -1,25 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * alloc.h - persistent object (dat entry/disk inode) allocator/deallocator + * Persistent object (dat entry/disk inode) allocator/deallocator * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Original code was written by Koji Sato <koji@osrg.net>. - * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>, - * Amagai Yoshiji <amagai@osrg.net>. + * Originally written by Koji Sato. + * Two allocators were unified by Ryusuke Konishi and Amagai Yoshiji. */ #ifndef _NILFS_ALLOC_H @@ -35,6 +21,8 @@ * * The number of entries per group is defined by the number of bits * that a bitmap block can maintain. + * + * Return: Number of entries per group. */ static inline unsigned long nilfs_palloc_entries_per_group(const struct inode *inode) @@ -42,16 +30,16 @@ nilfs_palloc_entries_per_group(const struct inode *inode) return 1UL << (inode->i_blkbits + 3 /* log2(8 = CHAR_BITS) */); } -int nilfs_palloc_init_blockgroup(struct inode *, unsigned); +int nilfs_palloc_init_blockgroup(struct inode *, unsigned int); int nilfs_palloc_get_entry_block(struct inode *, __u64, int, struct buffer_head **); -void *nilfs_palloc_block_get_entry(const struct inode *, __u64, - const struct buffer_head *, void *); +size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr, + const struct buffer_head *bh); int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); /** - * nilfs_palloc_req - persistent allocator request and reply + * struct nilfs_palloc_req - persistent allocator request and reply * @pr_entry_nr: entry number (vblocknr or inode number) * @pr_desc_bh: buffer head of the buffer containing block group descriptors * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap @@ -64,8 +52,8 @@ struct nilfs_palloc_req { struct buffer_head *pr_entry_bh; }; -int nilfs_palloc_prepare_alloc_entry(struct inode *, - struct nilfs_palloc_req *); +int nilfs_palloc_prepare_alloc_entry(struct inode *inode, + struct nilfs_palloc_req *req, bool wrap); void nilfs_palloc_commit_alloc_entry(struct inode *, struct nilfs_palloc_req *); void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *); @@ -77,6 +65,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); #define nilfs_set_bit_atomic ext2_set_bit_atomic #define nilfs_clear_bit_atomic ext2_clear_bit_atomic #define nilfs_find_next_zero_bit find_next_zero_bit_le +#define nilfs_find_next_bit find_next_bit_le /** * struct nilfs_bh_assoc - block offset and buffer head association diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index aadbd0b5e3e8..ccc1a7aa52d2 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * bmap.c - NILFS block mapping. + * NILFS block mapping. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #include <linux/fs.h> @@ -45,8 +32,8 @@ static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, struct inode *inode = bmap->b_inode; if (err == -EINVAL) { - nilfs_error(inode->i_sb, fname, - "broken bmap (inode number=%lu)\n", inode->i_ino); + __nilfs_error(inode->i_sb, fname, + "broken bmap (inode number=%lu)", inode->i_ino); err = -EIO; } return err; @@ -60,17 +47,14 @@ static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, * @ptrp: place to store the value associated to @key * * Description: nilfs_bmap_lookup_at_level() finds a record whose key - * matches @key in the block at @level of the bmap. - * - * Return Value: On success, 0 is returned and the record associated with @key - * is stored in the place pointed by @ptrp. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - A record associated with @key does not exist. + * matches @key in the block at @level of the bmap. The record associated + * with @key is stored in the place pointed to by @ptrp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - A record associated with @key does not exist. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, __u64 *ptrp) @@ -80,24 +64,32 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); - if (ret < 0) { - ret = nilfs_bmap_convert_error(bmap, __func__, ret); + if (ret < 0) goto out; - } + if (NILFS_BMAP_USE_VBN(bmap)) { ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, &blocknr); if (!ret) *ptrp = blocknr; + else if (ret == -ENOENT) { + /* + * If there was no valid entry in DAT for the block + * address obtained by b_ops->bop_lookup, then pass + * internal code -EINVAL to nilfs_bmap_convert_error + * to treat it as metadata corruption. + */ + ret = -EINVAL; + } } out: up_read(&bmap->b_sem); - return ret; + return nilfs_bmap_convert_error(bmap, __func__, ret); } int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp, - unsigned maxblocks) + unsigned int maxblocks) { int ret; @@ -143,18 +135,13 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) * Description: nilfs_bmap_insert() inserts the new key-record pair specified * by @key and @rec into @bmap. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EEXIST - A record associated with @key already exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EEXIST - A record associated with @key already exists. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ -int nilfs_bmap_insert(struct nilfs_bmap *bmap, - unsigned long key, - unsigned long rec) +int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec) { int ret; @@ -191,19 +178,44 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) return bmap->b_ops->bop_delete(bmap, key); } -int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) +/** + * nilfs_bmap_seek_key - seek a valid entry and return its key + * @bmap: bmap struct + * @start: start key number + * @keyp: place to store valid key + * + * Description: nilfs_bmap_seek_key() seeks a valid key on @bmap + * starting from @start, and stores it to @keyp if found. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No valid entry was found. + * * %-ENOMEM - Insufficient memory available. + */ +int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp) { - __u64 lastkey; int ret; down_read(&bmap->b_sem); - ret = bmap->b_ops->bop_last_key(bmap, &lastkey); + ret = bmap->b_ops->bop_seek_key(bmap, start, keyp); + up_read(&bmap->b_sem); + + if (ret < 0) + ret = nilfs_bmap_convert_error(bmap, __func__, ret); + return ret; +} + +int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp) +{ + int ret; + + down_read(&bmap->b_sem); + ret = bmap->b_ops->bop_last_key(bmap, keyp); up_read(&bmap->b_sem); if (ret < 0) ret = nilfs_bmap_convert_error(bmap, __func__, ret); - else - *key = lastkey; return ret; } @@ -215,16 +227,13 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) * Description: nilfs_bmap_delete() deletes the key-record pair specified by * @key from @bmap. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - A record associated with @key does not exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - A record associated with @key does not exist. + * * %-ENOMEM - Insufficient memory available. */ -int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key) +int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key) { int ret; @@ -235,7 +244,7 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key) return nilfs_bmap_convert_error(bmap, __func__, ret); } -static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) +static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, __u64 key) { __u64 lastkey; int ret; @@ -269,14 +278,12 @@ static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) * Description: nilfs_bmap_truncate() removes key-record pairs whose keys are * greater than or equal to @key from @bmap. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ -int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key) +int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key) { int ret; @@ -309,12 +316,10 @@ void nilfs_bmap_clear(struct nilfs_bmap *bmap) * Description: nilfs_bmap_propagate() marks the buffers that directly or * indirectly refer to the block specified by @bh dirty. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) { @@ -328,7 +333,7 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) } /** - * nilfs_bmap_lookup_dirty_buffers - + * nilfs_bmap_lookup_dirty_buffers - collect dirty block buffers * @bmap: bmap * @listp: pointer to buffer head list */ @@ -341,22 +346,22 @@ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap, /** * nilfs_bmap_assign - assign a new block number to a block - * @bmap: bmap - * @bhp: pointer to buffer head + * @bmap: bmap + * @bh: place to store a pointer to the buffer head to which a block + * address is assigned (in/out) * @blocknr: block number - * @binfo: block information + * @binfo: block information * * Description: nilfs_bmap_assign() assigns the block number @blocknr to the - * buffer specified by @bh. - * - * Return Value: On success, 0 is returned and the buffer head of a newly - * create buffer and the block information associated with the buffer are - * stored in the place pointed by @bh and @binfo, respectively. On error, one - * of the following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * buffer specified by @bh. The block information is stored in the memory + * pointed to by @binfo, and the buffer head may be replaced as a block + * address is assigned, in which case a pointer to the new buffer head is + * stored in the memory pointed to by @bh. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_assign(struct nilfs_bmap *bmap, struct buffer_head **bh, @@ -381,12 +386,10 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap, * Description: nilfs_bmap_mark() marks the block specified by @key and @level * as dirty. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) { @@ -409,7 +412,7 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) * Description: nilfs_test_and_clear() is the atomic operation to test and * clear the dirty state of @bmap. * - * Return Value: 1 is returned if @bmap is dirty, or 0 if clear. + * Return: 1 if @bmap is dirty, or 0 if clear. */ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) { @@ -429,15 +432,9 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { - struct buffer_head *pbh; - __u64 key; - - key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - - bmap->b_inode->i_blkbits); - for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page) - key++; + loff_t pos = folio_pos(bh->b_folio) + bh_offset(bh); - return key; + return pos >> bmap->b_inode->i_blkbits; } __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) @@ -475,10 +472,10 @@ static struct lock_class_key nilfs_bmap_mdt_lock_key; * * Description: nilfs_bmap_read() initializes the bmap @bmap. * - * Return Value: On success, 0 is returned. On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (corrupted bmap). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) { @@ -506,7 +503,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) break; case NILFS_IFILE_INO: lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); - /* Fall through */ + fallthrough; default: bmap->b_ptr_type = NILFS_BMAP_PTR_VM; bmap->b_last_allocated_key = 0; @@ -527,13 +524,10 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) */ void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) { - down_write(&bmap->b_sem); memcpy(raw_inode->i_bmap, bmap->b_u.u_data, NILFS_INODE_BMAP_SIZE * sizeof(__le64)); if (bmap->b_inode->i_ino == NILFS_DAT_INO) bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; - - up_write(&bmap->b_sem); } void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index b89e68076adc..4656df392722 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * bmap.h - NILFS block mapping. + * NILFS block mapping. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #ifndef _NILFS_BMAP_H @@ -26,7 +13,7 @@ #include <linux/types.h> #include <linux/fs.h> #include <linux/buffer_head.h> -#include <linux/nilfs2_fs.h> +#include <linux/nilfs2_ondisk.h> /* nilfs_binfo, nilfs_inode, etc */ #include "alloc.h" #include "dat.h" @@ -57,11 +44,24 @@ struct nilfs_bmap_stats { /** * struct nilfs_bmap_operations - bmap operation table + * @bop_lookup: single block search operation + * @bop_lookup_contig: consecutive block search operation + * @bop_insert: block insertion operation + * @bop_delete: block delete operation + * @bop_clear: block mapping resource release operation + * @bop_propagate: operation to propagate dirty state towards the + * mapping root + * @bop_lookup_dirty_buffers: operation to collect dirty block buffers + * @bop_assign: disk block address assignment operation + * @bop_mark: operation to mark in-use blocks as dirty for + * relocation by GC + * @bop_seek_key: find valid block key operation + * @bop_last_key: find last valid block key operation */ struct nilfs_bmap_operations { int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); int (*bop_lookup_contig)(const struct nilfs_bmap *, __u64, __u64 *, - unsigned); + unsigned int); int (*bop_insert)(struct nilfs_bmap *, __u64, __u64); int (*bop_delete)(struct nilfs_bmap *, __u64); void (*bop_clear)(struct nilfs_bmap *); @@ -76,8 +76,10 @@ struct nilfs_bmap_operations { union nilfs_binfo *); int (*bop_mark)(struct nilfs_bmap *, __u64, int); - /* The following functions are internal use only. */ + int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *); int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); + + /* private: internal use only */ int (*bop_check_insert)(const struct nilfs_bmap *, __u64); int (*bop_check_delete)(struct nilfs_bmap *, __u64); int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); @@ -85,9 +87,8 @@ struct nilfs_bmap_operations { #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) -#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) -#define NILFS_BMAP_NEW_PTR_INIT \ - (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1)) +#define NILFS_BMAP_KEY_BIT BITS_PER_LONG +#define NILFS_BMAP_NEW_PTR_INIT (1UL << (BITS_PER_LONG - 1)) static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) { @@ -124,10 +125,14 @@ struct nilfs_bmap { /* pointer type */ #define NILFS_BMAP_PTR_P 0 /* physical block number (i.e. LBN) */ -#define NILFS_BMAP_PTR_VS 1 /* virtual block number (single - version) */ -#define NILFS_BMAP_PTR_VM 2 /* virtual block number (has multiple - versions) */ +#define NILFS_BMAP_PTR_VS 1 /* + * virtual block number (single + * version) + */ +#define NILFS_BMAP_PTR_VM 2 /* + * virtual block number (has multiple + * versions) + */ #define NILFS_BMAP_PTR_U (-1) /* never perform pointer operations */ #define NILFS_BMAP_USE_VBN(bmap) ((bmap)->b_ptr_type > 0) @@ -152,11 +157,12 @@ struct nilfs_bmap_store { int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); -int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned); -int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); -int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); -int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *); -int nilfs_bmap_truncate(struct nilfs_bmap *, unsigned long); +int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned int); +int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec); +int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key); +int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp); +int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp); +int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key); void nilfs_bmap_clear(struct nilfs_bmap *); int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *); void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *); diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index a35ae35e6932..568367129092 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -1,25 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * btnode.c - NILFS B-tree node cache + * NILFS B-tree node cache * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * This file was originally written by Seiji Kihara <kihara@osrg.net> - * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for - * stabilization and simplification. + * Originally written by Seiji Kihara. + * Fully revised by Ryusuke Konishi for stabilization and simplification. * */ @@ -34,6 +20,24 @@ #include "page.h" #include "btnode.h" + +/** + * nilfs_init_btnc_inode - initialize B-tree node cache inode + * @btnc_inode: inode to be initialized + * + * nilfs_init_btnc_inode() sets up an inode for B-tree node cache. + */ +void nilfs_init_btnc_inode(struct inode *btnc_inode) +{ + struct nilfs_inode_info *ii = NILFS_I(btnc_inode); + + btnc_inode->i_mode = S_IFREG; + ii->i_flags = 0; + memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); + btnc_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; +} + void nilfs_btnode_cache_clear(struct address_space *btnc) { invalidate_mapping_pages(btnc, 0, -1); @@ -43,44 +47,58 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) struct buffer_head * nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) { - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; struct buffer_head *bh; - bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); + bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); if (unlikely(!bh)) - return NULL; + return ERR_PTR(-ENOMEM); if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || buffer_dirty(bh))) { - brelse(bh); - BUG(); + /* + * The block buffer at the specified new address was already + * in use. This can happen if it is a virtual block number + * and has been reallocated due to corruption of the bitmap + * used to manage its allocation state (if not, the buffer + * clearing of an abandoned b-tree node is missing somewhere). + */ + nilfs_error(inode->i_sb, + "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)", + (unsigned long long)blocknr, inode->i_ino); + goto failed; } - memset(bh->b_data, 0, 1 << inode->i_blkbits); - bh->b_bdev = inode->i_sb->s_bdev; + memset(bh->b_data, 0, i_blocksize(inode)); bh->b_blocknr = blocknr; set_buffer_mapped(bh); set_buffer_uptodate(bh); - unlock_page(bh->b_page); - page_cache_release(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); return bh; + +failed: + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); + brelse(bh); + return ERR_PTR(-EIO); } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, int mode, + sector_t pblocknr, blk_opf_t opf, struct buffer_head **pbh, sector_t *submit_ptr) { struct buffer_head *bh; - struct inode *inode = NILFS_BTNC_I(btnc); - struct page *page; + struct inode *inode = btnc->host; + struct folio *folio; int err; - bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); + bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); if (unlikely(!bh)) return -ENOMEM; err = -EEXIST; /* internal code */ - page = bh->b_page; + folio = bh->b_folio; if (buffer_uptodate(bh) || buffer_dirty(bh)) goto found; @@ -100,13 +118,13 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, } } - if (mode == READA) { + if (opf & REQ_RAHEAD) { if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) { err = -EBUSY; /* internal code */ brelse(bh); goto out_locked; } - } else { /* mode == READ */ + } else { /* opf == REQ_OP_READ */ lock_buffer(bh); } if (buffer_uptodate(bh)) { @@ -115,11 +133,10 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, goto found; } set_buffer_mapped(bh); - bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode, bh); + submit_bh(opf, bh); bh->b_blocknr = blocknr; /* set back to the given block address */ *submit_ptr = pblocknr; err = 0; @@ -127,8 +144,8 @@ found: *pbh = bh; out_locked: - unlock_page(page); - page_cache_release(page); + folio_unlock(folio); + folio_put(folio); return err; } @@ -142,36 +159,58 @@ out_locked: void nilfs_btnode_delete(struct buffer_head *bh) { struct address_space *mapping; - struct page *page = bh->b_page; - pgoff_t index = page_index(page); + struct folio *folio = bh->b_folio; + pgoff_t index = folio->index; int still_dirty; - page_cache_get(page); - lock_page(page); - wait_on_page_writeback(page); + folio_get(folio); + folio_lock(folio); + folio_wait_writeback(folio); nilfs_forget_buffer(bh); - still_dirty = PageDirty(page); - mapping = page->mapping; - unlock_page(page); - page_cache_release(page); + still_dirty = folio_test_dirty(folio); + mapping = folio->mapping; + folio_unlock(folio); + folio_put(folio); if (!still_dirty && mapping) invalidate_inode_pages2_range(mapping, index, index); } /** - * nilfs_btnode_prepare_change_key - * prepare to move contents of the block for old key to one of new key. - * the old buffer will not be removed, but might be reused for new buffer. - * it might return -ENOMEM because of memory allocation errors, - * and might return -EIO because of disk read errors. + * nilfs_btnode_prepare_change_key - prepare to change the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_prepare_change_key() prepares to move the contents of the + * b-tree node block of the old key given in the "oldkey" member of @ctxt to + * the position of the new key given in the "newkey" member of @ctxt in the + * page cache @btnc. Here, the key of the block is an index in units of + * blocks, and if the page and block sizes match, it matches the page index + * in the page cache. + * + * If the page size and block size match, this function attempts to move the + * entire folio, and in preparation for this, inserts the original folio into + * the new index of the cache. If this insertion fails or if the page size + * and block size are different, it falls back to a copy preparation using + * nilfs_btnode_create_block(), inserts a new block at the position + * corresponding to "newkey", and stores the buffer head pointer in the + * "newbh" member of @ctxt. + * + * Note that the current implementation does not support folio sizes larger + * than the page size. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) { struct buffer_head *obh, *nbh; - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; int err; @@ -181,89 +220,94 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, obh = ctxt->bh; ctxt->newbh = NULL; - if (inode->i_blkbits == PAGE_CACHE_SHIFT) { - lock_page(obh->b_page); - /* - * We cannot call radix_tree_preload for the kernels older - * than 2.6.23, because it is not exported for modules. - */ + if (inode->i_blkbits == PAGE_SHIFT) { + struct folio *ofolio = obh->b_folio; + folio_lock(ofolio); retry: - err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); - if (err) - goto failed_unlock; - /* BUG_ON(oldkey != obh->b_page->index); */ - if (unlikely(oldkey != obh->b_page->index)) - NILFS_PAGE_BUG(obh->b_page, + /* BUG_ON(oldkey != obh->b_folio->index); */ + if (unlikely(oldkey != ofolio->index)) + NILFS_FOLIO_BUG(ofolio, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); - spin_lock_irq(&btnc->tree_lock); - err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); - spin_unlock_irq(&btnc->tree_lock); + xa_lock_irq(&btnc->i_pages); + err = __xa_insert(&btnc->i_pages, newkey, ofolio, GFP_NOFS); + xa_unlock_irq(&btnc->i_pages); /* - * Note: page->index will not change to newkey until + * Note: folio->index will not change to newkey until * nilfs_btnode_commit_change_key() will be called. - * To protect the page in intermediate state, the page lock + * To protect the folio in intermediate state, the folio lock * is held. */ - radix_tree_preload_end(); if (!err) return 0; - else if (err != -EEXIST) + else if (err != -EBUSY) goto failed_unlock; err = invalidate_inode_pages2_range(btnc, newkey, newkey); if (!err) goto retry; /* fallback to copy mode */ - unlock_page(obh->b_page); + folio_unlock(ofolio); } nbh = nilfs_btnode_create_block(btnc, newkey); - if (!nbh) - return -ENOMEM; + if (IS_ERR(nbh)) + return PTR_ERR(nbh); BUG_ON(nbh == obh); ctxt->newbh = nbh; return 0; failed_unlock: - unlock_page(obh->b_page); + folio_unlock(obh->b_folio); return err; } /** - * nilfs_btnode_commit_change_key - * commit the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_commit_change_key - commit the change of the search key of + * a b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_commit_change_key() executes the key change based on the + * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid + * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move), + * this function removes the folio from the old index and completes the move. + * Otherwise, it copies the block data and inherited flag states of "oldbh" + * to "newbh" and clears the "oldbh" from the cache. In either case, the + * relocated buffer is marked as dirty. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_commit_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) { struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh; __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; - struct page *opage; + struct folio *ofolio; if (oldkey == newkey) return; if (nbh == NULL) { /* blocksize == pagesize */ - opage = obh->b_page; - if (unlikely(oldkey != opage->index)) - NILFS_PAGE_BUG(opage, + ofolio = obh->b_folio; + if (unlikely(oldkey != ofolio->index)) + NILFS_FOLIO_BUG(ofolio, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); mark_buffer_dirty(obh); - spin_lock_irq(&btnc->tree_lock); - radix_tree_delete(&btnc->page_tree, oldkey); - radix_tree_tag_set(&btnc->page_tree, newkey, - PAGECACHE_TAG_DIRTY); - spin_unlock_irq(&btnc->tree_lock); + xa_lock_irq(&btnc->i_pages); + __xa_erase(&btnc->i_pages, oldkey); + __xa_set_mark(&btnc->i_pages, newkey, PAGECACHE_TAG_DIRTY); + xa_unlock_irq(&btnc->i_pages); - opage->index = obh->b_blocknr = newkey; - unlock_page(opage); + ofolio->index = obh->b_blocknr = newkey; + folio_unlock(ofolio); } else { nilfs_copy_buffer(nbh, obh); mark_buffer_dirty(nbh); @@ -275,8 +319,19 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, } /** - * nilfs_btnode_abort_change_key - * abort the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_abort_change_key - abort the change of the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_abort_change_key() cancels the key change associated with the + * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs + * any necessary cleanup. If no valid block buffer is prepared in "newbh" of + * @ctxt, this function removes the folio from the destination index and aborts + * the move. Otherwise, it clears "newbh" from the cache. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_abort_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -288,10 +343,16 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc, return; if (nbh == NULL) { /* blocksize == pagesize */ - spin_lock_irq(&btnc->tree_lock); - radix_tree_delete(&btnc->page_tree, newkey); - spin_unlock_irq(&btnc->tree_lock); - unlock_page(ctxt->bh->b_page); - } else - brelse(nbh); + xa_erase_irq(&btnc->i_pages, newkey); + folio_unlock(ctxt->bh->b_folio); + } else { + /* + * When canceling a buffer that a prepare operation has + * allocated to copy a node block to another location, use + * nilfs_btnode_delete() to initialize and release the buffer + * so that the buffer flags will not be in an inconsistent + * state when it is reallocated. + */ + nilfs_btnode_delete(nbh); + } } diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index d876b565ce64..4bc5612dff94 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -1,24 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * btnode.h - NILFS B-tree node cache + * NILFS B-tree node cache * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Seiji Kihara <kihara@osrg.net> - * Revised by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Seiji Kihara. + * Revised by Ryusuke Konishi. */ #ifndef _NILFS_BTNODE_H @@ -43,11 +30,12 @@ struct nilfs_btnode_chkey_ctxt { struct buffer_head *newbh; }; +void nilfs_init_btnc_inode(struct inode *btnc_inode); void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); -int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int, - struct buffer_head **, sector_t *); +int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, + blk_opf_t, struct buffer_head **, sector_t *); void nilfs_btnode_delete(struct buffer_head *); int nilfs_btnode_prepare_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index b2e3ff347620..dd0c8e560ef6 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * btree.c - NILFS B-tree. + * NILFS B-tree. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #include <linux/slab.h> @@ -31,6 +18,8 @@ #include "alloc.h" #include "dat.h" +static void __nilfs_btree_init(struct nilfs_bmap *bmap); + static struct nilfs_btree_path *nilfs_btree_alloc_path(void) { struct nilfs_btree_path *path; @@ -69,12 +58,13 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path) static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { - struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); - if (!bh) - return -ENOMEM; + if (IS_ERR(bh)) + return PTR_ERR(bh); set_buffer_nilfs_volatile(bh); *bhp = bh; @@ -121,7 +111,7 @@ nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) static int nilfs_btree_node_size(const struct nilfs_bmap *btree) { - return 1 << btree->b_inode->i_blkbits; + return i_blocksize(btree->b_inode); } static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) @@ -341,12 +331,14 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, * nilfs_btree_node_broken - verify consistency of btree node * @node: btree node block to be examined * @size: node size (in bytes) + * @inode: host inode of btree * @blocknr: block number * - * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. + * Return: 0 if normal, 1 if the node is broken. */ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, - size_t size, sector_t blocknr) + size_t size, struct inode *inode, + sector_t blocknr) { int level, flags, nchildren; int ret = 0; @@ -358,11 +350,42 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || (flags & NILFS_BTREE_NODE_ROOT) || - nchildren < 0 || + nchildren <= 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { - printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): " - "level = %d, flags = 0x%x, nchildren = %d\n", - (unsigned long long)blocknr, level, flags, nchildren); + nilfs_crit(inode->i_sb, + "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", + inode->i_ino, (unsigned long long)blocknr, level, + flags, nchildren); + ret = 1; + } + return ret; +} + +/** + * nilfs_btree_root_broken - verify consistency of btree root node + * @node: btree root node to be examined + * @inode: host inode of btree + * + * Return: 0 if normal, 1 if the root node is broken. + */ +static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, + struct inode *inode) +{ + int level, flags, nchildren; + int ret = 0; + + level = nilfs_btree_node_get_level(node); + flags = nilfs_btree_node_get_flags(node); + nchildren = nilfs_btree_node_get_nchildren(node); + + if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || + level >= NILFS_BTREE_LEVEL_MAX || + nchildren < 0 || + nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX || + (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) { + nilfs_crit(inode->i_sb, + "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", + inode->i_ino, level, flags, nchildren); ret = 1; } return ret; @@ -370,13 +393,15 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, int nilfs_btree_broken_node_block(struct buffer_head *bh) { + struct inode *inode; int ret; if (buffer_nilfs_checked(bh)) return 0; + inode = bh->b_folio->mapping->host; ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, - bh->b_size, bh->b_blocknr); + bh->b_size, inode, bh->b_blocknr); if (likely(!ret)) set_buffer_nilfs_checked(bh); return ret; @@ -422,13 +447,15 @@ nilfs_btree_get_node(const struct nilfs_bmap *btree, return node; } -static int -nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) +static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, + struct nilfs_btree_node *node, int level) { if (unlikely(nilfs_btree_node_get_level(node) != level)) { dump_stack(); - printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n", - nilfs_btree_node_get_level(node), level); + nilfs_crit(btree->b_inode->i_sb, + "btree level mismatch (ino=%lu): %d != %d", + btree->b_inode->i_ino, + nilfs_btree_node_get_level(node), level); return 1; } return 0; @@ -445,16 +472,27 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp, const struct nilfs_btree_readahead_info *ra) { - struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh, *ra_bh; sector_t submit_ptr = 0; int ret; - ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr); + ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, + &submit_ptr); if (ret) { - if (ret != -EEXIST) - return ret; - goto out_check; + if (likely(ret == -EEXIST)) + goto out_check; + if (ret == -ENOENT) { + /* + * Block address translation failed due to invalid + * value of 'ptr'. In this case, return internal code + * -EINVAL (broken bmap) to notify bmap layer of fatal + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } if (ra) { @@ -466,8 +504,9 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, n > 0 && i < ra->ncmax; n--, i++) { ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); - ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA, - &ra_bh, &submit_ptr); + ret = nilfs_btnode_submit_block(btnc, ptr2, 0, + REQ_OP_READ | REQ_RAHEAD, + &ra_bh, &submit_ptr); if (likely(!ret || ret == -EEXIST)) brelse(ra_bh); else if (ret != -EBUSY) @@ -481,6 +520,9 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, out_no_wait: if (!buffer_uptodate(bh)) { + nilfs_err(btree->b_inode->i_sb, + "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", + btree->b_inode->i_ino, (unsigned long long)ptr); brelse(bh); return -EIO; } @@ -540,7 +582,7 @@ static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, return ret; node = nilfs_btree_get_nonroot_node(path, level); - if (nilfs_btree_bad_node(node, level)) + if (nilfs_btree_bad_node(btree, node, level)) return -EINVAL; if (!found) found = nilfs_btree_node_lookup(node, key, &index); @@ -588,7 +630,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, level); - if (nilfs_btree_bad_node(node, level)) + if (nilfs_btree_bad_node(btree, node, level)) return -EINVAL; index = nilfs_btree_node_get_nchildren(node) - 1; ptr = nilfs_btree_node_get_ptr(node, index, ncmax); @@ -603,6 +645,43 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, return 0; } +/** + * nilfs_btree_get_next_key - get next valid key from btree path array + * @btree: bmap struct of btree + * @path: array of nilfs_btree_path struct + * @minlevel: start level + * @nextkey: place to store the next valid key + * + * Return: 0 if the next key was found, %-ENOENT if not found. + */ +static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, + const struct nilfs_btree_path *path, + int minlevel, __u64 *nextkey) +{ + struct nilfs_btree_node *node; + int maxlevel = nilfs_btree_height(btree) - 1; + int index, next_adj, level; + + /* Next index is already set to bp_index for leaf nodes. */ + next_adj = 0; + for (level = minlevel; level <= maxlevel; level++) { + if (level == maxlevel) + node = nilfs_btree_get_root(btree); + else + node = nilfs_btree_get_nonroot_node(path, level); + + index = path[level].bp_index + next_adj; + if (index < nilfs_btree_node_get_nchildren(node)) { + /* Next key is in this node */ + *nextkey = nilfs_btree_node_get_key(node, index); + return 0; + } + /* For non-leaf nodes, next index is stored at bp_index + 1. */ + next_adj = 1; + } + return -ENOENT; +} + static int nilfs_btree_lookup(const struct nilfs_bmap *btree, __u64 key, int level, __u64 *ptrp) { @@ -621,7 +700,8 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *btree, } static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, - __u64 key, __u64 *ptrp, unsigned maxblocks) + __u64 key, __u64 *ptrp, + unsigned int maxblocks) { struct nilfs_btree_path *path; struct nilfs_btree_node *node; @@ -644,7 +724,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, dat = nilfs_bmap_get_dat(btree); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr = blocknr; } cnt = 1; @@ -663,13 +743,12 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt || ++cnt == maxblocks) goto end; index++; - continue; } if (level == maxlevel) break; @@ -702,6 +781,11 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, out: nilfs_btree_free_path(path); return ret; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + goto out; } static void nilfs_btree_promote_key(struct nilfs_bmap *btree, @@ -851,8 +935,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; - __u64 newkey; - __u64 newptr; int nchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); @@ -874,9 +956,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree, if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); - newkey = nilfs_btree_node_get_key(right, 0); - newptr = path[level].bp_newreq.bpr_ptr; - if (move) { path[level].bp_index -= nilfs_btree_node_get_nchildren(node); nilfs_btree_node_insert(right, path[level].bp_index, @@ -969,12 +1048,12 @@ static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, if (ptr != NILFS_BMAP_INVALID_PTR) /* sequential access */ return ptr; - else { - ptr = nilfs_btree_find_near(btree, path); - if (ptr != NILFS_BMAP_INVALID_PTR) - /* near */ - return ptr; - } + + ptr = nilfs_btree_find_near(btree, path); + if (ptr != NILFS_BMAP_INVALID_PTR) + /* near */ + return ptr; + /* block group */ return nilfs_bmap_find_target_in_group(btree); } @@ -1533,6 +1612,27 @@ out: return ret; } +static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, + __u64 *keyp) +{ + struct nilfs_btree_path *path; + const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; + int ret; + + path = nilfs_btree_alloc_path(); + if (!path) + return -ENOMEM; + + ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); + if (!ret) + *keyp = start; + else if (ret == -ENOENT) + ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); + + nilfs_btree_free_path(path); + return ret; +} + static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) { struct nilfs_btree_path *path; @@ -1558,13 +1658,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) int nchildren, ret; root = nilfs_btree_get_root(btree); + nchildren = nilfs_btree_node_get_nchildren(root); + if (unlikely(nchildren == 0)) + return 0; + switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; break; case 3: - nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, @@ -1573,17 +1676,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; + nchildren = nilfs_btree_node_get_nchildren(node); break; default: return 0; } - nchildren = nilfs_btree_node_get_nchildren(node); maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? nilfs_btree_node_get_key(node, nchildren - 2) : 0; - if (bh != NULL) - brelse(bh); + brelse(bh); return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); } @@ -1631,8 +1733,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *btree, ptrs[i] = le64_to_cpu(dptrs[i]); } - if (bh != NULL) - brelse(bh); + brelse(bh); return nitems; } @@ -1657,6 +1758,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, dat = nilfs_bmap_get_dat(btree); } + ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); + if (ret < 0) + return ret; + ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); if (ret < 0) return ret; @@ -1713,7 +1818,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, /* convert and insert */ dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; - nilfs_btree_init(btree); + __nilfs_btree_init(btree); if (nreq != NULL) { nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); @@ -1755,19 +1860,28 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, } /** - * nilfs_btree_convert_and_insert - - * @bmap: - * @key: - * @ptr: - * @keys: - * @ptrs: - * @n: + * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree + * @btree: NILFS B-tree structure + * @key: Key of the new entry to be inserted + * @ptr: Pointer (block number) associated with the key to be inserted + * @keys: Array of keys to be inserted in addition to @key + * @ptrs: Array of pointers associated with @keys + * @n: Number of keys and pointers in @keys and @ptrs + * + * This function is used to insert a new entry specified by @key and @ptr, + * along with additional entries specified by @keys and @ptrs arrays, into a + * NILFS B-tree. + * It prepares the necessary changes by allocating the required blocks and any + * necessary intermediate nodes. It converts configurations from other forms of + * block mapping (the one that currently exists is direct mapping) to a B-tree. + * + * Return: 0 on success or a negative error code on failure. */ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, int n) { - struct buffer_head *bh; + struct buffer_head *bh = NULL; union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; struct nilfs_bmap_stats stats; int ret; @@ -1776,7 +1890,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, di = &dreq; ni = NULL; } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( - 1 << btree->b_inode->i_blkbits)) { + nilfs_btree_node_size(btree))) { di = &dreq; ni = &nreq; } else { @@ -1829,7 +1943,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; path[level].bp_ctxt.bh = path[level].bp_bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) { nilfs_dat_abort_update(dat, @@ -1855,7 +1969,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); path[level].bp_bh = path[level].bp_ctxt.bh; } @@ -1874,7 +1988,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, &path[level].bp_newreq.bpr_req); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); } @@ -1988,9 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { - if (unlikely(ret == -ENOENT)) - printk(KERN_CRIT "%s: key = %llu, level == %d\n", - __func__, (unsigned long long)key, level); + if (unlikely(ret == -ENOENT)) { + nilfs_crit(btree->b_inode->i_sb, + "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", + btree->b_inode->i_ino, + (unsigned long long)key, level); + ret = -EINVAL; + } goto out; } @@ -2027,12 +2145,11 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, if (level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX) { dump_stack(); - printk(KERN_WARNING - "%s: invalid btree level: %d (key=%llu, ino=%lu, " - "blocknr=%llu)\n", - __func__, level, (unsigned long long)key, - NILFS_BMAP_I(btree)->vfs_inode.i_ino, - (unsigned long long)bh->b_blocknr); + nilfs_warn(btree->b_inode->i_sb, + "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", + level, (unsigned long long)key, + btree->b_inode->i_ino, + (unsigned long long)bh->b_blocknr); return; } @@ -2049,9 +2166,10 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct list_head *listp) { - struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btcache = btnc_inode->i_mapping; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; - struct pagevec pvec; + struct folio_batch fbatch; struct buffer_head *bh, *head; pgoff_t index = 0; int level, i; @@ -2061,19 +2179,19 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, level++) INIT_LIST_HEAD(&lists[level]); - pagevec_init(&pvec, 0); + folio_batch_init(&fbatch); - while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) { - for (i = 0; i < pagevec_count(&pvec); i++) { - bh = head = page_buffers(pvec.pages[i]); + while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1, + PAGECACHE_TAG_DIRTY, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + bh = head = folio_buffers(fbatch.folios[i]); do { if (buffer_dirty(bh)) nilfs_btree_add_dirty_buffer(btree, lists, bh); } while ((bh = bh->b_this_page) != head); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); } @@ -2103,12 +2221,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, path[level].bp_ctxt.newkey = blocknr; path[level].bp_ctxt.bh = *bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) return ret; nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); *bh = path[level].bp_ctxt.bh; } @@ -2120,6 +2238,7 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, /* on-disk format */ binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = level; + memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } @@ -2268,7 +2387,9 @@ static const struct nilfs_bmap_operations nilfs_btree_ops = { .bop_assign = nilfs_btree_assign, .bop_mark = nilfs_btree_mark, + .bop_seek_key = nilfs_btree_seek_key, .bop_last_key = nilfs_btree_last_key, + .bop_check_insert = NULL, .bop_check_delete = nilfs_btree_check_delete, .bop_gather_data = nilfs_btree_gather_data, @@ -2288,18 +2409,34 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_assign = nilfs_btree_assign_gc, .bop_mark = NULL, + .bop_seek_key = NULL, .bop_last_key = NULL, + .bop_check_insert = NULL, .bop_check_delete = NULL, .bop_gather_data = NULL, }; -int nilfs_btree_init(struct nilfs_bmap *bmap) +static void __nilfs_btree_init(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops; bmap->b_nchildren_per_block = NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); - return 0; +} + +int nilfs_btree_init(struct nilfs_bmap *bmap) +{ + int ret = 0; + + __nilfs_btree_init(bmap); + + if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) + ret = -EIO; + else + ret = nilfs_attach_btree_node_cache( + &NILFS_BMAP_I(bmap)->vfs_inode); + + return ret; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 22c02e35b6ef..2a220f716c91 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * btree.h - NILFS B-tree. + * NILFS B-tree. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #ifndef _NILFS_BTREE_H @@ -26,7 +13,7 @@ #include <linux/types.h> #include <linux/buffer_head.h> #include <linux/list.h> -#include <linux/nilfs2_fs.h> +#include <linux/nilfs2_ondisk.h> /* nilfs_btree_node */ #include "btnode.h" #include "bmap.h" @@ -37,6 +24,7 @@ * @bp_index: index of child node * @bp_oldreq: ptr end request for old ptr * @bp_newreq: ptr alloc request for new ptr + * @bp_ctxt: context information for changing the key of a b-tree node block * @bp_op: rebalance operation */ struct nilfs_btree_path { diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index deaa3d33a0aa..4bbdc832d7f2 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * cpfile.c - NILFS checkpoint file. + * NILFS checkpoint file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #include <linux/kernel.h> @@ -25,7 +12,6 @@ #include <linux/string.h> #include <linux/buffer_head.h> #include <linux/errno.h> -#include <linux/nilfs2_fs.h> #include "mdt.h" #include "cpfile.h" @@ -41,7 +27,8 @@ static unsigned long nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno) { __u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1; - do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); + + tcno = div64_ul(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); return (unsigned long)tcno; } @@ -50,9 +37,17 @@ static unsigned long nilfs_cpfile_get_offset(const struct inode *cpfile, __u64 cno) { __u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1; + return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); } +static __u64 nilfs_cpfile_first_checkpoint_in_block(const struct inode *cpfile, + unsigned long blkoff) +{ + return (__u64)nilfs_cpfile_checkpoints_per_block(cpfile) * blkoff + + 1 - NILFS_MDT(cpfile)->mi_first_entry_offset; +} + static unsigned long nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile, __u64 curr, @@ -73,54 +68,41 @@ static inline int nilfs_cpfile_is_in_first(const struct inode *cpfile, static unsigned int nilfs_cpfile_block_add_valid_checkpoints(const struct inode *cpfile, struct buffer_head *bh, - void *kaddr, unsigned int n) { - struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + struct nilfs_checkpoint *cp; unsigned int count; + cp = kmap_local_folio(bh->b_folio, + offset_in_folio(bh->b_folio, bh->b_data)); count = le32_to_cpu(cp->cp_checkpoints_count) + n; cp->cp_checkpoints_count = cpu_to_le32(count); + kunmap_local(cp); return count; } static unsigned int nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile, struct buffer_head *bh, - void *kaddr, unsigned int n) { - struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + struct nilfs_checkpoint *cp; unsigned int count; + cp = kmap_local_folio(bh->b_folio, + offset_in_folio(bh->b_folio, bh->b_data)); WARN_ON(le32_to_cpu(cp->cp_checkpoints_count) < n); count = le32_to_cpu(cp->cp_checkpoints_count) - n; cp->cp_checkpoints_count = cpu_to_le32(count); + kunmap_local(cp); return count; } -static inline struct nilfs_cpfile_header * -nilfs_cpfile_block_get_header(const struct inode *cpfile, - struct buffer_head *bh, - void *kaddr) -{ - return kaddr + bh_offset(bh); -} - -static struct nilfs_checkpoint * -nilfs_cpfile_block_get_checkpoint(const struct inode *cpfile, __u64 cno, - struct buffer_head *bh, - void *kaddr) -{ - return kaddr + bh_offset(bh) + nilfs_cpfile_get_offset(cpfile, cno) * - NILFS_MDT(cpfile)->mi_entry_size; -} - static void nilfs_cpfile_block_init(struct inode *cpfile, struct buffer_head *bh, - void *kaddr) + void *from) { - struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + struct nilfs_checkpoint *cp = from; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; int n = nilfs_cpfile_checkpoints_per_block(cpfile); @@ -130,10 +112,65 @@ static void nilfs_cpfile_block_init(struct inode *cpfile, } } -static inline int nilfs_cpfile_get_header_block(struct inode *cpfile, - struct buffer_head **bhp) +/** + * nilfs_cpfile_checkpoint_offset - calculate the byte offset of a checkpoint + * entry in the folio containing it + * @cpfile: checkpoint file inode + * @cno: checkpoint number + * @bh: buffer head of block containing checkpoint indexed by @cno + * + * Return: Byte offset in the folio of the checkpoint specified by @cno. + */ +static size_t nilfs_cpfile_checkpoint_offset(const struct inode *cpfile, + __u64 cno, + struct buffer_head *bh) +{ + return offset_in_folio(bh->b_folio, bh->b_data) + + nilfs_cpfile_get_offset(cpfile, cno) * + NILFS_MDT(cpfile)->mi_entry_size; +} + +/** + * nilfs_cpfile_cp_snapshot_list_offset - calculate the byte offset of a + * checkpoint snapshot list in the folio + * containing it + * @cpfile: checkpoint file inode + * @cno: checkpoint number + * @bh: buffer head of block containing checkpoint indexed by @cno + * + * Return: Byte offset in the folio of the checkpoint snapshot list specified + * by @cno. + */ +static size_t nilfs_cpfile_cp_snapshot_list_offset(const struct inode *cpfile, + __u64 cno, + struct buffer_head *bh) +{ + return nilfs_cpfile_checkpoint_offset(cpfile, cno, bh) + + offsetof(struct nilfs_checkpoint, cp_snapshot_list); +} + +/** + * nilfs_cpfile_ch_snapshot_list_offset - calculate the byte offset of the + * snapshot list in the header + * + * Return: Byte offset in the folio of the checkpoint snapshot list + */ +static size_t nilfs_cpfile_ch_snapshot_list_offset(void) +{ + return offsetof(struct nilfs_cpfile_header, ch_snapshot_list); +} + +static int nilfs_cpfile_get_header_block(struct inode *cpfile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(cpfile->i_sb, + "missing header block in checkpoint metadata"); + err = -EIO; + } + return err; } static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, @@ -146,6 +183,41 @@ static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, create, nilfs_cpfile_block_init, bhp); } +/** + * nilfs_cpfile_find_checkpoint_block - find and get a buffer on cpfile + * @cpfile: inode of cpfile + * @start_cno: start checkpoint number (inclusive) + * @end_cno: end checkpoint number (inclusive) + * @cnop: place to store the next checkpoint number + * @bhp: place to store a pointer to buffer_head struct + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - no block exists in the range. + * * %-ENOMEM - Insufficient memory available. + */ +static int nilfs_cpfile_find_checkpoint_block(struct inode *cpfile, + __u64 start_cno, __u64 end_cno, + __u64 *cnop, + struct buffer_head **bhp) +{ + unsigned long start, end, blkoff; + int ret; + + if (unlikely(start_cno > end_cno)) + return -ENOENT; + + start = nilfs_cpfile_get_blkoff(cpfile, start_cno); + end = nilfs_cpfile_get_blkoff(cpfile, end_cno); + + ret = nilfs_mdt_find_block(cpfile, start, end, &blkoff, bhp); + if (!ret) + *cnop = (blkoff == start) ? start_cno : + nilfs_cpfile_first_checkpoint_in_block(cpfile, blkoff); + return ret; +} + static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile, __u64 cno) { @@ -154,126 +226,232 @@ static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile, } /** - * nilfs_cpfile_get_checkpoint - get a checkpoint - * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * @create: create flag - * @cpp: pointer to a checkpoint - * @bhp: pointer to a buffer head - * - * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint - * specified by @cno. A new checkpoint will be created if @cno is the current - * checkpoint number and @create is nonzero. + * nilfs_cpfile_read_checkpoint - read a checkpoint entry in cpfile + * @cpfile: checkpoint file inode + * @cno: number of checkpoint entry to read + * @root: nilfs root object + * @ifile: ifile's inode to read and attach to @root * - * Return Value: On success, 0 is returned, and the checkpoint and the - * buffer head of the buffer on which the checkpoint is located are stored in - * the place pointed by @cpp and @bhp, respectively. On error, one of the - * following negative error codes is returned. + * This function imports checkpoint information from the checkpoint file and + * stores it to the inode file given by @ifile and the nilfs root object + * given by @root. * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid checkpoint. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). + */ +int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, struct inode *ifile) +{ + struct buffer_head *cp_bh; + struct nilfs_checkpoint *cp; + size_t offset; + int ret; + + if (cno < 1 || cno > nilfs_mdt_cno(cpfile)) + return -EINVAL; + + down_read(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); + if (unlikely(ret < 0)) { + if (ret == -ENOENT) + ret = -EINVAL; + goto out_sem; + } + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); + if (nilfs_checkpoint_invalid(cp)) { + ret = -EINVAL; + goto put_cp; + } + + ret = nilfs_read_inode_common(ifile, &cp->cp_ifile_inode); + if (unlikely(ret)) { + /* + * Since this inode is on a checkpoint entry, treat errors + * as metadata corruption. + */ + nilfs_err(cpfile->i_sb, + "ifile inode (checkpoint number=%llu) corrupted", + (unsigned long long)cno); + ret = -EIO; + goto put_cp; + } + + /* Configure the nilfs root object */ + atomic64_set(&root->inodes_count, le64_to_cpu(cp->cp_inodes_count)); + atomic64_set(&root->blocks_count, le64_to_cpu(cp->cp_blocks_count)); + root->ifile = ifile; + +put_cp: + kunmap_local(cp); + brelse(cp_bh); +out_sem: + up_read(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} + +/** + * nilfs_cpfile_create_checkpoint - create a checkpoint entry on cpfile + * @cpfile: checkpoint file inode + * @cno: number of checkpoint to set up * - * %-ENOENT - No such checkpoint. + * This function creates a checkpoint with the number specified by @cno on + * cpfile. If the specified checkpoint entry already exists due to a past + * failure, it will be reused without returning an error. + * In either case, the buffer of the block containing the checkpoint entry + * and the cpfile inode are made dirty for inclusion in the write log. * - * %-EINVAL - invalid checkpoint. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). + * * %-EROFS - Read only filesystem */ -int nilfs_cpfile_get_checkpoint(struct inode *cpfile, - __u64 cno, - int create, - struct nilfs_checkpoint **cpp, - struct buffer_head **bhp) +int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno) { struct buffer_head *header_bh, *cp_bh; struct nilfs_cpfile_header *header; struct nilfs_checkpoint *cp; - void *kaddr; + size_t offset; int ret; - if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) || - (cno < nilfs_mdt_cno(cpfile) && create))) - return -EINVAL; + if (WARN_ON_ONCE(cno < 1)) + return -EIO; down_write(&NILFS_MDT(cpfile)->mi_sem); - ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (ret < 0) + if (unlikely(ret < 0)) goto out_sem; - ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create, &cp_bh); - if (ret < 0) + + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 1, &cp_bh); + if (unlikely(ret < 0)) goto out_header; - kaddr = kmap(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) { - if (!create) { - kunmap(cp_bh->b_page); - brelse(cp_bh); - ret = -ENOENT; - goto out_header; - } /* a newly-created checkpoint */ nilfs_checkpoint_clear_invalid(cp); + kunmap_local(cp); if (!nilfs_cpfile_is_in_first(cpfile, cno)) nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, - kaddr, 1); - mark_buffer_dirty(cp_bh); + 1); - kaddr = kmap_atomic(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, - kaddr); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_ncheckpoints, 1); - kunmap_atomic(kaddr); + kunmap_local(header); mark_buffer_dirty(header_bh); - nilfs_mdt_mark_dirty(cpfile); + } else { + kunmap_local(cp); } - if (cpp != NULL) - *cpp = cp; - *bhp = cp_bh; + /* Force the buffer and the inode to become dirty */ + mark_buffer_dirty(cp_bh); + brelse(cp_bh); + nilfs_mdt_mark_dirty(cpfile); - out_header: +out_header: brelse(header_bh); - out_sem: +out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); return ret; } /** - * nilfs_cpfile_put_checkpoint - put a checkpoint - * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * @bh: buffer head + * nilfs_cpfile_finalize_checkpoint - fill in a checkpoint entry in cpfile + * @cpfile: checkpoint file inode + * @cno: checkpoint number + * @root: nilfs root object + * @blkinc: number of blocks added by this checkpoint + * @ctime: checkpoint creation time + * @minor: minor checkpoint flag + * + * This function completes the checkpoint entry numbered by @cno in the + * cpfile with the data given by the arguments @root, @blkinc, @ctime, and + * @minor. * - * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint - * specified by @cno. @bh must be the buffer head which has been returned by - * a previous call to nilfs_cpfile_get_checkpoint() with @cno. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). */ -void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno, - struct buffer_head *bh) +int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, __u64 blkinc, + time64_t ctime, bool minor) { - kunmap(bh->b_page); - brelse(bh); + struct buffer_head *cp_bh; + struct nilfs_checkpoint *cp; + size_t offset; + int ret; + + if (WARN_ON_ONCE(cno < 1)) + return -EIO; + + down_write(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); + if (unlikely(ret < 0)) { + if (ret == -ENOENT) + goto error; + goto out_sem; + } + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); + if (unlikely(nilfs_checkpoint_invalid(cp))) { + kunmap_local(cp); + brelse(cp_bh); + goto error; + } + + cp->cp_snapshot_list.ssl_next = 0; + cp->cp_snapshot_list.ssl_prev = 0; + cp->cp_inodes_count = cpu_to_le64(atomic64_read(&root->inodes_count)); + cp->cp_blocks_count = cpu_to_le64(atomic64_read(&root->blocks_count)); + cp->cp_nblk_inc = cpu_to_le64(blkinc); + cp->cp_create = cpu_to_le64(ctime); + cp->cp_cno = cpu_to_le64(cno); + + if (minor) + nilfs_checkpoint_set_minor(cp); + else + nilfs_checkpoint_clear_minor(cp); + + nilfs_write_inode_common(root->ifile, &cp->cp_ifile_inode); + nilfs_bmap_write(NILFS_I(root->ifile)->i_bmap, &cp->cp_ifile_inode); + + kunmap_local(cp); + brelse(cp_bh); +out_sem: + up_write(&NILFS_MDT(cpfile)->mi_sem); + return ret; + +error: + nilfs_error(cpfile->i_sb, + "checkpoint finalization failed due to metadata corruption."); + ret = -EIO; + goto out_sem; } /** * nilfs_cpfile_delete_checkpoints - delete checkpoints * @cpfile: inode of checkpoint file * @start: start checkpoint number - * @end: end checkpoint numer + * @end: end checkpoint number * * Description: nilfs_cpfile_delete_checkpoints() deletes the checkpoints in * the period from @start to @end, excluding @end itself. The checkpoints * which have been already deleted are ignored. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - invalid checkpoints. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid checkpoints. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, __u64 start, @@ -284,14 +462,15 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, struct nilfs_checkpoint *cp; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; __u64 cno; + size_t offset; void *kaddr; unsigned long tnicps; int ret, ncps, nicps, nss, count, i; if (unlikely(start == 0 || start > end)) { - printk(KERN_ERR "%s: invalid range of checkpoint numbers: " - "[%llu, %llu)\n", __func__, - (unsigned long long)start, (unsigned long long)end); + nilfs_err(cpfile->i_sb, + "cannot delete checkpoints: invalid range [%llu, %llu)", + (unsigned long long)start, (unsigned long long)end); return -EINVAL; } @@ -314,9 +493,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; } - kaddr = kmap_atomic(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint( - cpfile, cno, cp_bh, kaddr); + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kaddr = kmap_local_folio(cp_bh->b_folio, offset); nicps = 0; for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { if (nilfs_checkpoint_snapshot(cp)) { @@ -326,43 +504,42 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, nicps++; } } - if (nicps > 0) { - tnicps += nicps; - mark_buffer_dirty(cp_bh); - nilfs_mdt_mark_dirty(cpfile); - if (!nilfs_cpfile_is_in_first(cpfile, cno)) { - count = - nilfs_cpfile_block_sub_valid_checkpoints( - cpfile, cp_bh, kaddr, nicps); - if (count == 0) { - /* make hole */ - kunmap_atomic(kaddr); - brelse(cp_bh); - ret = - nilfs_cpfile_delete_checkpoint_block( - cpfile, cno); - if (ret == 0) - continue; - printk(KERN_ERR - "%s: cannot delete block\n", - __func__); - break; - } - } + kunmap_local(kaddr); + + if (nicps <= 0) { + brelse(cp_bh); + continue; + } + + tnicps += nicps; + mark_buffer_dirty(cp_bh); + nilfs_mdt_mark_dirty(cpfile); + if (nilfs_cpfile_is_in_first(cpfile, cno)) { + brelse(cp_bh); + continue; } - kunmap_atomic(kaddr); + count = nilfs_cpfile_block_sub_valid_checkpoints(cpfile, cp_bh, + nicps); brelse(cp_bh); + if (count) + continue; + + /* Delete the block if there are no more valid checkpoints */ + ret = nilfs_cpfile_delete_checkpoint_block(cpfile, cno); + if (unlikely(ret)) { + nilfs_err(cpfile->i_sb, + "error %d deleting checkpoint block", ret); + break; + } } if (tnicps > 0) { - kaddr = kmap_atomic(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, - kaddr); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); - kunmap_atomic(kaddr); + kunmap_local(header); } brelse(header_bh); @@ -388,13 +565,15 @@ static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile, } static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, - void *buf, unsigned cisz, size_t nci) + void *buf, unsigned int cisz, + size_t nci) { struct nilfs_checkpoint *cp; struct nilfs_cpinfo *ci = buf; struct buffer_head *bh; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; __u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop; + size_t offset; void *kaddr; int n, ret; int ncps, i; @@ -403,17 +582,18 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, return -ENOENT; /* checkpoint number 0 is invalid */ down_read(&NILFS_MDT(cpfile)->mi_sem); - for (n = 0; cno < cur_cno && n < nci; cno += ncps) { - ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno); - ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); + for (n = 0; n < nci; cno += ncps) { + ret = nilfs_cpfile_find_checkpoint_block( + cpfile, cno, cur_cno - 1, &cno, &bh); if (ret < 0) { - if (ret != -ENOENT) - goto out; - continue; /* skip hole */ + if (likely(ret == -ENOENT)) + break; + goto out; } + ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno); - kaddr = kmap_atomic(bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, bh); + cp = kaddr = kmap_local_folio(bh->b_folio, offset); for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { if (!nilfs_checkpoint_invalid(cp)) { nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, @@ -422,7 +602,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, n++; } } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(bh); } @@ -438,7 +618,8 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, } static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, - void *buf, unsigned cisz, size_t nci) + void *buf, unsigned int cisz, + size_t nci) { struct buffer_head *bh; struct nilfs_cpfile_header *header; @@ -446,7 +627,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, struct nilfs_cpinfo *ci = buf; __u64 curr = *cnop, next; unsigned long curr_blkoff, next_blkoff; - void *kaddr; + size_t offset; int n = 0, ret; down_read(&NILFS_MDT(cpfile)->mi_sem); @@ -455,10 +636,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); + header = kmap_local_folio(bh->b_folio, 0); curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); - kunmap_atomic(kaddr); + kunmap_local(header); brelse(bh); if (curr == 0) { ret = 0; @@ -476,9 +656,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = 0; /* No snapshots (started from a hole block) */ goto out; } - kaddr = kmap_atomic(bh->b_page); + offset = nilfs_cpfile_checkpoint_offset(cpfile, curr, bh); + cp = kmap_local_folio(bh->b_folio, offset); while (n < nci) { - cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); curr = ~(__u64)0; /* Terminator */ if (unlikely(nilfs_checkpoint_invalid(cp) || !nilfs_checkpoint_snapshot(cp))) @@ -490,9 +670,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, if (next == 0) break; /* reach end of the snapshot list */ + kunmap_local(cp); next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); if (curr_blkoff != next_blkoff) { - kunmap_atomic(kaddr); brelse(bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &bh); @@ -500,12 +680,13 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, WARN_ON(ret == -ENOENT); goto out; } - kaddr = kmap_atomic(bh->b_page); } + offset = nilfs_cpfile_checkpoint_offset(cpfile, next, bh); + cp = kmap_local_folio(bh->b_folio, offset); curr = next; curr_blkoff = next_blkoff; } - kunmap_atomic(kaddr); + kunmap_local(cp); brelse(bh); *cnop = curr; ret = n; @@ -516,15 +697,33 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, } /** - * nilfs_cpfile_get_cpinfo - - * @cpfile: - * @cno: - * @ci: - * @nci: + * nilfs_cpfile_get_cpinfo - get information on checkpoints + * @cpfile: checkpoint file inode + * @cnop: place to pass a starting checkpoint number and receive a + * checkpoint number to continue the search + * @mode: mode of checkpoints that the caller wants to retrieve + * @buf: buffer for storing checkpoints' information + * @cisz: byte size of one checkpoint info item in array + * @nci: number of checkpoint info items to retrieve + * + * nilfs_cpfile_get_cpinfo() searches for checkpoints in @mode state + * starting from the checkpoint number stored in @cnop, and stores + * information about found checkpoints in @buf. + * The buffer pointed to by @buf must be large enough to store information + * for @nci checkpoints. If at least one checkpoint information is + * successfully retrieved, @cnop is updated to point to the checkpoint + * number to continue searching. + * + * Return: Count of checkpoint info items stored in the output buffer on + * success, or one of the following negative error codes on failure: + * * %-EINVAL - Invalid checkpoint mode. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Invalid checkpoint number specified. */ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, - void *buf, unsigned cisz, size_t nci) + void *buf, unsigned int cisz, size_t nci) { switch (mode) { case NILFS_CHECKPOINT: @@ -537,9 +736,16 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, } /** - * nilfs_cpfile_delete_checkpoint - - * @cpfile: - * @cno: + * nilfs_cpfile_delete_checkpoint - delete a checkpoint + * @cpfile: checkpoint file inode + * @cno: checkpoint number to delete + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Checkpoint in use (snapshot specified). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No valid checkpoint found. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) { @@ -558,26 +764,6 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1); } -static struct nilfs_snapshot_list * -nilfs_cpfile_block_get_snapshot_list(const struct inode *cpfile, - __u64 cno, - struct buffer_head *bh, - void *kaddr) -{ - struct nilfs_cpfile_header *header; - struct nilfs_checkpoint *cp; - struct nilfs_snapshot_list *list; - - if (cno != 0) { - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); - list = &cp->cp_snapshot_list; - } else { - header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); - list = &header->ch_snapshot_list; - } - return list; -} - static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) { struct buffer_head *header_bh, *curr_bh, *prev_bh, *cp_bh; @@ -586,94 +772,103 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) struct nilfs_snapshot_list *list; __u64 curr, prev; unsigned long curr_blkoff, prev_blkoff; - void *kaddr; + size_t offset, curr_list_offset, prev_list_offset; int ret; if (cno == 0) return -ENOENT; /* checkpoint number 0 is invalid */ down_write(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); + if (unlikely(ret < 0)) + goto out_sem; + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) - goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + goto out_header; + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr); + kunmap_local(cp); goto out_cp; } if (nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr); + kunmap_local(cp); goto out_cp; } - kunmap_atomic(kaddr); - - ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (ret < 0) - goto out_cp; - kaddr = kmap_atomic(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + kunmap_local(cp); + + /* + * Find the last snapshot before the checkpoint being changed to + * snapshot mode by going backwards through the snapshot list. + * Set "prev" to its checkpoint number, or 0 if not found. + */ + header = kmap_local_folio(header_bh->b_folio, 0); list = &header->ch_snapshot_list; curr_bh = header_bh; get_bh(curr_bh); curr = 0; curr_blkoff = 0; + curr_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); prev = le64_to_cpu(list->ssl_prev); while (prev > cno) { prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); curr = prev; + kunmap_local(list); if (curr_blkoff != prev_blkoff) { - kunmap_atomic(kaddr); brelse(curr_bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &curr_bh); - if (ret < 0) - goto out_header; - kaddr = kmap_atomic(curr_bh->b_page); + if (unlikely(ret < 0)) + goto out_cp; } + curr_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, curr, curr_bh); + list = kmap_local_folio(curr_bh->b_folio, curr_list_offset); curr_blkoff = prev_blkoff; - cp = nilfs_cpfile_block_get_checkpoint( - cpfile, curr, curr_bh, kaddr); - list = &cp->cp_snapshot_list; prev = le64_to_cpu(list->ssl_prev); } - kunmap_atomic(kaddr); + kunmap_local(list); if (prev != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, &prev_bh); if (ret < 0) goto out_curr; + + prev_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, prev, prev_bh); } else { prev_bh = header_bh; get_bh(prev_bh); + prev_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); } - kaddr = kmap_atomic(curr_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, curr, curr_bh, kaddr); + /* Update the list entry for the next snapshot */ + list = kmap_local_folio(curr_bh->b_folio, curr_list_offset); list->ssl_prev = cpu_to_le64(cno); - kunmap_atomic(kaddr); + kunmap_local(list); - kaddr = kmap_atomic(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + /* Update the checkpoint being changed to a snapshot */ + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); nilfs_checkpoint_set_snapshot(cp); - kunmap_atomic(kaddr); + kunmap_local(cp); - kaddr = kmap_atomic(prev_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, prev, prev_bh, kaddr); + /* Update the list entry for the previous snapshot */ + list = kmap_local_folio(prev_bh->b_folio, prev_list_offset); list->ssl_next = cpu_to_le64(cno); - kunmap_atomic(kaddr); + kunmap_local(list); - kaddr = kmap_atomic(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + /* Update the statistics in the header */ + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_nsnapshots, 1); - kunmap_atomic(kaddr); + kunmap_local(header); mark_buffer_dirty(prev_bh); mark_buffer_dirty(curr_bh); @@ -686,12 +881,12 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) out_curr: brelse(curr_bh); - out_header: - brelse(header_bh); - out_cp: brelse(cp_bh); + out_header: + brelse(header_bh); + out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); return ret; @@ -704,79 +899,87 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) struct nilfs_checkpoint *cp; struct nilfs_snapshot_list *list; __u64 next, prev; - void *kaddr; + size_t offset, next_list_offset, prev_list_offset; int ret; if (cno == 0) return -ENOENT; /* checkpoint number 0 is invalid */ down_write(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); + if (unlikely(ret < 0)) + goto out_sem; + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) - goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + goto out_header; + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh); + cp = kmap_local_folio(cp_bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr); + kunmap_local(cp); goto out_cp; } if (!nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr); + kunmap_local(cp); goto out_cp; } list = &cp->cp_snapshot_list; next = le64_to_cpu(list->ssl_next); prev = le64_to_cpu(list->ssl_prev); - kunmap_atomic(kaddr); + kunmap_local(cp); - ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (ret < 0) - goto out_cp; if (next != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &next_bh); if (ret < 0) - goto out_header; + goto out_cp; + + next_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, next, next_bh); } else { next_bh = header_bh; get_bh(next_bh); + next_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); } if (prev != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, &prev_bh); if (ret < 0) goto out_next; + + prev_list_offset = nilfs_cpfile_cp_snapshot_list_offset( + cpfile, prev, prev_bh); } else { prev_bh = header_bh; get_bh(prev_bh); + prev_list_offset = nilfs_cpfile_ch_snapshot_list_offset(); } - kaddr = kmap_atomic(next_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, next, next_bh, kaddr); + /* Update the list entry for the next snapshot */ + list = kmap_local_folio(next_bh->b_folio, next_list_offset); list->ssl_prev = cpu_to_le64(prev); - kunmap_atomic(kaddr); + kunmap_local(list); - kaddr = kmap_atomic(prev_bh->b_page); - list = nilfs_cpfile_block_get_snapshot_list( - cpfile, prev, prev_bh, kaddr); + /* Update the list entry for the previous snapshot */ + list = kmap_local_folio(prev_bh->b_folio, prev_list_offset); list->ssl_next = cpu_to_le64(next); - kunmap_atomic(kaddr); + kunmap_local(list); - kaddr = kmap_atomic(cp_bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + /* Update the snapshot being changed back to a plain checkpoint */ + cp = kmap_local_folio(cp_bh->b_folio, offset); cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); nilfs_checkpoint_clear_snapshot(cp); - kunmap_atomic(kaddr); + kunmap_local(cp); - kaddr = kmap_atomic(header_bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + /* Update the statistics in the header */ + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->ch_nsnapshots, -1); - kunmap_atomic(kaddr); + kunmap_local(header); mark_buffer_dirty(next_bh); mark_buffer_dirty(prev_bh); @@ -789,43 +992,39 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) out_next: brelse(next_bh); - out_header: - brelse(header_bh); - out_cp: brelse(cp_bh); + out_header: + brelse(header_bh); + out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); return ret; } /** - * nilfs_cpfile_is_snapshot - + * nilfs_cpfile_is_snapshot - determine if checkpoint is a snapshot * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * - * Description: - * - * Return Value: On success, 1 is returned if the checkpoint specified by - * @cno is a snapshot, or 0 if not. On error, one of the following negative - * error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * @cno: checkpoint number * - * %-ENOENT - No such checkpoint. + * Return: 1 if the checkpoint specified by @cno is a snapshot, 0 if not, or + * one of the following negative error codes on failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No such checkpoint. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) { struct buffer_head *bh; struct nilfs_checkpoint *cp; - void *kaddr; + size_t offset; int ret; - /* CP number is invalid if it's zero or larger than the - largest exist one.*/ + /* + * CP number is invalid if it's zero or larger than the + * largest existing one. + */ if (cno == 0 || cno >= nilfs_mdt_cno(cpfile)) return -ENOENT; down_read(&NILFS_MDT(cpfile)->mi_sem); @@ -833,13 +1032,14 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page); - cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); + + offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, bh); + cp = kmap_local_folio(bh->b_folio, offset); if (nilfs_checkpoint_invalid(cp)) ret = -ENOENT; else ret = nilfs_checkpoint_snapshot(cp); - kunmap_atomic(kaddr); + kunmap_local(cp); brelse(bh); out: @@ -851,19 +1051,16 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) * nilfs_cpfile_change_cpmode - change checkpoint mode * @cpfile: inode of checkpoint file * @cno: checkpoint number - * @status: mode of checkpoint + * @mode: mode of checkpoint * * Description: nilfs_change_cpmode() changes the mode of the checkpoint * specified by @cno. The mode @mode is NILFS_CHECKPOINT or NILFS_SNAPSHOT. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - No such checkpoint. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No such checkpoint. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) { @@ -892,23 +1089,20 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) /** * nilfs_cpfile_get_stat - get checkpoint statistics * @cpfile: inode of checkpoint file - * @stat: pointer to a structure of checkpoint statistics + * @cpstat: pointer to a structure of checkpoint statistics * * Description: nilfs_cpfile_get_stat() returns information about checkpoints. + * The checkpoint statistics are stored in the location pointed to by @cpstat. * - * Return Value: On success, 0 is returned, and checkpoints information is - * stored in the place pointed by @stat. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) { struct buffer_head *bh; struct nilfs_cpfile_header *header; - void *kaddr; int ret; down_read(&NILFS_MDT(cpfile)->mi_sem); @@ -916,12 +1110,11 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page); - header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); + header = kmap_local_folio(bh->b_folio, 0); cpstat->cs_cno = nilfs_mdt_cno(cpfile); cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); - kunmap_atomic(kaddr); + kunmap_local(header); brelse(bh); out_sem: @@ -935,6 +1128,8 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) * @cpsize: size of a checkpoint entry * @raw_inode: on-disk cpfile inode * @inodep: buffer to store the inode + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, struct nilfs_inode *raw_inode, struct inode **inodep) @@ -942,10 +1137,18 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, struct inode *cpfile; int err; + if (cpsize > sb->s_blocksize) { + nilfs_err(sb, "too large checkpoint size: %zu bytes", cpsize); + return -EINVAL; + } else if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) { + nilfs_err(sb, "too small checkpoint size: %zu bytes", cpsize); + return -EINVAL; + } + cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); if (unlikely(!cpfile)) return -ENOMEM; - if (!(cpfile->i_state & I_NEW)) + if (!(inode_state_read_once(cpfile) & I_NEW)) goto out; err = nilfs_mdt_init(cpfile, NILFS_MDT_GFP, 0); diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index a242b9a314f9..f5b1d59289eb 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * cpfile.h - NILFS checkpoint file. + * NILFS checkpoint file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #ifndef _NILFS_CPFILE_H @@ -25,20 +12,23 @@ #include <linux/fs.h> #include <linux/buffer_head.h> -#include <linux/nilfs2_fs.h> +#include <linux/nilfs2_api.h> /* nilfs_cpstat */ +#include <linux/nilfs2_ondisk.h> /* nilfs_inode, nilfs_checkpoint */ -int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int, - struct nilfs_checkpoint **, - struct buffer_head **); -void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *); +int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, struct inode *ifile); +int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno); +int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, __u64 blkinc, + time64_t ctime, bool minor); int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64); int nilfs_cpfile_delete_checkpoint(struct inode *, __u64); int nilfs_cpfile_change_cpmode(struct inode *, __u64, int); int nilfs_cpfile_is_snapshot(struct inode *, __u64); int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); -ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, - size_t); +ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, + unsigned int, size_t); int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, struct nilfs_inode *raw_inode, struct inode **inodep); diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index fa0f80308c2d..674380837ab9 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * dat.c - NILFS disk address translation. + * NILFS disk address translation. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #include <linux/types.h> @@ -53,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) static int nilfs_dat_prepare_entry(struct inode *dat, struct nilfs_palloc_req *req, int create) { - return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, - create, &req->pr_entry_bh); + int ret; + + ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, + create, &req->pr_entry_bh); + if (unlikely(ret == -ENOENT)) { + nilfs_err(dat->i_sb, + "DAT doesn't have a block to manage vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + /* + * Return internal code -EINVAL to notify bmap layer of + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } static void nilfs_dat_commit_entry(struct inode *dat, @@ -75,7 +75,7 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) { int ret; - ret = nilfs_palloc_prepare_alloc_entry(dat, req); + ret = nilfs_palloc_prepare_alloc_entry(dat, req, true); if (ret < 0) return ret; @@ -89,15 +89,15 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MAX); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr); + kunmap_local(entry); nilfs_palloc_commit_alloc_entry(dat, req); nilfs_dat_commit_entry(dat, req); @@ -113,41 +113,44 @@ static void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MIN); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr); + kunmap_local(entry); nilfs_dat_commit_entry(dat, req); + + if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { + nilfs_error(dat->i_sb, + "state inconsistency probably due to duplicate use of vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + return; + } nilfs_palloc_commit_free_entry(dat, req); } int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) { - int ret; - - ret = nilfs_dat_prepare_entry(dat, req, 0); - WARN_ON(ret == -ENOENT); - return ret; + return nilfs_dat_prepare_entry(dat, req, 0); } void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, sector_t blocknr) { struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr); + kunmap_local(entry); nilfs_dat_commit_entry(dat, req); } @@ -157,21 +160,19 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) struct nilfs_dat_entry *entry; __u64 start; sector_t blocknr; - void *kaddr; + size_t offset; int ret; ret = nilfs_dat_prepare_entry(dat, req, 0); - if (ret < 0) { - WARN_ON(ret == -ENOENT); + if (ret < 0) return ret; - } - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr); + kunmap_local(entry); if (blocknr == 0) { ret = nilfs_palloc_prepare_free_entry(dat, req); @@ -180,6 +181,15 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) return ret; } } + if (unlikely(start > nilfs_mdt_cno(dat))) { + nilfs_err(dat->i_sb, + "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)", + (unsigned long long)req->pr_entry_nr, + (unsigned long long)start, + (unsigned long long)nilfs_mdt_cno(dat)); + nilfs_dat_abort_entry(dat, req); + return -EINVAL; + } return 0; } @@ -190,11 +200,11 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, struct nilfs_dat_entry *entry; __u64 start, end; sector_t blocknr; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); end = start = le64_to_cpu(entry->de_start); if (!dead) { end = nilfs_mdt_cno(dat); @@ -202,7 +212,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, } entry->de_end = cpu_to_le64(end); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr); + kunmap_local(entry); if (blocknr == 0) nilfs_dat_commit_free(dat, req); @@ -215,14 +225,14 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) struct nilfs_dat_entry *entry; __u64 start; sector_t blocknr; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, - req->pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, + req->pr_entry_bh); + entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr); + kunmap_local(entry); if (start == nilfs_mdt_cno(dat) && blocknr == 0) nilfs_palloc_abort_free_entry(dat, req); @@ -261,18 +271,16 @@ void nilfs_dat_abort_update(struct inode *dat, } /** - * nilfs_dat_mark_dirty - - * @dat: DAT file inode + * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified + * virtual block address entry as dirty + * @dat: DAT file inode * @vblocknr: virtual block number * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid DAT entry (internal code). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) { @@ -295,14 +303,11 @@ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) * Description: nilfs_dat_freev() frees the virtual block numbers specified by * @vblocknrs and @nitems. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The virtual block number have not been allocated. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - The virtual block number have not been allocated. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) { @@ -318,18 +323,16 @@ int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) * Description: nilfs_dat_move() changes the block number associated with * @vblocknr to @blocknr. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) { struct buffer_head *entry_bh; struct nilfs_dat_entry *entry; - void *kaddr; + size_t offset; int ret; ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); @@ -352,20 +355,21 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) } } - kaddr = kmap_atomic(entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh); + entry = kmap_local_folio(entry_bh->b_folio, offset); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { - printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, - (unsigned long long)vblocknr, - (unsigned long long)le64_to_cpu(entry->de_start), - (unsigned long long)le64_to_cpu(entry->de_end)); - kunmap_atomic(kaddr); + nilfs_crit(dat->i_sb, + "%s: invalid vblocknr = %llu, [%llu, %llu)", + __func__, (unsigned long long)vblocknr, + (unsigned long long)le64_to_cpu(entry->de_start), + (unsigned long long)le64_to_cpu(entry->de_end)); + kunmap_local(entry); brelse(entry_bh); return -EINVAL; } WARN_ON(blocknr == 0); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr); + kunmap_local(entry); mark_buffer_dirty(entry_bh); nilfs_mdt_mark_dirty(dat); @@ -382,24 +386,21 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) * @blocknrp: pointer to a block number * * Description: nilfs_dat_translate() maps the virtual block number @vblocknr - * to the corresponding block number. - * - * Return Value: On success, 0 is returned and the block number associated - * with @vblocknr is stored in the place pointed by @blocknrp. On error, one - * of the following negative error codes is returned. + * to the corresponding block number. The block number associated with + * @vblocknr is stored in the place pointed to by @blocknrp. * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - A block number associated with @vblocknr does not exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - A block number associated with @vblocknr does not exist. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) { struct buffer_head *entry_bh, *bh; struct nilfs_dat_entry *entry; sector_t blocknr; - void *kaddr; + size_t offset; int ret; ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); @@ -415,8 +416,8 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) } } - kaddr = kmap_atomic(entry_bh->b_page); - entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh); + entry = kmap_local_folio(entry_bh->b_folio, offset); blocknr = le64_to_cpu(entry->de_blocknr); if (blocknr == 0) { ret = -ENOENT; @@ -425,20 +426,21 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) *blocknrp = blocknr; out: - kunmap_atomic(kaddr); + kunmap_local(entry); brelse(entry_bh); return ret; } -ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, +ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, size_t nvi) { struct buffer_head *entry_bh; - struct nilfs_dat_entry *entry; + struct nilfs_dat_entry *entry, *first_entry; struct nilfs_vinfo *vinfo = buf; __u64 first, last; - void *kaddr; + size_t offset; unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; + unsigned int entry_size = NILFS_MDT(dat)->mi_entry_size; int i, j, n, ret; for (i = 0; i < nvi; i += n) { @@ -446,23 +448,28 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, 0, &entry_bh); if (ret < 0) return ret; - kaddr = kmap_atomic(entry_bh->b_page); - /* last virtual block number in this block */ + first = vinfo->vi_vblocknr; - do_div(first, entries_per_block); + first = div64_ul(first, entries_per_block); first *= entries_per_block; + /* first virtual block number in this block */ + last = first + entries_per_block - 1; + /* last virtual block number in this block */ + + offset = nilfs_palloc_entry_offset(dat, first, entry_bh); + first_entry = kmap_local_folio(entry_bh->b_folio, offset); for (j = i, n = 0; j < nvi && vinfo->vi_vblocknr >= first && vinfo->vi_vblocknr <= last; j++, n++, vinfo = (void *)vinfo + visz) { - entry = nilfs_palloc_block_get_entry( - dat, vinfo->vi_vblocknr, entry_bh, kaddr); + entry = (void *)first_entry + + (vinfo->vi_vblocknr - first) * entry_size; vinfo->vi_start = le64_to_cpu(entry->de_start); vinfo->vi_end = le64_to_cpu(entry->de_end); vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); } - kunmap_atomic(kaddr); + kunmap_local(first_entry); brelse(entry_bh); } @@ -475,6 +482,8 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, * @entry_size: size of a dat entry * @raw_inode: on-disk dat inode * @inodep: buffer to store the inode + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_dat_read(struct super_block *sb, size_t entry_size, struct nilfs_inode *raw_inode, struct inode **inodep) @@ -484,10 +493,20 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, struct nilfs_dat_info *di; int err; + if (entry_size > sb->s_blocksize) { + nilfs_err(sb, "too large DAT entry size: %zu bytes", + entry_size); + return -EINVAL; + } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { + nilfs_err(sb, "too small DAT entry size: %zu bytes", + entry_size); + return -EINVAL; + } + dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); if (unlikely(!dat)) return -ENOMEM; - if (!(dat->i_state & I_NEW)) + if (!(inode_state_read_once(dat) & I_NEW)) goto out; err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di)); @@ -501,7 +520,9 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, di = NILFS_DAT_I(dat); lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); nilfs_palloc_setup_cache(dat, &di->palloc_cache); - nilfs_mdt_setup_shadow_map(dat, &di->shadow); + err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); + if (err) + goto failed; err = nilfs_read_inode_common(dat, raw_inode); if (err) diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index cbd8e9732503..468c82d26183 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * dat.h - NILFS disk address translation. + * NILFS disk address translation. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #ifndef _NILFS_DAT_H @@ -26,6 +13,7 @@ #include <linux/types.h> #include <linux/buffer_head.h> #include <linux/fs.h> +#include <linux/nilfs2_ondisk.h> /* nilfs_inode, nilfs_checkpoint */ struct nilfs_palloc_req; @@ -51,7 +39,7 @@ void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *, int nilfs_dat_mark_dirty(struct inode *, __u64); int nilfs_dat_freev(struct inode *, __u64 *, size_t); int nilfs_dat_move(struct inode *, __u64, sector_t); -ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); +ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned int, size_t); int nilfs_dat_read(struct super_block *sb, size_t entry_size, struct nilfs_inode *raw_inode, struct inode **inodep); diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 197a63e9d102..6ca3d74be1e1 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * dir.c - NILFS directory entry operations + * NILFS directory entry operations * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Modified for NILFS by Amagai Yoshiji <amagai@osrg.net> + * Modified for NILFS by Amagai Yoshiji. */ /* * linux/fs/ext2/dir.c @@ -46,80 +33,91 @@ #include "nilfs.h" #include "page.h" -/* - * nilfs uses block-sized chunks. Arguably, sector-sized ones would be - * more robust, but we have what we have - */ -static inline unsigned nilfs_chunk_size(struct inode *inode) +static inline unsigned int nilfs_rec_len_from_disk(__le16 dlen) { - return inode->i_sb->s_blocksize; + unsigned int len = le16_to_cpu(dlen); + +#if (PAGE_SIZE >= 65536) + if (len == NILFS_MAX_REC_LEN) + return 1 << 16; +#endif + return len; } -static inline void nilfs_put_page(struct page *page) +static inline __le16 nilfs_rec_len_to_disk(unsigned int len) { - kunmap(page); - page_cache_release(page); +#if (PAGE_SIZE >= 65536) + if (len == (1 << 16)) + return cpu_to_le16(NILFS_MAX_REC_LEN); + + BUG_ON(len > (1 << 16)); +#endif + return cpu_to_le16(len); } -static inline unsigned long dir_pages(struct inode *inode) +/* + * nilfs uses block-sized chunks. Arguably, sector-sized ones would be + * more robust, but we have what we have + */ +static inline unsigned int nilfs_chunk_size(struct inode *inode) { - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; + return inode->i_sb->s_blocksize; } /* * Return the offset into page `page_nr' of the last valid * byte in that page, plus one. */ -static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr) +static unsigned int nilfs_last_byte(struct inode *inode, unsigned long page_nr) { - unsigned last_byte = inode->i_size; + u64 last_byte = inode->i_size; - last_byte -= page_nr << PAGE_CACHE_SHIFT; - if (last_byte > PAGE_CACHE_SIZE) - last_byte = PAGE_CACHE_SIZE; + last_byte -= page_nr << PAGE_SHIFT; + if (last_byte > PAGE_SIZE) + last_byte = PAGE_SIZE; return last_byte; } -static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to) +static int nilfs_prepare_chunk(struct folio *folio, unsigned int from, + unsigned int to) { - loff_t pos = page_offset(page) + from; - return __block_write_begin(page, pos, to - from, nilfs_get_block); + loff_t pos = folio_pos(folio) + from; + + return __block_write_begin(folio, pos, to - from, nilfs_get_block); } -static void nilfs_commit_chunk(struct page *page, - struct address_space *mapping, - unsigned from, unsigned to) +static void nilfs_commit_chunk(struct folio *folio, + struct address_space *mapping, size_t from, size_t to) { struct inode *dir = mapping->host; - loff_t pos = page_offset(page) + from; - unsigned len = to - from; - unsigned nr_dirty, copied; + loff_t pos = folio_pos(folio) + from; + size_t copied, len = to - from; + unsigned int nr_dirty; int err; - nr_dirty = nilfs_page_count_clean_buffers(page, from, to); - copied = block_write_end(NULL, mapping, pos, len, len, page, NULL); + nr_dirty = nilfs_page_count_clean_buffers(folio, from, to); + copied = block_write_end(pos, len, len, folio); if (pos + copied > dir->i_size) i_size_write(dir, pos + copied); if (IS_DIRSYNC(dir)) nilfs_set_transaction_flag(NILFS_TI_SYNC); err = nilfs_set_file_dirty(dir, nr_dirty); WARN_ON(err); /* do not happen */ - unlock_page(page); + folio_unlock(folio); } -static void nilfs_check_page(struct page *page) +static bool nilfs_check_folio(struct folio *folio, char *kaddr) { - struct inode *dir = page->mapping->host; + struct inode *dir = folio->mapping->host; struct super_block *sb = dir->i_sb; - unsigned chunk_size = nilfs_chunk_size(dir); - char *kaddr = page_address(page); - unsigned offs, rec_len; - unsigned limit = PAGE_CACHE_SIZE; + unsigned int chunk_size = nilfs_chunk_size(dir); + size_t offs, rec_len; + size_t limit = folio_size(folio); struct nilfs_dir_entry *p; char *error; - if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { - limit = dir->i_size & ~PAGE_CACHE_MASK; + if (dir->i_size < folio_pos(folio) + limit) { + limit = dir->i_size - folio_pos(folio); if (limit & (chunk_size - 1)) goto Ebadsize; if (!limit) @@ -137,20 +135,22 @@ static void nilfs_check_page(struct page *page) goto Enamelen; if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) goto Espan; + if (unlikely(p->inode && + NILFS_PRIVATE_INODE(le64_to_cpu(p->inode)))) + goto Einumber; } if (offs != limit) goto Eend; out: - SetPageChecked(page); - return; + folio_set_checked(folio); + return true; /* Too bad, we had an error */ Ebadsize: - nilfs_error(sb, "nilfs_check_page", + nilfs_error(sb, "size of directory #%lu is not a multiple of chunk size", - dir->i_ino - ); + dir->i_ino); goto fail; Eshort: error = "rec_len is smaller than minimal"; @@ -163,41 +163,47 @@ Enamelen: goto bad_entry; Espan: error = "directory entry across blocks"; + goto bad_entry; +Einumber: + error = "disallowed inode number"; bad_entry: - nilfs_error(sb, "nilfs_check_page", "bad entry in directory #%lu: %s - " - "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, - (unsigned long) le64_to_cpu(p->inode), + nilfs_error(sb, + "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d", + dir->i_ino, error, (folio->index << PAGE_SHIFT) + offs, + (unsigned long)le64_to_cpu(p->inode), rec_len, p->name_len); goto fail; Eend: p = (struct nilfs_dir_entry *)(kaddr + offs); - nilfs_error(sb, "nilfs_check_page", - "entry in directory #%lu spans the page boundary" - "offset=%lu, inode=%lu", - dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, - (unsigned long) le64_to_cpu(p->inode)); + nilfs_error(sb, + "entry in directory #%lu spans the page boundary offset=%lu, inode=%lu", + dir->i_ino, (folio->index << PAGE_SHIFT) + offs, + (unsigned long)le64_to_cpu(p->inode)); fail: - SetPageChecked(page); - SetPageError(page); + return false; } -static struct page *nilfs_get_page(struct inode *dir, unsigned long n) +static void *nilfs_get_folio(struct inode *dir, unsigned long n, + struct folio **foliop) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_mapping_page(mapping, n, NULL); + struct folio *folio = read_mapping_folio(mapping, n, NULL); + void *kaddr; + + if (IS_ERR(folio)) + return folio; - if (!IS_ERR(page)) { - kmap(page); - if (!PageChecked(page)) - nilfs_check_page(page); - if (PageError(page)) + kaddr = kmap_local_folio(folio, 0); + if (unlikely(!folio_test_checked(folio))) { + if (!nilfs_check_folio(folio, kaddr)) goto fail; } - return page; + + *foliop = folio; + return kaddr; fail: - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return ERR_PTR(-EIO); } @@ -225,46 +231,14 @@ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) nilfs_rec_len_from_disk(p->rec_len)); } -static unsigned char -nilfs_filetype_table[NILFS_FT_MAX] = { - [NILFS_FT_UNKNOWN] = DT_UNKNOWN, - [NILFS_FT_REG_FILE] = DT_REG, - [NILFS_FT_DIR] = DT_DIR, - [NILFS_FT_CHRDEV] = DT_CHR, - [NILFS_FT_BLKDEV] = DT_BLK, - [NILFS_FT_FIFO] = DT_FIFO, - [NILFS_FT_SOCK] = DT_SOCK, - [NILFS_FT_SYMLINK] = DT_LNK, -}; - -#define S_SHIFT 12 -static unsigned char -nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK, -}; - -static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) -{ - umode_t mode = inode->i_mode; - - de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - static int nilfs_readdir(struct file *file, struct dir_context *ctx) { loff_t pos = ctx->pos; struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; - unsigned int offset = pos & ~PAGE_CACHE_MASK; - unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned int offset = pos & ~PAGE_MASK; + unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); -/* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */ if (pos > inode->i_size - NILFS_DIR_REC_LEN(1)) return 0; @@ -272,102 +246,95 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) for ( ; n < npages; n++, offset = 0) { char *kaddr, *limit; struct nilfs_dir_entry *de; - struct page *page = nilfs_get_page(inode, n); + struct folio *folio; - if (IS_ERR(page)) { - nilfs_error(sb, __func__, "bad page in #%lu", - inode->i_ino); - ctx->pos += PAGE_CACHE_SIZE - offset; + kaddr = nilfs_get_folio(inode, n, &folio); + if (IS_ERR(kaddr)) { + nilfs_error(sb, "bad page in #%lu", inode->i_ino); + ctx->pos += PAGE_SIZE - offset; return -EIO; } - kaddr = page_address(page); de = (struct nilfs_dir_entry *)(kaddr + offset); limit = kaddr + nilfs_last_byte(inode, n) - NILFS_DIR_REC_LEN(1); for ( ; (char *)de <= limit; de = nilfs_next_entry(de)) { if (de->rec_len == 0) { - nilfs_error(sb, __func__, - "zero-length directory entry"); - nilfs_put_page(page); + nilfs_error(sb, "zero-length directory entry"); + folio_release_kmap(folio, kaddr); return -EIO; } if (de->inode) { unsigned char t; - if (de->file_type < NILFS_FT_MAX) - t = nilfs_filetype_table[de->file_type]; - else - t = DT_UNKNOWN; + t = fs_ftype_to_dtype(de->file_type); if (!dir_emit(ctx, de->name, de->name_len, le64_to_cpu(de->inode), t)) { - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return 0; } } ctx->pos += nilfs_rec_len_from_disk(de->rec_len); } - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); } return 0; } /* - * nilfs_find_entry() + * nilfs_find_entry() * - * finds an entry in the specified directory with the wanted name. It - * returns the page in which the entry was found, and the entry itself - * (as a parameter - res_dir). Page is returned mapped and unlocked. - * Entry is guaranteed to be valid. + * Finds an entry in the specified directory with the wanted name. It + * returns the folio in which the entry was found, and the entry itself. + * The folio is mapped and unlocked. When the caller is finished with + * the entry, it should call folio_release_kmap(). + * + * On failure, returns an error pointer and the caller should ignore foliop. */ -struct nilfs_dir_entry * -nilfs_find_entry(struct inode *dir, const struct qstr *qstr, - struct page **res_page) +struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, + const struct qstr *qstr, struct folio **foliop) { const unsigned char *name = qstr->name; int namelen = qstr->len; - unsigned reclen = NILFS_DIR_REC_LEN(namelen); + unsigned int reclen = NILFS_DIR_REC_LEN(namelen); unsigned long start, n; unsigned long npages = dir_pages(dir); - struct page *page = NULL; struct nilfs_inode_info *ei = NILFS_I(dir); struct nilfs_dir_entry *de; if (npages == 0) goto out; - /* OFFSET_CACHE */ - *res_page = NULL; - start = ei->i_dir_start_lookup; if (start >= npages) start = 0; n = start; do { - char *kaddr; - page = nilfs_get_page(dir, n); - if (!IS_ERR(page)) { - kaddr = page_address(page); - de = (struct nilfs_dir_entry *)kaddr; - kaddr += nilfs_last_byte(dir, n) - reclen; - while ((char *) de <= kaddr) { - if (de->rec_len == 0) { - nilfs_error(dir->i_sb, __func__, - "zero-length directory entry"); - nilfs_put_page(page); - goto out; - } - if (nilfs_match(namelen, name, de)) - goto found; - de = nilfs_next_entry(de); + char *kaddr = nilfs_get_folio(dir, n, foliop); + + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); + + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(dir, n) - reclen; + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + nilfs_error(dir->i_sb, + "zero-length directory entry"); + folio_release_kmap(*foliop, kaddr); + goto out; } - nilfs_put_page(page); + if (nilfs_match(namelen, name, de)) + goto found; + de = nilfs_next_entry(de); } + folio_release_kmap(*foliop, kaddr); + if (++n >= npages) n = 0; - /* next page is past the blocks we've got */ - if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { - nilfs_error(dir->i_sb, __func__, + /* next folio is past the blocks we've got */ + if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) { + nilfs_error(dir->i_sb, "dir %lu size %lld exceeds block count %llu", dir->i_ino, dir->i_size, (unsigned long long)dir->i_blocks); @@ -375,59 +342,83 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr, } } while (n != start); out: - return NULL; + return ERR_PTR(-ENOENT); found: - *res_page = page; ei->i_dir_start_lookup = n; return de; } -struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p) +struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop) { - struct page *page = nilfs_get_page(dir, 0); - struct nilfs_dir_entry *de = NULL; + struct folio *folio; + struct nilfs_dir_entry *de, *next_de; + size_t limit; + char *msg; + + de = nilfs_get_folio(dir, 0, &folio); + if (IS_ERR(de)) + return NULL; + + limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */ + if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino || + !nilfs_match(1, ".", de))) { + msg = "missing '.'"; + goto fail; + } - if (!IS_ERR(page)) { - de = nilfs_next_entry( - (struct nilfs_dir_entry *)page_address(page)); - *p = page; + next_de = nilfs_next_entry(de); + /* + * If "next_de" has not reached the end of the chunk, there is + * at least one more record. Check whether it matches "..". + */ + if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) || + !nilfs_match(2, "..", next_de))) { + msg = "missing '..'"; + goto fail; } - return de; + *foliop = folio; + return next_de; + +fail: + nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg); + folio_release_kmap(folio, de); + return NULL; } -ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino) { - ino_t res = 0; struct nilfs_dir_entry *de; - struct page *page; + struct folio *folio; - de = nilfs_find_entry(dir, qstr, &page); - if (de) { - res = le64_to_cpu(de->inode); - kunmap(page); - page_cache_release(page); - } - return res; + de = nilfs_find_entry(dir, qstr, &folio); + if (IS_ERR(de)) + return PTR_ERR(de); + + *ino = le64_to_cpu(de->inode); + folio_release_kmap(folio, de); + return 0; } -/* Releases the page */ -void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, - struct page *page, struct inode *inode) +int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, + struct folio *folio, struct inode *inode) { - unsigned from = (char *) de - (char *) page_address(page); - unsigned to = from + nilfs_rec_len_from_disk(de->rec_len); - struct address_space *mapping = page->mapping; + size_t from = offset_in_folio(folio, de); + size_t to = from + nilfs_rec_len_from_disk(de->rec_len); + struct address_space *mapping = folio->mapping; int err; - lock_page(page); - err = nilfs_prepare_chunk(page, from, to); - BUG_ON(err); + folio_lock(folio); + err = nilfs_prepare_chunk(folio, from, to); + if (unlikely(err)) { + folio_unlock(folio); + return err; + } de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); - nilfs_commit_chunk(page, mapping, from, to); - nilfs_put_page(page); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; + de->file_type = fs_umode_to_ftype(inode->i_mode); + nilfs_commit_chunk(folio, mapping, from, to); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); + return 0; } /* @@ -435,37 +426,34 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, */ int nilfs_add_link(struct dentry *dentry, struct inode *inode) { - struct inode *dir = dentry->d_parent->d_inode; + struct inode *dir = d_inode(dentry->d_parent); const unsigned char *name = dentry->d_name.name; int namelen = dentry->d_name.len; - unsigned chunk_size = nilfs_chunk_size(dir); - unsigned reclen = NILFS_DIR_REC_LEN(namelen); + unsigned int chunk_size = nilfs_chunk_size(dir); + unsigned int reclen = NILFS_DIR_REC_LEN(namelen); unsigned short rec_len, name_len; - struct page *page = NULL; + struct folio *folio = NULL; struct nilfs_dir_entry *de; unsigned long npages = dir_pages(dir); unsigned long n; - char *kaddr; - unsigned from, to; + size_t from, to; int err; /* * We take care of directory expansion in the same loop. - * This code plays outside i_size, so it locks the page + * This code plays outside i_size, so it locks the folio * to protect that region. */ for (n = 0; n <= npages; n++) { + char *kaddr = nilfs_get_folio(dir, n, &folio); char *dir_end; - page = nilfs_get_page(dir, n); - err = PTR_ERR(page); - if (IS_ERR(page)) - goto out; - lock_page(page); - kaddr = page_address(page); + if (IS_ERR(kaddr)) + return PTR_ERR(kaddr); + folio_lock(folio); dir_end = kaddr + nilfs_last_byte(dir, n); de = (struct nilfs_dir_entry *)kaddr; - kaddr += PAGE_CACHE_SIZE - reclen; + kaddr += folio_size(folio) - reclen; while ((char *)de <= kaddr) { if ((char *)de == dir_end) { /* We hit i_size */ @@ -476,7 +464,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) goto got_it; } if (de->rec_len == 0) { - nilfs_error(dir->i_sb, __func__, + nilfs_error(dir->i_sb, "zero-length directory entry"); err = -EIO; goto out_unlock; @@ -492,16 +480,16 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) goto got_it; de = (struct nilfs_dir_entry *)((char *)de + rec_len); } - unlock_page(page); - nilfs_put_page(page); + folio_unlock(folio); + folio_release_kmap(folio, kaddr); } BUG(); return -EINVAL; got_it: - from = (char *)de - (char *)page_address(page); + from = offset_in_folio(folio, de); to = from + rec_len; - err = nilfs_prepare_chunk(page, from, to); + err = nilfs_prepare_chunk(folio, from, to); if (err) goto out_unlock; if (de->inode) { @@ -515,39 +503,39 @@ got_it: de->name_len = namelen; memcpy(de->name, name, namelen); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); - nilfs_commit_chunk(page, page->mapping, from, to); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; + de->file_type = fs_umode_to_ftype(inode->i_mode); + nilfs_commit_chunk(folio, folio->mapping, from, to); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); nilfs_mark_inode_dirty(dir); /* OFFSET_CACHE */ out_put: - nilfs_put_page(page); -out: + folio_release_kmap(folio, de); return err; out_unlock: - unlock_page(page); + folio_unlock(folio); goto out_put; } /* * nilfs_delete_entry deletes a directory entry by merging it with the - * previous entry. Page is up-to-date. Releases the page. + * previous entry. Folio is up-to-date. */ -int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) +int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; - char *kaddr = page_address(page); - unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1); - unsigned to = ((char *)dir - kaddr) + - nilfs_rec_len_from_disk(dir->rec_len); - struct nilfs_dir_entry *pde = NULL; - struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from); + char *kaddr = (char *)((unsigned long)dir & ~(folio_size(folio) - 1)); + size_t from, to; + struct nilfs_dir_entry *de, *pde = NULL; int err; + from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1); + to = ((char *)dir - kaddr) + nilfs_rec_len_from_disk(dir->rec_len); + de = (struct nilfs_dir_entry *)(kaddr + from); + while ((char *)de < (char *)dir) { if (de->rec_len == 0) { - nilfs_error(inode->i_sb, __func__, + nilfs_error(inode->i_sb, "zero-length directory entry"); err = -EIO; goto out; @@ -556,17 +544,19 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) de = nilfs_next_entry(de); } if (pde) - from = (char *)pde - (char *)page_address(page); - lock_page(page); - err = nilfs_prepare_chunk(page, from, to); - BUG_ON(err); + from = (char *)pde - kaddr; + folio_lock(folio); + err = nilfs_prepare_chunk(folio, from, to); + if (unlikely(err)) { + folio_unlock(folio); + goto out; + } if (pde) pde->rec_len = nilfs_rec_len_to_disk(to - from); dir->inode = 0; - nilfs_commit_chunk(page, mapping, from, to); - inode->i_ctime = inode->i_mtime = CURRENT_TIME; + nilfs_commit_chunk(folio, mapping, from, to); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); out: - nilfs_put_page(page); return err; } @@ -576,39 +566,39 @@ out: int nilfs_make_empty(struct inode *inode, struct inode *parent) { struct address_space *mapping = inode->i_mapping; - struct page *page = grab_cache_page(mapping, 0); - unsigned chunk_size = nilfs_chunk_size(inode); + struct folio *folio = filemap_grab_folio(mapping, 0); + unsigned int chunk_size = nilfs_chunk_size(inode); struct nilfs_dir_entry *de; int err; void *kaddr; - if (!page) - return -ENOMEM; + if (IS_ERR(folio)) + return PTR_ERR(folio); - err = nilfs_prepare_chunk(page, 0, chunk_size); + err = nilfs_prepare_chunk(folio, 0, chunk_size); if (unlikely(err)) { - unlock_page(page); + folio_unlock(folio); goto fail; } - kaddr = kmap_atomic(page); + kaddr = kmap_local_folio(folio, 0); memset(kaddr, 0, chunk_size); de = (struct nilfs_dir_entry *)kaddr; de->name_len = 1; de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1)); memcpy(de->name, ".\0\0", 4); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); de->name_len = 2; de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1)); de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); - nilfs_set_de_type(de, inode); - kunmap_atomic(kaddr); - nilfs_commit_chunk(page, mapping, 0, chunk_size); + de->file_type = fs_umode_to_ftype(inode->i_mode); + kunmap_local(kaddr); + nilfs_commit_chunk(folio, mapping, 0, chunk_size); fail: - page_cache_release(page); + folio_put(folio); return err; } @@ -617,26 +607,25 @@ fail: */ int nilfs_empty_dir(struct inode *inode) { - struct page *page = NULL; + struct folio *folio = NULL; + char *kaddr; unsigned long i, npages = dir_pages(inode); for (i = 0; i < npages; i++) { - char *kaddr; struct nilfs_dir_entry *de; - page = nilfs_get_page(inode, i); - if (IS_ERR(page)) - continue; + kaddr = nilfs_get_folio(inode, i, &folio); + if (IS_ERR(kaddr)) + return 0; - kaddr = page_address(page); de = (struct nilfs_dir_entry *)kaddr; kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1); while ((char *)de <= kaddr) { if (de->rec_len == 0) { - nilfs_error(inode->i_sb, __func__, - "zero-length directory entry " - "(kaddr=%p, de=%p)\n", kaddr, de); + nilfs_error(inode->i_sb, + "zero-length directory entry (kaddr=%p, de=%p)", + kaddr, de); goto not_empty; } if (de->inode != 0) { @@ -654,19 +643,19 @@ int nilfs_empty_dir(struct inode *inode) } de = nilfs_next_entry(de); } - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); } return 1; not_empty: - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return 0; } const struct file_operations nilfs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = nilfs_readdir, + .iterate_shared = nilfs_readdir, .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_compat_ioctl, diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 82f4865e86dd..2d8dc6b35b54 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * direct.c - NILFS direct block pointer. + * NILFS direct block pointer. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #include <linux/errno.h> @@ -62,7 +49,7 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *direct, static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, __u64 key, __u64 *ptrp, - unsigned maxblocks) + unsigned int maxblocks) { struct inode *dat = NULL; __u64 ptr, ptr2; @@ -79,11 +66,12 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, dat = nilfs_bmap_get_dat(direct); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr = blocknr; } - maxblocks = min_t(unsigned, maxblocks, NILFS_DIRECT_KEY_MAX - key + 1); + maxblocks = min_t(unsigned int, maxblocks, + NILFS_DIRECT_KEY_MAX - key + 1); for (cnt = 1; cnt < maxblocks && (ptr2 = nilfs_direct_get_ptr(direct, key + cnt)) != NILFS_BMAP_INVALID_PTR; @@ -91,7 +79,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt) @@ -99,6 +87,11 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, } *ptrp = ptr; return cnt; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + return ret; } static __u64 @@ -110,9 +103,9 @@ nilfs_direct_find_target_v(const struct nilfs_bmap *direct, __u64 key) if (ptr != NILFS_BMAP_INVALID_PTR) /* sequential access */ return ptr; - else - /* block group */ - return nilfs_bmap_find_target_in_group(direct); + + /* block group */ + return nilfs_bmap_find_target_in_group(direct); } static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) @@ -173,6 +166,21 @@ static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) return ret; } +static int nilfs_direct_seek_key(const struct nilfs_bmap *direct, __u64 start, + __u64 *keyp) +{ + __u64 key; + + for (key = start; key <= NILFS_DIRECT_KEY_MAX; key++) { + if (nilfs_direct_get_ptr(direct, key) != + NILFS_BMAP_INVALID_PTR) { + *keyp = key; + return 0; + } + } + return -ENOENT; +} + static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp) { __u64 key, lastkey; @@ -265,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap, dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); ptr = nilfs_direct_get_ptr(bmap, key); + if (ptr == NILFS_BMAP_INVALID_PTR) + return -EINVAL; + if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; newreq.pr_entry_nr = ptr; @@ -311,6 +322,7 @@ static int nilfs_direct_assign_p(struct nilfs_bmap *direct, binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = 0; + memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } @@ -325,14 +337,18 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, key = nilfs_bmap_data_get_key(bmap, *bh); if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { - printk(KERN_CRIT "%s: invalid key: %llu\n", __func__, - (unsigned long long)key); + nilfs_crit(bmap->b_inode->i_sb, + "%s (ino=%lu): invalid key: %llu", + __func__, + bmap->b_inode->i_ino, (unsigned long long)key); return -EINVAL; } ptr = nilfs_direct_get_ptr(bmap, key); if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { - printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__, - (unsigned long long)ptr); + nilfs_crit(bmap->b_inode->i_sb, + "%s (ino=%lu): invalid pointer: %llu", + __func__, + bmap->b_inode->i_ino, (unsigned long long)ptr); return -EINVAL; } @@ -355,7 +371,9 @@ static const struct nilfs_bmap_operations nilfs_direct_ops = { .bop_assign = nilfs_direct_assign, .bop_mark = NULL, + .bop_seek_key = nilfs_direct_seek_key, .bop_last_key = nilfs_direct_last_key, + .bop_check_insert = nilfs_direct_check_insert, .bop_check_delete = NULL, .bop_gather_data = nilfs_direct_gather_data, diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h index dc643de20a25..b7ca896269af 100644 --- a/fs/nilfs2/direct.h +++ b/fs/nilfs2/direct.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * direct.h - NILFS direct block pointer. + * NILFS direct block pointer. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #ifndef _NILFS_DIRECT_H @@ -28,16 +15,6 @@ #include "bmap.h" -/** - * struct nilfs_direct_node - direct node - * @dn_flags: flags - * @dn_pad: padding - */ -struct nilfs_direct_node { - __u8 dn_flags; - __u8 pad[7]; -}; - #define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1) #define NILFS_DIRECT_KEY_MIN 0 #define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) diff --git a/fs/nilfs2/export.h b/fs/nilfs2/export.h index 19ccbf9522ab..d29fd837c42c 100644 --- a/fs/nilfs2/export.h +++ b/fs/nilfs2/export.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef NILFS_EXPORT_H #define NILFS_EXPORT_H @@ -20,6 +21,6 @@ struct nilfs_fid { u32 parent_gen; u64 parent_ino; -} __attribute__ ((packed)); +} __packed; #endif diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 08fdb77852ac..1b8d754db44d 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -1,24 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * file.c - NILFS regular file handling primitives including fsync(). + * NILFS regular file handling primitives including fsync(). * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Amagai Yoshiji <amagai@osrg.net>, - * Ryusuke Konishi <ryusuke@osrg.net> + * Written by Amagai Yoshiji and Ryusuke Konishi. */ #include <linux/fs.h> @@ -39,61 +25,56 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ struct the_nilfs *nilfs; struct inode *inode = file->f_mapping->host; - int err; - - err = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (err) - return err; - mutex_lock(&inode->i_mutex); + int err = 0; if (nilfs_inode_dirty(inode)) { if (datasync) err = nilfs_construct_dsync_segment(inode->i_sb, inode, - 0, LLONG_MAX); + start, end); else err = nilfs_construct_segment(inode->i_sb); } - mutex_unlock(&inode->i_mutex); nilfs = inode->i_sb->s_fs_info; - if (!err && nilfs_test_opt(nilfs, BARRIER)) { - err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); - if (err != -EIO) - err = 0; - } + if (!err) + err = nilfs_flush_device(nilfs); + return err; } -static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct vm_area_struct *vma = vmf->vma; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vma->vm_file); struct nilfs_transaction_info ti; + struct buffer_head *bh, *head; int ret = 0; if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) return VM_FAULT_SIGBUS; /* -ENOSPC */ sb_start_pagefault(inode->i_sb); - lock_page(page); - if (page->mapping != inode->i_mapping || - page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { - unlock_page(page); + folio_lock(folio); + if (folio->mapping != inode->i_mapping || + folio_pos(folio) >= i_size_read(inode) || + !folio_test_uptodate(folio)) { + folio_unlock(folio); ret = -EFAULT; /* make the VM retry the fault */ goto out; } /* - * check to see if the page is mapped already (no holes) + * check to see if the folio is mapped already (no holes) */ - if (PageMappedToDisk(page)) + if (folio_test_mappedtodisk(folio)) goto mapped; - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; + head = folio_buffers(folio); + if (head) { int fully_mapped = 1; - bh = head = page_buffers(page); + bh = head; do { if (!buffer_mapped(bh)) { fully_mapped = 0; @@ -102,11 +83,11 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) } while (bh = bh->b_this_page, bh != head); if (fully_mapped) { - SetPageMappedToDisk(page); + folio_set_mappedtodisk(folio); goto mapped; } } - unlock_page(page); + folio_unlock(folio); /* * fill hole blocks @@ -117,7 +98,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; file_update_time(vma->vm_file); - ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); + ret = block_page_mkwrite(vma, vmf, nilfs_get_block); if (ret) { nilfs_transaction_abort(inode->i_sb); goto out; @@ -126,22 +107,28 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) nilfs_transaction_commit(inode->i_sb); mapped: - wait_for_stable_page(page); + /* + * Since checksumming including data blocks is performed to determine + * the validity of the log to be written and used for recovery, it is + * necessary to wait for writeback to finish here, regardless of the + * stable write requirement of the backing device. + */ + folio_wait_writeback(folio); out: sb_end_pagefault(inode->i_sb); - return block_page_mkwrite_return(ret); + return vmf_fs_error(ret); } static const struct vm_operations_struct nilfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = nilfs_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; -static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int nilfs_file_mmap_prepare(struct vm_area_desc *desc) { - file_accessed(file); - vma->vm_ops = &nilfs_file_vm_ops; + file_accessed(desc->file); + desc->vm_ops = &nilfs_file_vm_ops; return 0; } @@ -151,25 +138,26 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) */ const struct file_operations nilfs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_compat_ioctl, #endif /* CONFIG_COMPAT */ - .mmap = nilfs_file_mmap, + .mmap_prepare = nilfs_file_mmap_prepare, .open = generic_file_open, /* .release = nilfs_release_file, */ .fsync = nilfs_sync_file, - .splice_read = generic_file_splice_read, + .splice_read = filemap_splice_read, + .splice_write = iter_file_splice_write, }; const struct inode_operations nilfs_file_inode_operations = { .setattr = nilfs_setattr, .permission = nilfs_permission, .fiemap = nilfs_fiemap, + .fileattr_get = nilfs_fileattr_get, + .fileattr_set = nilfs_fileattr_set, }; /* end of file */ diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 57ceaf33d177..561c220799c7 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -1,25 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * gcinode.c - dummy inodes to buffer blocks for garbage collection + * Dummy inodes to buffer blocks for garbage collection * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>, - * and Ryusuke Konishi <ryusuke@osrg.net>. - * Revised by Ryusuke Konishi <ryusuke@osrg.net>. + * Written by Seiji Kihara, Amagai Yoshiji, and Ryusuke Konishi. + * Revised by Ryusuke Konishi. * */ /* @@ -60,14 +46,11 @@ * specified by @pbn to the GC pagecache with the key @blkoff. * This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer. * - * Return Value: On success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The block specified with @pbn does not exist. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - The block specified with @pbn does not exist. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, sector_t pbn, __u64 vbn, @@ -87,10 +70,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, struct the_nilfs *nilfs = inode->i_sb->s_fs_info; err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn); - if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ - brelse(bh); + if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */ goto failed; - } } lock_buffer(bh); @@ -99,14 +80,12 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, goto out; } - if (!buffer_mapped(bh)) { - bh->b_bdev = inode->i_sb->s_bdev; + if (!buffer_mapped(bh)) set_buffer_mapped(bh); - } bh->b_blocknr = pbn; bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(READ, bh); + submit_bh(REQ_OP_READ, bh); if (vbn) bh->b_blocknr = vbn; out: @@ -114,8 +93,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, *out_bh = bh; failed: - unlock_page(bh->b_page); - page_cache_release(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); + if (unlikely(err)) + brelse(bh); return err; } @@ -130,20 +111,20 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, * specified by @vbn to the GC pagecache. @pbn can be supplied by the * caller to avoid translation of the disk block address. * - * Return Value: On success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Invalid virtual block address. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) { + struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode; int ret; - ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, - vbn ? : pbn, pbn, READ, out_bh, &pbn); + ret = nilfs_btnode_submit_block(btnc_inode->i_mapping, vbn ? : pbn, pbn, + REQ_OP_READ, out_bh, &pbn); if (ret == -EEXIST) /* internal code (cache hit) */ ret = 0; return ret; @@ -152,8 +133,15 @@ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) { wait_on_buffer(bh); - if (!buffer_uptodate(bh)) + if (!buffer_uptodate(bh)) { + struct inode *inode = bh->b_folio->mapping->host; + + nilfs_err(inode->i_sb, + "I/O error reading %s block for GC (ino=%lu, vblocknr=%llu)", + buffer_nilfs_node(bh) ? "node" : "data", + inode->i_ino, (unsigned long long)bh->b_blocknr); return -EIO; + } if (buffer_dirty(bh)) return -EEXIST; @@ -171,17 +159,17 @@ int nilfs_init_gcinode(struct inode *inode) inode->i_mode = S_IFREG; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); - inode->i_mapping->a_ops = &empty_aops; - inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; + inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; ii->i_flags = 0; nilfs_bmap_init_gc(ii->i_bmap); - return 0; + return nilfs_attach_btree_node_cache(inode); } /** * nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes + * @nilfs: NILFS filesystem instance */ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) { @@ -192,7 +180,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); list_del_init(&ii->i_dirty); truncate_inode_pages(&ii->vfs_inode.i_data, 0); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 6548c7851b48..99eb8a59009e 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -1,24 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * ifile.c - NILFS inode file + * NILFS inode file * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Amagai Yoshiji <amagai@osrg.net>. - * Revised by Ryusuke Konishi <ryusuke@osrg.net>. + * Written by Amagai Yoshiji. + * Revised by Ryusuke Konishi. * */ @@ -28,6 +15,7 @@ #include "mdt.h" #include "alloc.h" #include "ifile.h" +#include "cpfile.h" /** * struct nilfs_ifile_info - on-memory private data of ifile @@ -50,17 +38,16 @@ static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile) * @out_ino: pointer to a variable to store inode number * @out_bh: buffer_head contains newly allocated disk inode * - * Return Value: On success, 0 is returned and the newly allocated inode - * number is stored in the place pointed by @ino, and buffer_head pointer - * that contains newly allocated disk inode structure is stored in the - * place pointed by @out_bh - * On error, one of the following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * nilfs_ifile_create_inode() allocates a new inode in the ifile metadata + * file and stores the inode number in the variable pointed to by @out_ino, + * as well as storing the ifile's buffer with the disk inode in the location + * pointed to by @out_bh. * - * %-ENOSPC - No inode left. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No inode left. */ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, struct buffer_head **out_bh) @@ -68,11 +55,10 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, struct nilfs_palloc_req req; int ret; - req.pr_entry_nr = 0; /* 0 says find free inode from beginning of - a group. dull code!! */ + req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb); req.pr_entry_bh = NULL; - ret = nilfs_palloc_prepare_alloc_entry(ifile, &req); + ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false); if (!ret) { ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1, &req.pr_entry_bh); @@ -96,14 +82,11 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, * @ifile: ifile inode * @ino: inode number * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - The inode number @ino have not been allocated. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Inode number unallocated. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) { @@ -111,7 +94,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) .pr_entry_nr = ino, .pr_entry_bh = NULL }; struct nilfs_inode *raw_inode; - void *kaddr; + size_t offset; int ret; ret = nilfs_palloc_prepare_free_entry(ifile, &req); @@ -126,11 +109,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) return ret; } - kaddr = kmap_atomic(req.pr_entry_bh->b_page); - raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, - req.pr_entry_bh, kaddr); + offset = nilfs_palloc_entry_offset(ifile, req.pr_entry_nr, + req.pr_entry_bh); + raw_inode = kmap_local_folio(req.pr_entry_bh->b_folio, offset); raw_inode->i_flags = 0; - kunmap_atomic(kaddr); + kunmap_local(raw_inode); mark_buffer_dirty(req.pr_entry_bh); brelse(req.pr_entry_bh); @@ -147,15 +130,14 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, int err; if (unlikely(!NILFS_VALID_INODE(sb, ino))) { - nilfs_error(sb, __func__, "bad inode number: %lu", - (unsigned long) ino); + nilfs_error(sb, "bad inode number: %lu", (unsigned long)ino); return -EINVAL; } err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); if (unlikely(err)) - nilfs_warning(sb, __func__, "unable to read inode: %lu", - (unsigned long) ino); + nilfs_warn(sb, "error %d reading inode: ino=%lu", + err, (unsigned long)ino); return err; } @@ -164,6 +146,8 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, * @ifile: ifile inode * @nmaxinodes: current maximum of available inodes count [out] * @nfreeinodes: free inodes count [out] + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_ifile_count_free_inodes(struct inode *ifile, u64 *nmaxinodes, u64 *nfreeinodes) @@ -185,21 +169,26 @@ int nilfs_ifile_count_free_inodes(struct inode *ifile, * nilfs_ifile_read - read or get ifile inode * @sb: super block instance * @root: root object + * @cno: number of checkpoint entry to read * @inode_size: size of an inode - * @raw_inode: on-disk ifile inode - * @inodep: buffer to store the inode + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid checkpoint. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). */ int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, - size_t inode_size, struct nilfs_inode *raw_inode, - struct inode **inodep) + __u64 cno, size_t inode_size) { + struct the_nilfs *nilfs; struct inode *ifile; int err; ifile = nilfs_iget_locked(sb, root, NILFS_IFILE_INO); if (unlikely(!ifile)) return -ENOMEM; - if (!(ifile->i_state & I_NEW)) + if (!(inode_state_read_once(ifile) & I_NEW)) goto out; err = nilfs_mdt_init(ifile, NILFS_MDT_GFP, @@ -213,13 +202,13 @@ int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache); - err = nilfs_read_inode_common(ifile, raw_inode); + nilfs = sb->s_fs_info; + err = nilfs_cpfile_read_checkpoint(nilfs->ns_cpfile, cno, root, ifile); if (err) goto failed; unlock_new_inode(ifile); out: - *inodep = ifile; return 0; failed: iget_failed(ifile); diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 679674d13372..5d116a566d9e 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -1,24 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * ifile.h - NILFS inode file + * NILFS inode file * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Amagai Yoshiji <amagai@osrg.net> - * Revised by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Amagai Yoshiji. + * Revised by Ryusuke Konishi. * */ @@ -27,7 +14,6 @@ #include <linux/fs.h> #include <linux/buffer_head.h> -#include <linux/nilfs2_fs.h> #include "mdt.h" #include "alloc.h" @@ -35,14 +21,14 @@ static inline struct nilfs_inode * nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh) { - void *kaddr = kmap(ibh->b_page); - return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr); + size_t __offset_in_folio = nilfs_palloc_entry_offset(ifile, ino, ibh); + + return kmap_local_folio(ibh->b_folio, __offset_in_folio); } -static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino, - struct buffer_head *ibh) +static inline void nilfs_ifile_unmap_inode(struct nilfs_inode *raw_inode) { - kunmap(ibh->b_page); + kunmap_local(raw_inode); } int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); @@ -52,7 +38,6 @@ int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *); int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, - size_t inode_size, struct nilfs_inode *raw_inode, - struct inode **inodep); + __u64 cno, size_t inode_size); #endif /* _NILFS_IFILE_H */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b1a5277cfd18..51bde45d5865 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -1,31 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * inode.c - NILFS inode operations. + * NILFS inode operations. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. * */ #include <linux/buffer_head.h> #include <linux/gfp.h> #include <linux/mpage.h> +#include <linux/pagemap.h> #include <linux/writeback.h> -#include <linux/aio.h> +#include <linux/uio.h> +#include <linux/fiemap.h> +#include <linux/random.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" @@ -39,20 +29,22 @@ * @ino: inode number * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) - * @for_gc: inode for GC flag + * @type: inode type */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - int for_gc; + unsigned int type; }; +static int nilfs_iget_test(struct inode *inode, void *opaque); + void nilfs_inode_add_blocks(struct inode *inode, int n) { struct nilfs_root *root = NILFS_I(inode)->i_root; - inode_add_bytes(inode, (1 << inode->i_blkbits) * n); + inode_add_bytes(inode, i_blocksize(inode) * n); if (root) atomic64_add(n, &root->blocks_count); } @@ -61,21 +53,23 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n) { struct nilfs_root *root = NILFS_I(inode)->i_root; - inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); + inode_sub_bytes(inode, i_blocksize(inode) * n); if (root) atomic64_sub(n, &root->blocks_count); } /** * nilfs_get_block() - get a file block on the filesystem (callback function) - * @inode - inode struct of the target file - * @blkoff - file block number - * @bh_result - buffer head to be mapped on - * @create - indicate whether allocating the block or not when it has not + * @inode: inode struct of the target file + * @blkoff: file block number + * @bh_result: buffer head to be mapped on + * @create: indicate whether allocating the block or not when it has not * been allocated yet. * * This function does not issue actual read request of the specified data * block. It is done by VFS. + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct buffer_head *bh_result, int create) @@ -84,7 +78,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct the_nilfs *nilfs = inode->i_sb->s_fs_info; __u64 blknum = 0; int err = 0, ret; - unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; + unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits; down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); @@ -103,7 +97,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, err = nilfs_transaction_begin(inode->i_sb, &ti, 1); if (unlikely(err)) goto out; - err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff, + err = nilfs_bmap_insert(ii->i_bmap, blkoff, (unsigned long)bh_result); if (unlikely(err != 0)) { if (err == -EEXIST) { @@ -113,28 +107,28 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, * However, the page having this block must * be locked in this case. */ - printk(KERN_WARNING - "nilfs_get_block: a race condition " - "while inserting a data block. " - "(inode number=%lu, file block " - "offset=%llu)\n", - inode->i_ino, - (unsigned long long)blkoff); - err = 0; + nilfs_warn(inode->i_sb, + "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", + __func__, inode->i_ino, + (unsigned long long)blkoff); + err = -EAGAIN; } nilfs_transaction_abort(inode->i_sb); goto out; } - nilfs_mark_inode_dirty(inode); + nilfs_mark_inode_dirty_sync(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ /* Error handling should be detailed */ set_buffer_new(bh_result); set_buffer_delay(bh_result); - map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed - to proper value */ + map_bh(bh_result, inode->i_sb, 0); + /* Disk block number must be changed to proper value */ + } else if (ret == -ENOENT) { - /* not found is not error (e.g. hole); must return without - the mapped state flag. */ + /* + * not found is not error (e.g. hole); must return without + * the mapped state flag. + */ ; } else { err = ret; @@ -145,28 +139,21 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, } /** - * nilfs_readpage() - implement readpage() method of nilfs_aops {} + * nilfs_read_folio() - implement read_folio() method of nilfs_aops {} * address_space_operations. - * @file - file struct of the file to be read - * @page - the page to be read + * @file: file struct of the file to be read + * @folio: the folio to be read + * + * Return: 0 on success, or a negative error code on failure. */ -static int nilfs_readpage(struct file *file, struct page *page) +static int nilfs_read_folio(struct file *file, struct folio *folio) { - return mpage_readpage(page, nilfs_get_block); + return mpage_read_folio(folio, nilfs_get_block); } -/** - * nilfs_readpages() - implement readpages() method of nilfs_aops {} - * address_space_operations. - * @file - file struct of the file to be read - * @mapping - address_space struct used for reading multiple pages - * @pages - the pages to be read - * @nr_pages - number of pages to be read - */ -static int nilfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void nilfs_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block); + mpage_readahead(rac, nilfs_get_block); } static int nilfs_writepages(struct address_space *mapping, @@ -175,8 +162,8 @@ static int nilfs_writepages(struct address_space *mapping, struct inode *inode = mapping->host; int err = 0; - if (inode->i_sb->s_flags & MS_RDONLY) { - nilfs_clear_dirty_pages(mapping, false); + if (sb_rdonly(inode->i_sb)) { + nilfs_clear_dirty_pages(mapping); return -EROFS; } @@ -187,53 +174,22 @@ static int nilfs_writepages(struct address_space *mapping, return err; } -static int nilfs_writepage(struct page *page, struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - int err; - - if (inode->i_sb->s_flags & MS_RDONLY) { - /* - * It means that filesystem was remounted in read-only - * mode because of error or metadata corruption. But we - * have dirty pages that try to be flushed in background. - * So, here we simply discard this dirty page. - */ - nilfs_clear_dirty_page(page, false); - unlock_page(page); - return -EROFS; - } - - redirty_page_for_writepage(wbc, page); - unlock_page(page); - - if (wbc->sync_mode == WB_SYNC_ALL) { - err = nilfs_construct_segment(inode->i_sb); - if (unlikely(err)) - return err; - } else if (wbc->for_reclaim) - nilfs_flush_segment(inode->i_sb, inode->i_ino); - - return 0; -} - -static int nilfs_set_page_dirty(struct page *page) +static bool nilfs_dirty_folio(struct address_space *mapping, + struct folio *folio) { - int ret = __set_page_dirty_nobuffers(page); + struct inode *inode = mapping->host; + struct buffer_head *head; + unsigned int nr_dirty = 0; + bool ret = filemap_dirty_folio(mapping, folio); - if (page_has_buffers(page)) { - struct inode *inode = page->mapping->host; - unsigned nr_dirty = 0; - struct buffer_head *bh, *head; + /* + * The page may not be locked, eg if called from try_to_unmap_one() + */ + spin_lock(&mapping->i_private_lock); + head = folio_buffers(folio); + if (head) { + struct buffer_head *bh = head; - /* - * This page is locked by callers, and no other thread - * concurrently marks its buffers dirty since they are - * only dirtied through routines in fs/buffer.c in - * which call sites of mark_buffer_dirty are protected - * by page lock. - */ - bh = head = page_buffers(page); do { /* Do not mark hole blocks dirty */ if (buffer_dirty(bh) || !buffer_mapped(bh)) @@ -242,10 +198,13 @@ static int nilfs_set_page_dirty(struct page *page) set_buffer_dirty(bh); nr_dirty++; } while (bh = bh->b_this_page, bh != head); - - if (nr_dirty) - nilfs_set_file_dirty(inode, nr_dirty); + } else if (ret) { + nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits); } + spin_unlock(&mapping->i_private_lock); + + if (nr_dirty) + nilfs_set_file_dirty(inode, nr_dirty); return ret; } @@ -254,14 +213,15 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); nilfs_truncate(inode); } } -static int nilfs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +static int nilfs_write_begin(const struct kiocb *iocb, + struct address_space *mapping, + loff_t pos, unsigned len, + struct folio **foliop, void **fsdata) { struct inode *inode = mapping->host; @@ -270,8 +230,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) return err; - err = block_write_begin(mapping, pos, len, flags, pagep, - nilfs_get_block); + err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block); if (unlikely(err)) { nilfs_write_failed(mapping, pos + len); nilfs_transaction_abort(inode->i_sb); @@ -279,18 +238,19 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, return err; } -static int nilfs_write_end(struct file *file, struct address_space *mapping, +static int nilfs_write_end(const struct kiocb *iocb, + struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { struct inode *inode = mapping->host; - unsigned start = pos & (PAGE_CACHE_SIZE - 1); - unsigned nr_dirty; + unsigned int start = pos & (PAGE_SIZE - 1); + unsigned int nr_dirty; int err; - nr_dirty = nilfs_page_count_clean_buffers(page, start, + nr_dirty = nilfs_page_count_clean_buffers(folio, start, start + copied); - copied = generic_write_end(file, mapping, pos, len, copied, page, + copied = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata); nilfs_set_file_dirty(inode, nr_dirty); err = nilfs_transaction_commit(inode->i_sb); @@ -298,57 +258,52 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, } static ssize_t -nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = file->f_mapping->host; - ssize_t size; + struct inode *inode = file_inode(iocb->ki_filp); - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - nilfs_get_block); - - /* - * In case of error extending write may have instantiated a few - * blocks outside i_size. Trim these off again. - */ - if (unlikely((rw & WRITE) && size < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); - - if (end > isize) - nilfs_write_failed(mapping, end); - } - - return size; + return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block); } const struct address_space_operations nilfs_aops = { - .writepage = nilfs_writepage, - .readpage = nilfs_readpage, + .read_folio = nilfs_read_folio, .writepages = nilfs_writepages, - .set_page_dirty = nilfs_set_page_dirty, - .readpages = nilfs_readpages, + .dirty_folio = nilfs_dirty_folio, + .readahead = nilfs_readahead, .write_begin = nilfs_write_begin, .write_end = nilfs_write_end, - /* .releasepage = nilfs_releasepage, */ - .invalidatepage = block_invalidatepage, + .invalidate_folio = block_invalidate_folio, .direct_IO = nilfs_direct_IO, + .migrate_folio = buffer_migrate_folio_norefs, .is_partially_uptodate = block_is_partially_uptodate, }; +const struct address_space_operations nilfs_buffer_cache_aops = { + .invalidate_folio = block_invalidate_folio, +}; + +static int nilfs_insert_inode_locked(struct inode *inode, + struct nilfs_root *root, + unsigned long ino) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL + }; + + return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); +} + struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; - struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; + struct buffer_head *bh; int err = -ENOMEM; ino_t ino; @@ -357,27 +312,29 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) goto failed; mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); - ii->i_state = 1 << NILFS_I_NEW; + ii->i_state = BIT(NILFS_I_NEW); + ii->i_type = NILFS_I_TYPE_NORMAL; ii->i_root = root; - err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); + err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ + ii->i_bh = bh; atomic64_inc(&root->inodes_count); - inode_init_owner(inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = ino; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + simple_inode_init_ts(inode); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { err = nilfs_bmap_read(ii->i_bmap, NULL); if (err < 0) - goto failed_bmap; + goto failed_after_creation; set_bit(NILFS_I_BMAP, &ii->i_state); /* No lock is needed; iget() ensures it. */ @@ -390,30 +347,35 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; nilfs_set_inode_flags(inode); - spin_lock(&nilfs->ns_next_gen_lock); - inode->i_generation = nilfs->ns_next_generation++; - spin_unlock(&nilfs->ns_next_gen_lock); - insert_inode_hash(inode); + inode->i_generation = get_random_u32(); + if (nilfs_insert_inode_locked(inode, root, ino) < 0) { + err = -EIO; + goto failed_after_creation; + } err = nilfs_init_acl(inode, dir); if (unlikely(err)) - goto failed_acl; /* never occur. When supporting - nilfs_init_acl(), proper cancellation of - above jobs should be considered */ + /* + * Never occur. When supporting nilfs_init_acl(), + * proper cancellation of above jobs should be considered. + */ + goto failed_after_creation; return inode; - failed_acl: - failed_bmap: + failed_after_creation: clear_nlink(inode); - iput(inode); /* raw_inode will be deleted through - generic_delete_inode() */ + if (inode_state_read_once(inode) & I_NEW) + unlock_new_inode(inode); + iput(inode); /* + * raw_inode will be deleted through + * nilfs_evict_inode(). + */ goto failed; failed_ifile_create_inode: make_bad_inode(inode); - iput(inode); /* if i_nlink == 1, generic_forget_inode() will be - called */ + iput(inode); failed: return ERR_PTR(err); } @@ -421,21 +383,20 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) void nilfs_set_inode_flags(struct inode *inode) { unsigned int flags = NILFS_I(inode)->i_flags; + unsigned int new_fl = 0; - inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | - S_DIRSYNC); if (flags & FS_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; if (flags & FS_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & FS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & FS_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags & FS_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; - mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + new_fl |= S_DIRSYNC; + inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE | + S_NOATIME | S_DIRSYNC); } int nilfs_read_inode_common(struct inode *inode, @@ -449,14 +410,16 @@ int nilfs_read_inode_common(struct inode *inode, i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le64_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); - inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - if (inode->i_nlink == 0 && inode->i_mode == 0) - return -EINVAL; /* this inode is deleted */ + inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime), + le32_to_cpu(raw_inode->i_mtime_nsec)); + inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime), + le32_to_cpu(raw_inode->i_ctime_nsec)); + inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime), + le32_to_cpu(raw_inode->i_mtime_nsec)); + if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode)) + return -EIO; /* this inode is for metadata and corrupted */ + if (inode->i_nlink == 0) + return -ESTALE; /* this inode is deleted */ inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); ii->i_flags = le32_to_cpu(raw_inode->i_flags); @@ -509,21 +472,31 @@ static int __nilfs_read_inode(struct super_block *sb, inode->i_mapping->a_ops = &nilfs_aops; } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &nilfs_symlink_inode_operations; + inode_nohighmem(inode); inode->i_mapping->a_ops = &nilfs_aops; - } else { + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { inode->i_op = &nilfs_special_inode_operations; init_special_inode( inode, inode->i_mode, huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); + } else { + nilfs_error(sb, + "invalid file type bits in mode 0%o for inode %lu", + inode->i_mode, ino); + err = -EIO; + goto failed_unmap; } - nilfs_ifile_unmap_inode(root->ifile, ino, bh); + nilfs_ifile_unmap_inode(raw_inode); brelse(bh); up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); nilfs_set_inode_flags(inode); + mapping_set_gfp_mask(inode->i_mapping, + mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); return 0; failed_unmap: - nilfs_ifile_unmap_inode(root->ifile, ino, bh); + nilfs_ifile_unmap_inode(raw_inode); brelse(bh); bad_inode: @@ -540,10 +513,10 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); - if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) - return !args->for_gc; + if (ii->i_type != args->type) + return 0; - return args->for_gc && args->cno == ii->i_cno; + return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno; } static int nilfs_iget_set(struct inode *inode, void *opaque) @@ -551,15 +524,11 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) struct nilfs_iget_args *args = opaque; inode->i_ino = args->ino; - if (args->for_gc) { - NILFS_I(inode)->i_state = 1 << NILFS_I_GCINODE; - NILFS_I(inode)->i_cno = args->cno; - NILFS_I(inode)->i_root = NULL; - } else { - if (args->root && args->ino == NILFS_ROOT_INO) - nilfs_get_root(args->root); - NILFS_I(inode)->i_root = args->root; - } + NILFS_I(inode)->i_cno = args->cno; + NILFS_I(inode)->i_root = args->root; + NILFS_I(inode)->i_type = args->type; + if (args->root && args->ino == NILFS_ROOT_INO) + nilfs_get_root(args->root); return 0; } @@ -567,7 +536,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -577,7 +546,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -592,8 +561,14 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, inode = nilfs_iget_locked(sb, root, ino); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) + + if (!(inode_state_read_once(inode) & I_NEW)) { + if (!inode->i_nlink) { + iput(inode); + return ERR_PTR(-ESTALE); + } return inode; + } err = __nilfs_read_inode(sb, root, ino, inode); if (unlikely(err)) { @@ -608,7 +583,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 + .ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC }; struct inode *inode; int err; @@ -616,7 +591,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) + if (!(inode_state_read_once(inode) & I_NEW)) return inode; err = nilfs_init_gcinode(inode); @@ -628,8 +603,118 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, return inode; } +/** + * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode + * @inode: inode object + * + * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode, + * or does nothing if the inode already has it. This function allocates + * an additional inode to maintain page cache of B-tree nodes one-on-one. + * + * Return: 0 on success, or %-ENOMEM if memory is insufficient. + */ +int nilfs_attach_btree_node_cache(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct inode *btnc_inode; + struct nilfs_iget_args args; + + if (ii->i_assoc_inode) + return 0; + + args.ino = inode->i_ino; + args.root = ii->i_root; + args.cno = ii->i_cno; + args.type = ii->i_type | NILFS_I_TYPE_BTNC; + + btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); + if (unlikely(!btnc_inode)) + return -ENOMEM; + if (inode_state_read_once(btnc_inode) & I_NEW) { + nilfs_init_btnc_inode(btnc_inode); + unlock_new_inode(btnc_inode); + } + NILFS_I(btnc_inode)->i_assoc_inode = inode; + NILFS_I(btnc_inode)->i_bmap = ii->i_bmap; + ii->i_assoc_inode = btnc_inode; + + return 0; +} + +/** + * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode + * @inode: inode object + * + * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its + * holder inode bound to @inode, or does nothing if @inode doesn't have it. + */ +void nilfs_detach_btree_node_cache(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct inode *btnc_inode = ii->i_assoc_inode; + + if (btnc_inode) { + NILFS_I(btnc_inode)->i_assoc_inode = NULL; + ii->i_assoc_inode = NULL; + iput(btnc_inode); + } +} + +/** + * nilfs_iget_for_shadow - obtain inode for shadow mapping + * @inode: inode object that uses shadow mapping + * + * nilfs_iget_for_shadow() allocates a pair of inodes that holds page + * caches for shadow mapping. The page cache for data pages is set up + * in one inode and the one for b-tree node pages is set up in the + * other inode, which is attached to the former inode. + * + * Return: a pointer to the inode for data pages on success, or %-ENOMEM + * if memory is insufficient. + */ +struct inode *nilfs_iget_for_shadow(struct inode *inode) +{ + struct nilfs_iget_args args = { + .ino = inode->i_ino, .root = NULL, .cno = 0, + .type = NILFS_I_TYPE_SHADOW + }; + struct inode *s_inode; + int err; + + s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); + if (unlikely(!s_inode)) + return ERR_PTR(-ENOMEM); + if (!(inode_state_read_once(s_inode) & I_NEW)) + return inode; + + NILFS_I(s_inode)->i_flags = 0; + memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); + s_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; + + err = nilfs_attach_btree_node_cache(s_inode); + if (unlikely(err)) { + iget_failed(s_inode); + return ERR_PTR(err); + } + unlock_new_inode(s_inode); + return s_inode; +} + +/** + * nilfs_write_inode_common - export common inode information to on-disk inode + * @inode: inode object + * @raw_inode: on-disk inode + * + * This function writes standard information from the on-memory inode @inode + * to @raw_inode on ifile, cpfile or a super root block. Since inode bmap + * data is not exported, nilfs_bmap_write() must be called separately during + * log writing. + */ void nilfs_write_inode_common(struct inode *inode, - struct nilfs_inode *raw_inode, int has_bmap) + struct nilfs_inode *raw_inode) { struct nilfs_inode_info *ii = NILFS_I(inode); @@ -638,35 +723,22 @@ void nilfs_write_inode_common(struct inode *inode, raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le64(inode->i_size); - raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); - raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); - raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); + raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode)); + raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); + raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode)); raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); raw_inode->i_flags = cpu_to_le32(ii->i_flags); raw_inode->i_generation = cpu_to_le32(inode->i_generation); - if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) { - struct the_nilfs *nilfs = inode->i_sb->s_fs_info; - - /* zero-fill unused portion in the case of super root block */ - raw_inode->i_xattr = 0; - raw_inode->i_pad = 0; - memset((void *)raw_inode + sizeof(*raw_inode), 0, - nilfs->ns_inode_size - sizeof(*raw_inode)); - } - - if (has_bmap) - nilfs_bmap_write(ii->i_bmap, raw_inode); - else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - raw_inode->i_device_code = - cpu_to_le64(huge_encode_dev(inode->i_rdev)); - /* When extending inode, nilfs->ns_inode_size should be checked - for substitutions of appended fields */ + /* + * When extending inode, nilfs->ns_inode_size should be checked + * for substitutions of appended fields. + */ } -void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) +void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags) { ino_t ino = inode->i_ino; struct nilfs_inode_info *ii = NILFS_I(inode); @@ -677,13 +749,16 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); - set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); + if (flags & I_DIRTY_DATASYNC) + set_bit(NILFS_I_INODE_SYNC, &ii->i_state); + + nilfs_write_inode_common(inode, raw_inode); - nilfs_write_inode_common(inode, raw_inode, 0); - /* XXX: call with has_bmap = 0 is a workaround to avoid - deadlock of bmap. This delays update of i_bmap to just - before writing */ - nilfs_ifile_unmap_inode(ifile, ino, ibh); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + raw_inode->i_device_code = + cpu_to_le64(huge_encode_dev(inode->i_rdev)); + + nilfs_ifile_unmap_inode(raw_inode); } #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ @@ -691,7 +766,7 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, unsigned long from) { - unsigned long b; + __u64 b; int ret; if (!test_bit(NILFS_I_BMAP, &ii->i_state)) @@ -706,7 +781,7 @@ repeat: if (b < from) return; - b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from); + b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from); ret = nilfs_bmap_truncate(ii->i_bmap, b); nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb); if (!ret || (ret == -ENOMEM && @@ -714,9 +789,8 @@ repeat: goto repeat; failed: - nilfs_warning(ii->vfs_inode.i_sb, __func__, - "failed to truncate bmap (ino=%lu, err=%d)", - ii->vfs_inode.i_ino, ret); + nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)", + ret, ii->vfs_inode.i_ino); } void nilfs_truncate(struct inode *inode) @@ -740,21 +814,22 @@ void nilfs_truncate(struct inode *inode) nilfs_truncate_bmap(ii, blkoff); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_mark_inode_dirty(inode); nilfs_set_file_dirty(inode, 0); nilfs_transaction_commit(sb); - /* May construct a logical segment and may fail in sync mode. - But truncate has no return value. */ + /* + * May construct a logical segment and may fail in sync mode. + * But truncate has no return value. + */ } static void nilfs_clear_inode(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct nilfs_mdt_info *mdi = NILFS_MDT(inode); /* * Free resources allocated in nilfs_read_inode(), here. @@ -763,13 +838,14 @@ static void nilfs_clear_inode(struct inode *inode) brelse(ii->i_bh); ii->i_bh = NULL; - if (mdi && mdi->mi_palloc_cache) - nilfs_palloc_destroy_cache(inode); + if (nilfs_is_metadata_file_inode(inode)) + nilfs_mdt_clear(inode); if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + if (!(ii->i_type & NILFS_I_TYPE_BTNC)) + nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) nilfs_put_root(ii->i_root); @@ -780,19 +856,35 @@ void nilfs_evict_inode(struct inode *inode) struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); + struct the_nilfs *nilfs; int ret; if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); nilfs_clear_inode(inode); return; } nilfs_transaction_begin(sb, &ti, 0); /* never fails */ - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); + + nilfs = sb->s_fs_info; + if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) { + /* + * If this inode is about to be disposed after the file system + * has been degraded to read-only due to file system corruption + * or after the writer has been detached, do not make any + * changes that cause writes, just clear it. + * Do this check after read-locking ns_segctor_sem by + * nilfs_transaction_begin() in order to avoid a race with + * the writer detach operation. + */ + clear_inode(inode); + nilfs_clear_inode(inode); + nilfs_transaction_abort(sb); + return; + } /* TODO: some of the following operations may fail. */ nilfs_truncate_bmap(ii, 0); @@ -808,18 +900,21 @@ void nilfs_evict_inode(struct inode *inode) if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_transaction_commit(sb); - /* May construct a logical segment and may fail in sync mode. - But delete_inode has no return value. */ + /* + * May construct a logical segment and may fail in sync mode. + * But delete_inode has no return value. + */ } -int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) +int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *iattr) { struct nilfs_transaction_info ti; - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct super_block *sb = inode->i_sb; int err; - err = inode_change_ok(inode, iattr); + err = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (err) return err; @@ -834,7 +929,7 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) nilfs_truncate(inode); } - setattr_copy(inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) { @@ -850,14 +945,16 @@ out_err: return err; } -int nilfs_permission(struct inode *inode, int mask) +int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode, + int mask) { struct nilfs_root *root = NILFS_I(inode)->i_root; + if ((mask & MAY_WRITE) && root && root->cno != NILFS_CPTREE_CURRENT_CNO) return -EROFS; /* snapshot is not writable */ - return generic_permission(inode, mask); + return generic_permission(&nop_mnt_idmap, inode, mask); } int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) @@ -867,7 +964,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) int err; spin_lock(&nilfs->ns_inode_lock); - if (ii->i_bh == NULL) { + if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) { spin_unlock(&nilfs->ns_inode_lock); err = nilfs_ifile_get_inode_block(ii->i_root->ifile, inode->i_ino, pbh); @@ -876,7 +973,10 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) spin_lock(&nilfs->ns_inode_lock); if (ii->i_bh == NULL) ii->i_bh = *pbh; - else { + else if (unlikely(!buffer_uptodate(ii->i_bh))) { + __brelse(ii->i_bh); + ii->i_bh = *pbh; + } else { brelse(*pbh); *pbh = ii->i_bh; } @@ -903,7 +1003,7 @@ int nilfs_inode_dirty(struct inode *inode) return ret; } -int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty) +int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty) { struct nilfs_inode_info *ii = NILFS_I(inode); struct the_nilfs *nilfs = inode->i_sb->s_fs_info; @@ -916,17 +1016,23 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty) spin_lock(&nilfs->ns_inode_lock); if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && !test_bit(NILFS_I_BUSY, &ii->i_state)) { - /* Because this routine may race with nilfs_dispose_list(), - we have to check NILFS_I_QUEUED here, too. */ + /* + * Because this routine may race with nilfs_dispose_list(), + * we have to check NILFS_I_QUEUED here, too. + */ if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) { - /* This will happen when somebody is freeing - this inode. */ - nilfs_warning(inode->i_sb, __func__, - "cannot get inode (ino=%lu)\n", - inode->i_ino); + /* + * This will happen when somebody is freeing + * this inode. + */ + nilfs_warn(inode->i_sb, + "cannot set file dirty (ino=%lu): the file is being freed", + inode->i_ino); spin_unlock(&nilfs->ns_inode_lock); - return -EINVAL; /* NILFS_I_DIRTY may remain for - freeing inode */ + return -EINVAL; /* + * NILFS_I_DIRTY may remain for + * freeing inode. + */ } list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files); set_bit(NILFS_I_QUEUED, &ii->i_state); @@ -935,18 +1041,27 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty) return 0; } -int nilfs_mark_inode_dirty(struct inode *inode) +int __nilfs_mark_inode_dirty(struct inode *inode, int flags) { + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct buffer_head *ibh; int err; + /* + * Do not dirty inodes after the log writer has been detached + * and its nilfs_root struct has been freed. + */ + if (unlikely(nilfs_purging(nilfs))) + return 0; + err = nilfs_load_inode_block(inode, &ibh); if (unlikely(err)) { - nilfs_warning(inode->i_sb, __func__, - "failed to reget inode block.\n"); + nilfs_warn(inode->i_sb, + "cannot mark inode dirty (ino=%lu): error %d loading inode block", + inode->i_ino, err); return err; } - nilfs_update_inode(inode, ibh); + nilfs_update_inode(inode, ibh, flags); mark_buffer_dirty(ibh); nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); brelse(ibh); @@ -956,6 +1071,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) /** * nilfs_dirty_inode - reflect changes on given inode to an inode block. * @inode: inode of the file to be registered. + * @flags: flags to determine the dirty state of the inode * * nilfs_dirty_inode() loads a inode block containing the specified * @inode and copies data from a nilfs_inode to a corresponding inode @@ -969,8 +1085,8 @@ void nilfs_dirty_inode(struct inode *inode, int flags) struct nilfs_mdt_info *mdi = NILFS_MDT(inode); if (is_bad_inode(inode)) { - nilfs_warning(inode->i_sb, __func__, - "tried to mark bad_inode dirty. ignored.\n"); + nilfs_warn(inode->i_sb, + "tried to mark bad_inode dirty. ignored."); dump_stack(); return; } @@ -979,7 +1095,7 @@ void nilfs_dirty_inode(struct inode *inode, int flags) return; } nilfs_transaction_begin(inode->i_sb, &ti, 0); - nilfs_mark_inode_dirty(inode); + __nilfs_mark_inode_dirty(inode, flags); nilfs_transaction_commit(inode->i_sb); /* never fails */ } @@ -996,11 +1112,11 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, unsigned int blkbits = inode->i_blkbits; int ret, n; - ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + ret = fiemap_prep(inode, fieinfo, start, &len, 0); if (ret) return ret; - mutex_lock(&inode->i_mutex); + inode_lock(inode); isize = i_size_read(inode); @@ -1079,7 +1195,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (size) { if (phys && blkphy << blkbits == phys + size) { /* The current extent goes on */ - size += n << blkbits; + size += (u64)n << blkbits; } else { /* Terminate the current extent */ ret = fiemap_fill_next_extent( @@ -1092,14 +1208,14 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } } else { /* Start a new extent */ flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } blkoff += n; } @@ -1110,6 +1226,6 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (ret == 1) ret = 0; - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); return ret; } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index b44bdb291b84..e17b8da66491 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * ioctl.c - NILFS ioctl operations. + * NILFS ioctl operations. * * Copyright (C) 2007, 2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #include <linux/fs.h> @@ -29,7 +16,8 @@ #include <linux/compat.h> /* compat_ptr() */ #include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */ #include <linux/buffer_head.h> -#include <linux/nilfs2_fs.h> +#include <linux/fileattr.h> +#include <linux/string.h> #include "nilfs.h" #include "segment.h" #include "bmap.h" @@ -37,7 +25,23 @@ #include "sufile.h" #include "dat.h" - +/** + * nilfs_ioctl_wrap_copy - wrapping function of get/set metadata info + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @dir: set of direction flags + * @dofunc: concrete function of get/set metadata info + * + * Description: nilfs_ioctl_wrap_copy() gets/sets metadata info by means of + * calling dofunc() function on the basis of @argv argument. If successful, + * the requested metadata information is copied to userspace memory. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during execution of requested operation. + * * %-EINVAL - Invalid arguments from userspace. + * * %-ENOMEM - Insufficient memory available. + */ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, struct nilfs_argv *argv, int dir, ssize_t (*dofunc)(struct the_nilfs *, @@ -45,7 +49,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, void *, size_t, size_t)) { void *buf; - void __user *base = (void __user *)(unsigned long)argv->v_base; + void __user *base = u64_to_user_ptr(argv->v_base); size_t maxmembs, total, n; ssize_t nr; int ret, i; @@ -54,10 +58,18 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_nmembs == 0) return 0; - if (argv->v_size > PAGE_SIZE) + if ((size_t)argv->v_size > PAGE_SIZE) return -EINVAL; - buf = (void *)__get_free_pages(GFP_NOFS, 0); + /* + * Reject pairs of a start item position (argv->v_index) and a + * total count (argv->v_nmembs) which leads position 'pos' to + * overflow by the increment at the end of the loop. + */ + if (argv->v_index > ~(__u64)0 - argv->v_nmembs) + return -EINVAL; + + buf = (void *)get_zeroed_page(GFP_NOFS); if (unlikely(!buf)) return -ENOMEM; maxmembs = PAGE_SIZE / argv->v_size; @@ -99,70 +111,87 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, return ret; } -static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp) +/** + * nilfs_fileattr_get - retrieve miscellaneous file attributes + * @dentry: the object to retrieve from + * @fa: fileattr pointer + * + * Return: always 0 as success. + */ +int nilfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) { - unsigned int flags = NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE; + struct inode *inode = d_inode(dentry); + + fileattr_fill_flags(fa, NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE); - return put_user(flags, (int __user *)argp); + return 0; } -static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp, - void __user *argp) +/** + * nilfs_fileattr_set - change miscellaneous file attributes + * @idmap: idmap of the mount + * @dentry: the object to change + * @fa: fileattr pointer + * + * Return: 0 on success, or a negative error code on failure. + */ +int nilfs_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa) { + struct inode *inode = d_inode(dentry); struct nilfs_transaction_info ti; unsigned int flags, oldflags; int ret; - if (!inode_owner_or_capable(inode)) - return -EACCES; - - if (get_user(flags, (int __user *)argp)) - return -EFAULT; - - ret = mnt_want_write_file(filp); - if (ret) - return ret; - - flags = nilfs_mask_flags(inode->i_mode, flags); - - mutex_lock(&inode->i_mutex); + if (fileattr_has_fsx(fa)) + return -EOPNOTSUPP; - oldflags = NILFS_I(inode)->i_flags; - - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by the - * relevant capability. - */ - ret = -EPERM; - if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) && - !capable(CAP_LINUX_IMMUTABLE)) - goto out; + flags = nilfs_mask_flags(inode->i_mode, fa->flags); ret = nilfs_transaction_begin(inode->i_sb, &ti, 0); if (ret) - goto out; + return ret; - NILFS_I(inode)->i_flags = (oldflags & ~FS_FL_USER_MODIFIABLE) | - (flags & FS_FL_USER_MODIFIABLE); + oldflags = NILFS_I(inode)->i_flags & ~FS_FL_USER_MODIFIABLE; + NILFS_I(inode)->i_flags = oldflags | (flags & FS_FL_USER_MODIFIABLE); nilfs_set_inode_flags(inode); - inode->i_ctime = CURRENT_TIME; + inode_set_ctime_current(inode); if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_mark_inode_dirty(inode); - ret = nilfs_transaction_commit(inode->i_sb); -out: - mutex_unlock(&inode->i_mutex); - mnt_drop_write_file(filp); - return ret; + return nilfs_transaction_commit(inode->i_sb); } +/** + * nilfs_ioctl_getversion - get info about a file's version (generation number) + * @inode: inode object + * @argp: userspace memory where the generation number of @inode is stored + * + * Return: 0 on success, or %-EFAULT on error. + */ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) { return put_user(inode->i_generation, (int __user *)argp); } +/** + * nilfs_ioctl_change_cpmode - change checkpoint mode (checkpoint/snapshot) + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_change_cpmode() function changes mode of + * given checkpoint between checkpoint and snapshot state. This ioctl + * is used in chcp and mkcp utilities. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * %-EFAULT - Failure during checkpoint mode changing. + * %-EPERM - Operation not permitted. + */ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -198,6 +227,22 @@ out: return ret; } +/** + * nilfs_ioctl_delete_checkpoint - remove checkpoint + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_delete_checkpoint() function removes + * checkpoint from NILFS2 file system. This ioctl is used in rmcp + * utility. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * %-EFAULT - Failure during checkpoint removing. + * %-EPERM - Operation not permitted. + */ static int nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -229,6 +274,21 @@ out: return ret; } +/** + * nilfs_ioctl_do_get_cpinfo - callback method getting info about checkpoints + * @nilfs: nilfs object + * @posp: pointer on array of checkpoint's numbers + * @flags: checkpoint mode (checkpoint or snapshot) + * @buf: buffer for storing checkponts' info + * @size: size in bytes of one checkpoint info item in array + * @nmembs: number of checkpoints in array (numbers and infos) + * + * Description: nilfs_ioctl_do_get_cpinfo() function returns info about + * requested checkpoints. The NILFS_IOCTL_GET_CPINFO ioctl is used in + * lscp utility and by nilfs_cleanerd daemon. + * + * Return: Count of nilfs_cpinfo structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -242,6 +302,24 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } +/** + * nilfs_ioctl_get_cpstat - get checkpoints statistics + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_get_cpstat() returns information about checkpoints. + * The NILFS_IOCTL_GET_CPSTAT ioctl is used by lscp, rmcp utilities + * and by nilfs_cleanerd daemon. The checkpoint statistics are copied to + * the userspace memory pointed to by @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during getting checkpoints statistics. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + */ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -260,6 +338,22 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_do_get_suinfo - callback method getting segment usage info + * @nilfs: nilfs object + * @posp: pointer on array of segment numbers + * @flags: *not used* + * @buf: buffer for storing suinfo array + * @size: size in bytes of one suinfo item in array + * @nmembs: count of segment numbers and suinfos in array + * + * Description: nilfs_ioctl_do_get_suinfo() function returns segment usage + * info about requested segments. The NILFS_IOCTL_GET_SUINFO ioctl is used + * in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon. + * + * Return: Count of nilfs_suinfo structures in output buffer on success, + * or a negative error code on failure. + */ static ssize_t nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -273,6 +367,24 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } +/** + * nilfs_ioctl_get_sustat - get segment usage statistics + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_get_sustat() returns segment usage statistics. + * The NILFS_IOCTL_GET_SUSTAT ioctl is used in lssu, nilfs_resize utilities + * and by nilfs_cleanerd daemon. The requested segment usage information is + * copied to the userspace memory pointed to by @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during getting segment usage statistics. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + */ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -291,6 +403,22 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_do_get_vinfo - callback method getting virtual blocks info + * @nilfs: nilfs object + * @posp: *not used* + * @flags: *not used* + * @buf: buffer for storing array of nilfs_vinfo structures + * @size: size in bytes of one vinfo item in array + * @nmembs: count of vinfos in array + * + * Description: nilfs_ioctl_do_get_vinfo() function returns information + * on virtual block addresses. The NILFS_IOCTL_GET_VINFO ioctl is used + * by nilfs_cleanerd daemon. + * + * Return: Count of nilfs_vinfo structures in output buffer on success, or + * a negative error code on failure. + */ static ssize_t nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -303,6 +431,22 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } +/** + * nilfs_ioctl_do_get_bdescs - callback method getting disk block descriptors + * @nilfs: nilfs object + * @posp: *not used* + * @flags: *not used* + * @buf: buffer for storing array of nilfs_bdesc structures + * @size: size in bytes of one bdesc item in array + * @nmembs: count of bdescs in array + * + * Description: nilfs_ioctl_do_get_bdescs() function returns information + * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl + * is used by nilfs_cleanerd daemon. + * + * Return: Count of nilfs_bdescs structures in output buffer on success, or + * a negative error code on failure. + */ static ssize_t nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -329,6 +473,25 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, return nmembs; } +/** + * nilfs_ioctl_get_bdescs - get disk block descriptors + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_do_get_bdescs() function returns information + * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl + * is used by nilfs_cleanerd daemon. If successful, disk block descriptors + * are copied to userspace pointer @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during getting disk block descriptors. + * * %-EINVAL - Invalid arguments from userspace. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + */ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -352,6 +515,22 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_move_inode_block - prepare data/node block for moving by GC + * @inode: inode object + * @vdesc: descriptor of virtual block number + * @buffers: list of moving buffers + * + * Description: nilfs_ioctl_move_inode_block() function registers data/node + * buffer in the GC pagecache and submit read request. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EEXIST - Block conflict detected. + * * %-EIO - I/O error. + * * %-ENOENT - Requested block doesn't exist. + * * %-ENOMEM - Insufficient memory available. + */ static int nilfs_ioctl_move_inode_block(struct inode *inode, struct nilfs_vdesc *vdesc, struct list_head *buffers) @@ -369,27 +548,25 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, if (unlikely(ret < 0)) { if (ret == -ENOENT) - printk(KERN_CRIT - "%s: invalid virtual block address (%s): " - "ino=%llu, cno=%llu, offset=%llu, " - "blocknr=%llu, vblocknr=%llu\n", - __func__, vdesc->vd_flags ? "node" : "data", - (unsigned long long)vdesc->vd_ino, - (unsigned long long)vdesc->vd_cno, - (unsigned long long)vdesc->vd_offset, - (unsigned long long)vdesc->vd_blocknr, - (unsigned long long)vdesc->vd_vblocknr); + nilfs_crit(inode->i_sb, + "%s: invalid virtual block address (%s): ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu", + __func__, vdesc->vd_flags ? "node" : "data", + (unsigned long long)vdesc->vd_ino, + (unsigned long long)vdesc->vd_cno, + (unsigned long long)vdesc->vd_offset, + (unsigned long long)vdesc->vd_blocknr, + (unsigned long long)vdesc->vd_vblocknr); return ret; } if (unlikely(!list_empty(&bh->b_assoc_buffers))) { - printk(KERN_CRIT "%s: conflicting %s buffer: ino=%llu, " - "cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu\n", - __func__, vdesc->vd_flags ? "node" : "data", - (unsigned long long)vdesc->vd_ino, - (unsigned long long)vdesc->vd_cno, - (unsigned long long)vdesc->vd_offset, - (unsigned long long)vdesc->vd_blocknr, - (unsigned long long)vdesc->vd_vblocknr); + nilfs_crit(inode->i_sb, + "%s: conflicting %s buffer: ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu", + __func__, vdesc->vd_flags ? "node" : "data", + (unsigned long long)vdesc->vd_ino, + (unsigned long long)vdesc->vd_cno, + (unsigned long long)vdesc->vd_offset, + (unsigned long long)vdesc->vd_blocknr, + (unsigned long long)vdesc->vd_vblocknr); brelse(bh); return -EEXIST; } @@ -397,6 +574,19 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, return 0; } +/** + * nilfs_ioctl_move_blocks - move valid inode's blocks during garbage collection + * @sb: superblock object + * @argv: vector of arguments from userspace + * @buf: array of nilfs_vdesc structures + * + * Description: nilfs_ioctl_move_blocks() function reads valid data/node + * blocks that garbage collector specified with the array of nilfs_vdesc + * structures and stores them into page caches of GC inodes. + * + * Return: Number of processed nilfs_vdesc structures on success, or + * a negative error code on failure. + */ static int nilfs_ioctl_move_blocks(struct super_block *sb, struct nilfs_argv *argv, void *buf) { @@ -462,6 +652,22 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, return ret; } +/** + * nilfs_ioctl_delete_checkpoints - delete checkpoints + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of periods of checkpoints numbers + * + * Description: nilfs_ioctl_delete_checkpoints() function deletes checkpoints + * in the period from p_start to p_end, excluding p_end itself. The checkpoints + * which have been already deleted are ignored. + * + * Return: Number of processed nilfs_period structures on success, or one of + * the following negative error codes on failure: + * * %-EINVAL - invalid checkpoints. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + */ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { @@ -479,6 +685,21 @@ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, return nmembs; } +/** + * nilfs_ioctl_free_vblocknrs - free virtual block numbers + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of virtual block numbers + * + * Description: nilfs_ioctl_free_vblocknrs() function frees + * the virtual block numbers specified by @buf and @argv->v_nmembs. + * + * Return: Number of processed virtual block numbers on success, or one of the + * following negative error codes on failure: + * * %-EIO - I/O error. + * * %-ENOENT - Unallocated virtual block number. + * * %-ENOMEM - Insufficient memory available. + */ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { @@ -490,12 +711,28 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, return (ret < 0) ? ret : nmembs; } +/** + * nilfs_ioctl_mark_blocks_dirty - mark blocks dirty + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of block descriptors + * + * Description: nilfs_ioctl_mark_blocks_dirty() function marks + * metadata file or data blocks as dirty. + * + * Return: Number of processed block descriptors on success, or one of the + * following negative error codes on failure: + * * %-EIO - I/O error. + * * %-ENOENT - Non-existent block (hole block). + * * %-ENOMEM - Insufficient memory available. + */ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; + struct buffer_head *bh; int ret, i; for (i = 0; i < nmembs; i++) { @@ -513,12 +750,16 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, /* skip dead block */ continue; if (bdescs[i].bd_level == 0) { - ret = nilfs_mdt_mark_block_dirty(nilfs->ns_dat, - bdescs[i].bd_offset); - if (ret < 0) { + ret = nilfs_mdt_get_block(nilfs->ns_dat, + bdescs[i].bd_offset, + false, NULL, &bh); + if (unlikely(ret)) { WARN_ON(ret == -ENOENT); return ret; } + mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(nilfs->ns_dat); + put_bh(bh); } else { ret = nilfs_bmap_mark(bmap, bdescs[i].bd_offset, bdescs[i].bd_level); @@ -566,11 +807,24 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, return 0; failed: - printk(KERN_ERR "NILFS: GC failed during preparation: %s: err=%d\n", - msg, ret); + nilfs_err(nilfs->ns_sb, "error %d preparing GC: %s", ret, msg); return ret; } +/** + * nilfs_ioctl_clean_segments - clean segments + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_clean_segments() function makes garbage + * collection operation in the environment of requested parameters + * from userspace. The NILFS_IOCTL_CLEAN_SEGMENTS ioctl is used by + * nilfs_cleanerd daemon. + * + * Return: 0 on success, or a negative error code on failure. + */ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -582,7 +836,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, sizeof(struct nilfs_bdesc), sizeof(__u64), }; - void __user *base; void *kbufs[5]; struct the_nilfs *nilfs; size_t len, nsegs; @@ -603,16 +856,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, nsegs = argv[4].v_nmembs; if (argv[4].v_size != argsz[4]) goto out; - if (nsegs > UINT_MAX / sizeof(__u64)) - goto out; /* * argv[4] points to segment numbers this ioctl cleans. We - * use kmalloc() for its buffer because memory used for the - * segment numbers is enough small. + * use kmalloc() for its buffer because the memory used for the + * segment numbers is small enough. */ - kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base, - nsegs * sizeof(__u64)); + kbufs[4] = memdup_array_user(u64_to_user_ptr(argv[4].v_base), + nsegs, sizeof(__u64)); if (IS_ERR(kbufs[4])) { ret = PTR_ERR(kbufs[4]); goto out; @@ -631,20 +882,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, goto out_free; len = argv[n].v_size * argv[n].v_nmembs; - base = (void __user *)(unsigned long)argv[n].v_base; if (len == 0) { kbufs[n] = NULL; continue; } - kbufs[n] = vmalloc(len); - if (!kbufs[n]) { - ret = -ENOMEM; - goto out_free; - } - if (copy_from_user(kbufs[n], base, len)) { - ret = -EFAULT; - vfree(kbufs[n]); + kbufs[n] = vmemdup_user(u64_to_user_ptr(argv[n].v_base), len); + if (IS_ERR(kbufs[n])) { + ret = PTR_ERR(kbufs[n]); goto out_free; } } @@ -661,10 +906,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, } ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); - if (ret < 0) - printk(KERN_ERR "NILFS: GC failed during preparation: " - "cannot read source blocks: err=%d\n", ret); - else { + if (ret < 0) { + nilfs_err(inode->i_sb, + "error %d preparing GC: cannot read source blocks", + ret); + } else { if (nilfs_sb_need_update(nilfs)) set_nilfs_discontinued(nilfs); ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); @@ -675,13 +921,34 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, out_free: while (--n >= 0) - vfree(kbufs[n]); + kvfree(kbufs[n]); kfree(kbufs[4]); out: mnt_drop_write_file(filp); return ret; } +/** + * nilfs_ioctl_sync - make a checkpoint + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_sync() function constructs a logical segment + * for checkpointing. This function guarantees that all modified data + * and metadata are written out to the device when it successfully + * returned. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during execution of requested operation. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. + * * %-EROFS - Read only filesystem. + */ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -694,11 +961,9 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return ret; nilfs = inode->i_sb->s_fs_info; - if (nilfs_test_opt(nilfs, BARRIER)) { - ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); - if (ret == -EIO) - return ret; - } + ret = nilfs_flush_device(nilfs); + if (ret < 0) + return ret; if (argp != NULL) { down_read(&nilfs->ns_segctor_sem); @@ -710,6 +975,14 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return 0; } +/** + * nilfs_ioctl_resize - resize NILFS2 volume + * @inode: inode object + * @filp: file object + * @argp: pointer on argument from userspace + * + * Return: 0 on success, or a negative error code on failure. + */ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp, void __user *argp) { @@ -735,6 +1008,59 @@ out: return ret; } +/** + * nilfs_ioctl_trim_fs() - trim ioctl handle function + * @inode: inode object + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_trim_fs is the FITRIM ioctl handle function. It + * checks the arguments from userspace and calls nilfs_sufile_trim_fs, which + * performs the actual trim operation. + * + * Return: 0 on success, or a negative error code on failure. + */ +static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + struct fstrim_range range; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!bdev_max_discard_sectors(nilfs->ns_bdev)) + return -EOPNOTSUPP; + + if (copy_from_user(&range, argp, sizeof(range))) + return -EFAULT; + + range.minlen = max_t(u64, range.minlen, + bdev_discard_granularity(nilfs->ns_bdev)); + + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range); + up_read(&nilfs->ns_segctor_sem); + + if (ret < 0) + return ret; + + if (copy_to_user(argp, &range, sizeof(range))) + return -EFAULT; + + return 0; +} + +/** + * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated + * @inode: inode object + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_set_alloc_range() function defines lower limit + * of segments in bytes and upper limit of segments in bytes. + * The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility. + * + * Return: 0 on success, or a negative error code on failure. + */ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) { struct the_nilfs *nilfs = inode->i_sb->s_fs_info; @@ -751,15 +1077,22 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) goto out; ret = -ERANGE; - if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode)) + if (range[1] > bdev_nr_bytes(inode->i_sb->s_bdev)) goto out; segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize; minseg = range[0] + segbytes - 1; - do_div(minseg, segbytes); + minseg = div64_ul(minseg, segbytes); + + if (range[1] < 4096) + goto out; + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); - do_div(maxseg, segbytes); + if (maxseg < segbytes) + goto out; + + maxseg = div64_ul(maxseg, segbytes); maxseg--; ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg); @@ -767,6 +1100,26 @@ out: return ret; } +/** + * nilfs_ioctl_get_info - wrapping function of get metadata info + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * @membsz: size of an item in bytes + * @dofunc: concrete function of getting metadata info + * + * Description: nilfs_ioctl_get_info() gets metadata info by means of + * calling dofunc() function. The requested metadata information is copied + * to userspace memory @argp. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Failure during execution of requested operation. + * * %-EINVAL - Invalid arguments from userspace. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + */ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, size_t membsz, @@ -794,16 +1147,175 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_set_suinfo - set segment usage info + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: Expects an array of nilfs_suinfo_update structures + * encapsulated in nilfs_argv and updates the segment usage info + * according to the flags in nilfs_suinfo_update. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EEXIST - Block conflict detected. + * * %-EFAULT - Error copying input data. + * * %-EINVAL - Invalid values in input (segment number, flags or nblocks). + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-EPERM - Not enough permissions. + */ +static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + struct nilfs_transaction_info ti; + struct nilfs_argv argv; + size_t len; + void *kbuf; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + ret = -EFAULT; + if (copy_from_user(&argv, argp, sizeof(argv))) + goto out; + + ret = -EINVAL; + if (argv.v_size < sizeof(struct nilfs_suinfo_update)) + goto out; + + if (argv.v_nmembs > nilfs->ns_nsegments) + goto out; + + if (argv.v_nmembs >= UINT_MAX / argv.v_size) + goto out; + + len = argv.v_size * argv.v_nmembs; + if (!len) { + ret = 0; + goto out; + } + + kbuf = vmemdup_user(u64_to_user_ptr(argv.v_base), len); + if (IS_ERR(kbuf)) { + ret = PTR_ERR(kbuf); + goto out; + } + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_sufile_set_suinfo(nilfs->ns_sufile, kbuf, argv.v_size, + argv.v_nmembs); + if (unlikely(ret < 0)) + nilfs_transaction_abort(inode->i_sb); + else + nilfs_transaction_commit(inode->i_sb); /* never fails */ + + kvfree(kbuf); +out: + mnt_drop_write_file(filp); + return ret; +} + +/** + * nilfs_ioctl_get_fslabel - get the volume name of the file system + * @sb: super block instance + * @argp: pointer to userspace memory where the volume name should be stored + * + * Return: 0 on success, %-EFAULT if copying to userspace memory fails. + */ +static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + char label[NILFS_MAX_VOLUME_NAME + 1]; + + BUILD_BUG_ON(NILFS_MAX_VOLUME_NAME >= FSLABEL_MAX); + + down_read(&nilfs->ns_sem); + memtostr_pad(label, nilfs->ns_sbp[0]->s_volume_name); + up_read(&nilfs->ns_sem); + + if (copy_to_user(argp, label, sizeof(label))) + return -EFAULT; + return 0; +} + +/** + * nilfs_ioctl_set_fslabel - set the volume name of the file system + * @sb: super block instance + * @filp: file object + * @argp: pointer to userspace memory that contains the volume name + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EFAULT - Error copying input data. + * * %-EINVAL - Label length exceeds record size in superblock. + * * %-EIO - I/O error. + * * %-EPERM - Operation not permitted (insufficient permissions). + * * %-EROFS - Read only file system. + */ +static int nilfs_ioctl_set_fslabel(struct super_block *sb, struct file *filp, + void __user *argp) +{ + char label[NILFS_MAX_VOLUME_NAME + 1]; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + size_t len; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (copy_from_user(label, argp, NILFS_MAX_VOLUME_NAME + 1)) { + ret = -EFAULT; + goto out_drop_write; + } + + len = strnlen(label, NILFS_MAX_VOLUME_NAME + 1); + if (len > NILFS_MAX_VOLUME_NAME) { + nilfs_err(sb, "unable to set label with more than %zu bytes", + NILFS_MAX_VOLUME_NAME); + ret = -EINVAL; + goto out_drop_write; + } + + down_write(&nilfs->ns_sem); + sbp = nilfs_prepare_super(sb, false); + if (unlikely(!sbp)) { + ret = -EIO; + goto out_unlock; + } + + strtomem_pad(sbp[0]->s_volume_name, label, 0); + if (sbp[1]) + strtomem_pad(sbp[1]->s_volume_name, label, 0); + + ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); + +out_unlock: + up_write(&nilfs->ns_sem); +out_drop_write: + mnt_drop_write_file(filp); + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); void __user *argp = (void __user *)arg; switch (cmd) { - case FS_IOC_GETFLAGS: - return nilfs_ioctl_getflags(inode, argp); - case FS_IOC_SETFLAGS: - return nilfs_ioctl_setflags(inode, filp, argp); case FS_IOC_GETVERSION: return nilfs_ioctl_getversion(inode, argp); case NILFS_IOCTL_CHANGE_CPMODE: @@ -820,6 +1332,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_get_info(inode, filp, cmd, argp, sizeof(struct nilfs_suinfo), nilfs_ioctl_do_get_suinfo); + case NILFS_IOCTL_SET_SUINFO: + return nilfs_ioctl_set_suinfo(inode, filp, cmd, argp); case NILFS_IOCTL_GET_SUSTAT: return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_VINFO: @@ -836,6 +1350,12 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_resize(inode, filp, argp); case NILFS_IOCTL_SET_ALLOC_RANGE: return nilfs_ioctl_set_alloc_range(inode, argp); + case FITRIM: + return nilfs_ioctl_trim_fs(inode, argp); + case FS_IOC_GETFSLABEL: + return nilfs_ioctl_get_fslabel(inode->i_sb, argp); + case FS_IOC_SETFSLABEL: + return nilfs_ioctl_set_fslabel(inode->i_sb, filp, argp); default: return -ENOTTY; } @@ -845,12 +1365,6 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { - case FS_IOC32_GETFLAGS: - cmd = FS_IOC_GETFLAGS; - break; - case FS_IOC32_SETFLAGS: - cmd = FS_IOC_SETFLAGS; - break; case FS_IOC32_GETVERSION: cmd = FS_IOC_GETVERSION; break; @@ -859,6 +1373,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_GET_CPINFO: case NILFS_IOCTL_GET_CPSTAT: case NILFS_IOCTL_GET_SUINFO: + case NILFS_IOCTL_SET_SUINFO: case NILFS_IOCTL_GET_SUSTAT: case NILFS_IOCTL_GET_VINFO: case NILFS_IOCTL_GET_BDESCS: @@ -866,6 +1381,9 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_SYNC: case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: + case FITRIM: + case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: break; default: return -ENOIOCTLCMD; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index c4dcd1db57ee..946b0d3534a5 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * mdt.c - meta data file for NILFS + * Meta data file for NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. */ #include <linux/buffer_head.h> @@ -32,7 +19,9 @@ #include "segment.h" #include "page.h" #include "mdt.h" +#include "alloc.h" /* nilfs_palloc_destroy_cache() */ +#include <trace/events/nilfs2.h> #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) @@ -44,7 +33,8 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, struct buffer_head *, void *)) { struct nilfs_inode_info *ii = NILFS_I(inode); - void *kaddr; + struct folio *folio = bh->b_folio; + void *from; int ret; /* Caller exclude read accesses using page lock */ @@ -58,16 +48,21 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_mapped(bh); - kaddr = kmap_atomic(bh->b_page); - memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); + /* Initialize block (block size > PAGE_SIZE not yet supported) */ + from = kmap_local_folio(folio, offset_in_folio(folio, bh->b_data)); + memset(from, 0, bh->b_size); if (init_block) - init_block(inode, bh, kaddr); - flush_dcache_page(bh->b_page); - kunmap_atomic(kaddr); + init_block(inode, bh, from); + kunmap_local(from); + + flush_dcache_folio(folio); set_buffer_uptodate(bh); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); + + trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block); + return 0; } @@ -97,7 +92,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, if (buffer_uptodate(bh)) goto failed_bh; - bh->b_bdev = sb->s_bdev; err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); if (likely(!err)) { get_bh(bh); @@ -105,8 +99,8 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, } failed_bh: - unlock_page(bh->b_page); - page_cache_release(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); brelse(bh); failed_unlock: @@ -119,8 +113,8 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, } static int -nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, - int mode, struct buffer_head **out_bh) +nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, blk_opf_t opf, + struct buffer_head **out_bh) { struct buffer_head *bh; __u64 blknum = 0; @@ -134,12 +128,12 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, if (buffer_uptodate(bh)) goto out; - if (mode == READA) { + if (opf & REQ_RAHEAD) { if (!trylock_buffer(bh)) { ret = -EBUSY; goto failed_bh; } - } else /* mode == READ */ + } else /* opf == REQ_OP_READ */ lock_buffer(bh); if (buffer_uptodate(bh)) { @@ -156,15 +150,18 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode, bh); + submit_bh(opf, bh); ret = 0; + + trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, + opf & REQ_OP_MASK); out: get_bh(bh); *out_bh = bh; failed_bh: - unlock_page(bh->b_page); - page_cache_release(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); brelse(bh); failed: return ret; @@ -178,7 +175,7 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS; int err; - err = nilfs_mdt_submit_block(inode, block, READ, &first_bh); + err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, &first_bh); if (err == -EEXIST) /* internal code */ goto out; @@ -188,7 +185,8 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, if (readahead) { blkoff = block + 1; for (i = 0; i < nr_ra_blocks; i++, blkoff++) { - err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); + err = nilfs_mdt_submit_block(inode, blkoff, + REQ_OP_READ | REQ_RAHEAD, &bh); if (likely(!err || err == -EEXIST)) brelse(bh); else if (err != -EBUSY) @@ -203,8 +201,12 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, out_no_wait: err = -EIO; - if (!buffer_uptodate(first_bh)) + if (!buffer_uptodate(first_bh)) { + nilfs_err(inode->i_sb, + "I/O error reading meta-data file (ino=%lu, block-offset=%lu)", + inode->i_ino, block); goto failed_bh; + } out: *out_bh = first_bh; return 0; @@ -224,20 +226,21 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, * @out_bh: output of a pointer to the buffer_head * * nilfs_mdt_get_block() looks up the specified buffer and tries to create - * a new buffer if @create is not zero. On success, the returned buffer is - * assured to be either existing or formatted using a buffer lock on success. - * @out_bh is substituted only when zero is returned. - * - * Return Value: On success, it returns 0. On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EIO - I/O error + * a new buffer if @create is not zero. If (and only if) this function + * succeeds, it stores a pointer to the retrieved buffer head in the location + * pointed to by @out_bh. * - * %-ENOENT - the specified block does not exist (hole block) + * The retrieved buffer may be either an existing one or a newly allocated one. + * For a newly created buffer, if the callback function argument @init_block + * is non-NULL, the callback will be called with the buffer locked to format + * the block. * - * %-EROFS - Read only filesystem (for create mode) + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - The specified block does not exist (hole block). + * * %-ENOMEM - Insufficient memory available. + * * %-EROFS - Read only filesystem (for create mode). */ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, void (*init_block)(struct inode *, @@ -261,16 +264,66 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, } /** - * nilfs_mdt_delete_block - make a hole on the meta data file. + * nilfs_mdt_find_block - find and get a buffer on meta data file. * @inode: inode of the meta data file - * @block: block offset + * @start: start block offset (inclusive) + * @end: end block offset (inclusive) + * @blkoff: block offset + * @out_bh: place to store a pointer to buffer_head struct * - * Return Value: On success, zero is returned. - * On error, one of the following negative error code is returned. + * nilfs_mdt_find_block() looks up an existing block in range of + * [@start, @end] and stores pointer to a buffer head of the block to + * @out_bh, and block offset to @blkoff, respectively. @out_bh and + * @blkoff are substituted only when zero is returned. * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No block was found in the range. + * * %-ENOMEM - Insufficient memory available. + */ +int nilfs_mdt_find_block(struct inode *inode, unsigned long start, + unsigned long end, unsigned long *blkoff, + struct buffer_head **out_bh) +{ + __u64 next; + int ret; + + if (unlikely(start > end)) + return -ENOENT; + + ret = nilfs_mdt_read_block(inode, start, true, out_bh); + if (!ret) { + *blkoff = start; + goto out; + } + if (unlikely(ret != -ENOENT || start == ULONG_MAX)) + goto out; + + ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next); + if (!ret) { + if (next <= end) { + ret = nilfs_mdt_read_block(inode, next, true, out_bh); + if (!ret) + *blkoff = next; + } else { + ret = -ENOENT; + } + } +out: + return ret; +} + +/** + * nilfs_mdt_delete_block - make a hole on the meta data file. + * @inode: inode of the meta data file + * @block: block offset * - * %-EIO - I/O error + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Non-existent block. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) { @@ -293,39 +346,35 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and * tries to release the page including the buffer from a page cache. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-EBUSY - page has an active buffer. - * - * %-ENOENT - page cache has no page addressed by the offset. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Page has an active buffer. + * * %-ENOENT - Page cache has no page addressed by the offset. */ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) { - pgoff_t index = (pgoff_t)block >> - (PAGE_CACHE_SHIFT - inode->i_blkbits); - struct page *page; - unsigned long first_block; + pgoff_t index = block >> (PAGE_SHIFT - inode->i_blkbits); + struct folio *folio; + struct buffer_head *bh; int ret = 0; int still_dirty; - page = find_lock_page(inode->i_mapping, index); - if (!page) + folio = filemap_lock_folio(inode->i_mapping, index); + if (IS_ERR(folio)) return -ENOENT; - wait_on_page_writeback(page); - - first_block = (unsigned long)index << - (PAGE_CACHE_SHIFT - inode->i_blkbits); - if (page_has_buffers(page)) { - struct buffer_head *bh; + folio_wait_writeback(folio); - bh = nilfs_page_get_nth_block(page, block - first_block); + bh = folio_buffers(folio); + if (bh) { + unsigned long first_block = index << + (PAGE_SHIFT - inode->i_blkbits); + bh = get_nth_bh(bh, block - first_block); nilfs_forget_buffer(bh); } - still_dirty = PageDirty(page); - unlock_page(page); - page_cache_release(page); + still_dirty = folio_test_dirty(folio); + folio_unlock(folio); + folio_put(folio); if (still_dirty || invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0) @@ -333,34 +382,6 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) return ret; } -/** - * nilfs_mdt_mark_block_dirty - mark a block on the meta data file dirty. - * @inode: inode of the meta data file - * @block: block offset - * - * Return Value: On success, it returns 0. On error, the following negative - * error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-EIO - I/O error - * - * %-ENOENT - the specified block does not exist (hole block) - */ -int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) -{ - struct buffer_head *bh; - int err; - - err = nilfs_mdt_read_block(inode, block, 0, &bh); - if (unlikely(err)) - return err; - mark_buffer_dirty(bh); - nilfs_mdt_mark_dirty(inode); - brelse(bh); - return 0; -} - int nilfs_mdt_fetch_dirty(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); @@ -372,27 +393,27 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) return test_bit(NILFS_I_DIRTY, &ii->i_state); } -static int -nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) +static int nilfs_mdt_write_folio(struct folio *folio, + struct writeback_control *wbc) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct super_block *sb; int err = 0; - if (inode && (inode->i_sb->s_flags & MS_RDONLY)) { + if (inode && sb_rdonly(inode->i_sb)) { /* * It means that filesystem was remounted in read-only * mode because of error or metadata corruption. But we - * have dirty pages that try to be flushed in background. - * So, here we simply discard this dirty page. + * have dirty folios that try to be flushed in background. + * So, here we simply discard this dirty folio. */ - nilfs_clear_dirty_page(page, false); - unlock_page(page); + nilfs_clear_folio_dirty(folio); + folio_unlock(folio); return -EROFS; } - redirty_page_for_writepage(wbc, page); - unlock_page(page); + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); if (!inode) return 0; @@ -401,15 +422,27 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); - else if (wbc->for_reclaim) - nilfs_flush_segment(sb, inode->i_ino); return err; } +static int nilfs_mdt_writeback(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct folio *folio = NULL; + int error; + + while ((folio = writeback_iter(mapping, wbc, folio, &error))) + error = nilfs_mdt_write_folio(folio, wbc); + + return error; +} static const struct address_space_operations def_mdt_aops = { - .writepage = nilfs_mdt_write_page, + .dirty_folio = block_dirty_folio, + .invalidate_folio = block_invalidate_folio, + .writepages = nilfs_mdt_writeback, + .migrate_folio = buffer_migrate_folio_norefs, }; static const struct inode_operations def_mdt_iops; @@ -429,7 +462,6 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) inode->i_mode = S_IFREG; mapping_set_gfp_mask(inode->i_mapping, gfp_mask); - inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; inode->i_op = &def_mdt_iops; inode->i_fop = &def_mdt_fops; @@ -438,13 +470,46 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) return 0; } -void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, - unsigned header_size) +/** + * nilfs_mdt_clear - do cleanup for the metadata file + * @inode: inode of the metadata file + */ +void nilfs_mdt_clear(struct inode *inode) +{ + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mdi->mi_shadow; + + if (mdi->mi_palloc_cache) + nilfs_palloc_destroy_cache(inode); + + if (shadow) { + struct inode *s_inode = shadow->inode; + + shadow->inode = NULL; + iput(s_inode); + mdi->mi_shadow = NULL; + } +} + +/** + * nilfs_mdt_destroy - release resources used by the metadata file + * @inode: inode of the metadata file + */ +void nilfs_mdt_destroy(struct inode *inode) +{ + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + + kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ + kfree(mdi); +} + +void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size, + unsigned int header_size) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); mi->mi_entry_size = entry_size; - mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size; + mi->mi_entries_per_block = i_blocksize(inode) / entry_size; mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); } @@ -452,18 +517,22 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file * @inode: inode of the metadata file * @shadow: shadow mapping + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); - struct backing_dev_info *bdi = inode->i_sb->s_bdi; + struct inode *s_inode; INIT_LIST_HEAD(&shadow->frozen_buffers); - address_space_init_once(&shadow->frozen_data); - nilfs_mapping_init(&shadow->frozen_data, inode, bdi); - address_space_init_once(&shadow->frozen_btnodes); - nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi); + + s_inode = nilfs_iget_for_shadow(inode); + if (IS_ERR(s_inode)) + return PTR_ERR(s_inode); + + shadow->inode = s_inode; mi->mi_shadow = shadow; return 0; } @@ -471,20 +540,23 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, /** * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map * @inode: inode of the metadata file + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_mdt_save_to_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; + struct inode *s_inode = shadow->inode; int ret; - ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); + ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping); if (ret) goto out; - ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, - &ii->i_btnode_cache); + ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping, + ii->i_assoc_inode->i_mapping); if (ret) goto out; @@ -497,17 +569,20 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) { struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; struct buffer_head *bh_frozen; - struct page *page; + struct folio *folio; int blkbits = inode->i_blkbits; - page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); - if (!page) - return -ENOMEM; + folio = filemap_grab_folio(shadow->inode->i_mapping, + bh->b_folio->index); + if (IS_ERR(folio)) + return PTR_ERR(folio); - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << blkbits, 0); + bh_frozen = folio_buffers(folio); + if (!bh_frozen) + bh_frozen = create_empty_buffers(folio, 1 << blkbits, 0); - bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); + bh_frozen = get_nth_bh(bh_frozen, + offset_in_folio(folio, bh->b_data) >> blkbits); if (!buffer_uptodate(bh_frozen)) nilfs_copy_buffer(bh_frozen, bh); @@ -519,8 +594,8 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) brelse(bh_frozen); /* already frozen */ } - unlock_page(page); - page_cache_release(page); + folio_unlock(folio); + folio_put(folio); return 0; } @@ -529,17 +604,20 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) { struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; struct buffer_head *bh_frozen = NULL; - struct page *page; + struct folio *folio; int n; - page = find_lock_page(&shadow->frozen_data, bh->b_page->index); - if (page) { - if (page_has_buffers(page)) { - n = bh_offset(bh) >> inode->i_blkbits; - bh_frozen = nilfs_page_get_nth_block(page, n); + folio = filemap_lock_folio(shadow->inode->i_mapping, + bh->b_folio->index); + if (!IS_ERR(folio)) { + bh_frozen = folio_buffers(folio); + if (bh_frozen) { + n = offset_in_folio(folio, bh->b_data) >> + inode->i_blkbits; + bh_frozen = get_nth_bh(bh_frozen, n); } - unlock_page(page); - page_cache_release(page); + folio_unlock(folio); + folio_put(folio); } return bh_frozen; } @@ -572,11 +650,12 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); - nilfs_clear_dirty_pages(inode->i_mapping, true); - nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); + nilfs_clear_dirty_pages(inode->i_mapping); + nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); - nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); - nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping); + nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, + NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); @@ -591,10 +670,11 @@ void nilfs_mdt_clear_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; + struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode; down_write(&mi->mi_sem); nilfs_release_frozen_buffers(shadow); - truncate_inode_pages(&shadow->frozen_data, 0); - truncate_inode_pages(&shadow->frozen_btnodes, 0); + truncate_inode_pages(shadow->inode->i_mapping, 0); + truncate_inode_pages(shadow_btnc_inode->i_mapping, 0); up_write(&mi->mi_sem); } diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index ab172e8549c5..9e23bab3ff12 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * mdt.h - NILFS meta data file prototype and definitions + * NILFS meta data file prototype and definitions * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. */ #ifndef _NILFS_MDT_H @@ -31,14 +18,12 @@ /** * struct nilfs_shadow_map - shadow mapping of meta data file * @bmap_store: shadow copy of bmap state - * @frozen_data: shadowed dirty data pages - * @frozen_btnodes: shadowed dirty b-tree nodes' pages + * @inode: holder of page caches used in shadow mapping * @frozen_buffers: list of frozen buffers */ struct nilfs_shadow_map { struct nilfs_bmap_store bmap_store; - struct address_space frozen_data; - struct address_space frozen_btnodes; + struct inode *inode; struct list_head frozen_buffers; }; @@ -57,8 +42,8 @@ struct nilfs_shadow_map { struct nilfs_mdt_info { struct rw_semaphore mi_sem; struct blockgroup_lock *mi_bgl; - unsigned mi_entry_size; - unsigned mi_first_entry_offset; + unsigned int mi_entry_size; + unsigned int mi_first_entry_offset; unsigned long mi_entries_per_block; struct nilfs_palloc_cache *mi_palloc_cache; struct nilfs_shadow_map *mi_shadow; @@ -71,20 +56,30 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) return inode->i_private; } +static inline int nilfs_is_metadata_file_inode(const struct inode *inode) +{ + return inode->i_private != NULL; +} + /* Default GFP flags using highmem */ -#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) +#define NILFS_MDT_GFP (__GFP_RECLAIM | __GFP_IO | __GFP_HIGHMEM) int nilfs_mdt_get_block(struct inode *, unsigned long, int, void (*init_block)(struct inode *, struct buffer_head *, void *), struct buffer_head **); +int nilfs_mdt_find_block(struct inode *inode, unsigned long start, + unsigned long end, unsigned long *blkoff, + struct buffer_head **out_bh); int nilfs_mdt_delete_block(struct inode *, unsigned long); int nilfs_mdt_forget_block(struct inode *, unsigned long); -int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); int nilfs_mdt_fetch_dirty(struct inode *); int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz); -void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); +void nilfs_mdt_clear(struct inode *inode); +void nilfs_mdt_destroy(struct inode *inode); + +void nilfs_mdt_set_entry_size(struct inode *, unsigned int, unsigned int); int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow); @@ -111,7 +106,10 @@ static inline __u64 nilfs_mdt_cno(struct inode *inode) return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno; } -#define nilfs_mdt_bgl_lock(inode, bg) \ - (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock) +static inline spinlock_t * +nilfs_mdt_bgl_lock(struct inode *inode, unsigned int block_group) +{ + return bgl_lock_ptr(NILFS_MDT(inode)->mi_bgl, block_group); +} #endif /* _NILFS_MDT_H */ diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 9de78f08989e..40f4b1a28705 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -1,24 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * namei.c - NILFS pathname lookup operations. + * NILFS pathname lookup operations. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Modified for NILFS by Amagai Yoshiji <amagai@osrg.net>, - * Ryusuke Konishi <ryusuke@osrg.net> + * Modified for NILFS by Amagai Yoshiji and Ryusuke Konishi. */ /* * linux/fs/ext2/namei.c @@ -49,11 +35,13 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) { int err = nilfs_add_link(dentry, inode); + if (!err) { - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -67,12 +55,25 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; + int res; if (dentry->d_name.len > NILFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = nilfs_inode_by_name(dir, &dentry->d_name); - inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; + res = nilfs_inode_by_name(dir, &dentry->d_name, &ino); + if (res) { + if (res != -ENOENT) + return ERR_PTR(res); + inode = NULL; + } else { + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); + if (inode == ERR_PTR(-ESTALE)) { + nilfs_error(dir->i_sb, + "deleted inode referenced: %lu", ino); + return ERR_PTR(-EIO); + } + } + return d_splice_alias(inode, dentry); } @@ -84,8 +85,8 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool excl) +static int nilfs_create(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode; struct nilfs_transaction_info ti; @@ -112,15 +113,13 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } static int -nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) +nilfs_mknod(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; struct nilfs_transaction_info ti; int err; - if (!new_valid_dev(rdev)) - return -EINVAL; - err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) return err; @@ -139,12 +138,12 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) return err; } -static int nilfs_symlink(struct inode *dir, struct dentry *dentry, - const char *symname) +static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, const char *symname) { struct nilfs_transaction_info ti; struct super_block *sb = dir->i_sb; - unsigned l = strlen(symname)+1; + unsigned int l = strlen(symname) + 1; struct inode *inode; int err; @@ -155,13 +154,17 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry, if (err) return err; - inode = nilfs_new_inode(dir, S_IFLNK | S_IRWXUGO); + inode = nilfs_new_inode(dir, S_IFLNK | 0777); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out; /* slow symlink */ inode->i_op = &nilfs_symlink_inode_operations; + inode_nohighmem(inode); + mapping_set_gfp_mask(inode->i_mapping, + mapping_gfp_constraint(inode->i_mapping, + ~__GFP_FS)); inode->i_mapping->a_ops = &nilfs_aops; err = page_symlink(inode, symname, l); if (err) @@ -182,6 +185,7 @@ out: out_fail: drop_nlink(inode); nilfs_mark_inode_dirty(inode); + unlock_new_inode(inode); iput(inode); goto out; } @@ -189,7 +193,7 @@ out_fail: static int nilfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - struct inode *inode = old_dentry->d_inode; + struct inode *inode = d_inode(old_dentry); struct nilfs_transaction_info ti; int err; @@ -197,20 +201,25 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, if (err) return err; - inode->i_ctime = CURRENT_TIME; + inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); - err = nilfs_add_nondir(dentry, inode); - if (!err) + err = nilfs_add_link(dentry, inode); + if (!err) { + d_instantiate(dentry, inode); err = nilfs_transaction_commit(dir->i_sb); - else + } else { + inode_dec_link_count(inode); + iput(inode); nilfs_transaction_abort(dir->i_sb); + } return err; } -static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static struct dentry *nilfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct inode *inode; struct nilfs_transaction_info ti; @@ -218,7 +227,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) - return err; + return ERR_PTR(err); inc_nlink(dir); @@ -242,19 +251,20 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; nilfs_mark_inode_dirty(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out: if (!err) err = nilfs_transaction_commit(dir->i_sb); else nilfs_transaction_abort(dir->i_sb); - return err; + return ERR_PTR(err); out_fail: drop_nlink(inode); drop_nlink(inode); nilfs_mark_inode_dirty(inode); + unlock_new_inode(inode); iput(inode); out_dir: drop_nlink(dir); @@ -266,30 +276,32 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode; struct nilfs_dir_entry *de; - struct page *page; + struct folio *folio; int err; - err = -ENOENT; - de = nilfs_find_entry(dir, &dentry->d_name, &page); - if (!de) + de = nilfs_find_entry(dir, &dentry->d_name, &folio); + if (IS_ERR(de)) { + err = PTR_ERR(de); goto out; + } - inode = dentry->d_inode; + inode = d_inode(dentry); err = -EIO; if (le64_to_cpu(de->inode) != inode->i_ino) goto out; if (!inode->i_nlink) { - nilfs_warning(inode->i_sb, __func__, - "deleting nonexistent file (%lu), %d\n", - inode->i_ino, inode->i_nlink); + nilfs_warn(inode->i_sb, + "deleting nonexistent file (ino=%lu), %d", + inode->i_ino, inode->i_nlink); set_nlink(inode, 1); } - err = nilfs_delete_entry(de, page); + err = nilfs_delete_entry(de, folio); + folio_release_kmap(folio, de); if (err) goto out; - inode->i_ctime = dir->i_ctime; + inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); drop_nlink(inode); err = 0; out: @@ -309,7 +321,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) if (!err) { nilfs_mark_inode_dirty(dir); - nilfs_mark_inode_dirty(dentry->d_inode); + nilfs_mark_inode_dirty(d_inode(dentry)); err = nilfs_transaction_commit(dir->i_sb); } else nilfs_transaction_abort(dir->i_sb); @@ -319,7 +331,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct nilfs_transaction_info ti; int err; @@ -346,50 +358,62 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) return err; } -static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +static int nilfs_rename(struct mnt_idmap *idmap, + struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) { - struct inode *old_inode = old_dentry->d_inode; - struct inode *new_inode = new_dentry->d_inode; - struct page *dir_page = NULL; + struct inode *old_inode = d_inode(old_dentry); + struct inode *new_inode = d_inode(new_dentry); + struct folio *dir_folio = NULL; struct nilfs_dir_entry *dir_de = NULL; - struct page *old_page; + struct folio *old_folio; struct nilfs_dir_entry *old_de; struct nilfs_transaction_info ti; + bool old_is_dir = S_ISDIR(old_inode->i_mode); int err; + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); if (unlikely(err)) return err; - err = -ENOENT; - old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page); - if (!old_de) + old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio); + if (IS_ERR(old_de)) { + err = PTR_ERR(old_de); goto out; + } - if (S_ISDIR(old_inode->i_mode)) { + if (old_is_dir && old_dir != new_dir) { err = -EIO; - dir_de = nilfs_dotdot(old_inode, &dir_page); + dir_de = nilfs_dotdot(old_inode, &dir_folio); if (!dir_de) goto out_old; } if (new_inode) { - struct page *new_page; + struct folio *new_folio; struct nilfs_dir_entry *new_de; err = -ENOTEMPTY; - if (dir_de && !nilfs_empty_dir(new_inode)) + if (old_is_dir && !nilfs_empty_dir(new_inode)) goto out_dir; - err = -ENOENT; - new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); - if (!new_de) + new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, + &new_folio); + if (IS_ERR(new_de)) { + err = PTR_ERR(new_de); + goto out_dir; + } + err = nilfs_set_link(new_dir, new_de, new_folio, old_inode); + folio_release_kmap(new_folio, new_de); + if (unlikely(err)) goto out_dir; - nilfs_set_link(new_dir, new_de, new_page, old_inode); nilfs_mark_inode_dirty(new_dir); - new_inode->i_ctime = CURRENT_TIME; - if (dir_de) + inode_set_ctime_current(new_inode); + if (old_is_dir) drop_nlink(new_inode); drop_nlink(new_inode); nilfs_mark_inode_dirty(new_inode); @@ -397,7 +421,7 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, err = nilfs_add_link(new_dentry, old_inode); if (err) goto out_dir; - if (dir_de) { + if (old_is_dir) { inc_nlink(new_dir); nilfs_mark_inode_dirty(new_dir); } @@ -407,30 +431,30 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = CURRENT_TIME; - - nilfs_delete_entry(old_de, old_page); - - if (dir_de) { - nilfs_set_link(old_inode, dir_de, dir_page, new_dir); - drop_nlink(old_dir); + inode_set_ctime_current(old_inode); + + err = nilfs_delete_entry(old_de, old_folio); + if (likely(!err)) { + if (old_is_dir) { + if (old_dir != new_dir) + err = nilfs_set_link(old_inode, dir_de, + dir_folio, new_dir); + drop_nlink(old_dir); + } + nilfs_mark_inode_dirty(old_dir); } - nilfs_mark_inode_dirty(old_dir); nilfs_mark_inode_dirty(old_inode); - err = nilfs_transaction_commit(old_dir->i_sb); - return err; - out_dir: - if (dir_de) { - kunmap(dir_page); - page_cache_release(dir_page); - } + if (dir_de) + folio_release_kmap(dir_folio, dir_de); out_old: - kunmap(old_page); - page_cache_release(old_page); + folio_release_kmap(old_folio, old_de); out: - nilfs_transaction_abort(old_dir->i_sb); + if (likely(!err)) + err = nilfs_transaction_commit(old_dir->i_sb); + else + nilfs_transaction_abort(old_dir->i_sb); return err; } @@ -439,22 +463,17 @@ out: */ static struct dentry *nilfs_get_parent(struct dentry *child) { - unsigned long ino; - struct inode *inode; - struct qstr dotdot = QSTR_INIT("..", 2); + ino_t ino; + int res; struct nilfs_root *root; - ino = nilfs_inode_by_name(child->d_inode, &dotdot); - if (!ino) - return ERR_PTR(-ENOENT); + res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino); + if (res) + return ERR_PTR(res); - root = NILFS_I(child->d_inode)->i_root; + root = NILFS_I(d_inode(child))->i_root; - inode = nilfs_iget(child->d_inode->i_sb, root, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - return d_obtain_alias(inode); + return d_obtain_alias(nilfs_iget(child->d_sb, root, ino)); } static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno, @@ -487,8 +506,7 @@ static struct dentry *nilfs_fh_to_dentry(struct super_block *sb, struct fid *fh, { struct nilfs_fid *fid = (struct nilfs_fid *)fh; - if ((fh_len != NILFS_FID_SIZE_NON_CONNECTABLE && - fh_len != NILFS_FID_SIZE_CONNECTABLE) || + if (fh_len < NILFS_FID_SIZE_NON_CONNECTABLE || (fh_type != FILEID_NILFS_WITH_PARENT && fh_type != FILEID_NILFS_WITHOUT_PARENT)) return NULL; @@ -501,7 +519,7 @@ static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, { struct nilfs_fid *fid = (struct nilfs_fid *)fh; - if (fh_len != NILFS_FID_SIZE_CONNECTABLE || + if (fh_len < NILFS_FID_SIZE_CONNECTABLE || fh_type != FILEID_NILFS_WITH_PARENT) return NULL; @@ -554,6 +572,8 @@ const struct inode_operations nilfs_dir_inode_operations = { .setattr = nilfs_setattr, .permission = nilfs_permission, .fiemap = nilfs_fiemap, + .fileattr_get = nilfs_fileattr_get, + .fileattr_set = nilfs_fileattr_set, }; const struct inode_operations nilfs_special_inode_operations = { @@ -562,9 +582,7 @@ const struct inode_operations nilfs_special_inode_operations = { }; const struct inode_operations nilfs_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, + .get_link = page_get_link, .permission = nilfs_permission, }; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 9bc72dec3fa6..b7e3d91b6243 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * nilfs.h - NILFS local header file. + * NILFS local header file. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net> - * Ryusuke Konishi <ryusuke@osrg.net> + * Written by Koji Sato and Ryusuke Konishi. */ #ifndef _NILFS_H @@ -28,20 +14,23 @@ #include <linux/buffer_head.h> #include <linux/spinlock.h> #include <linux/blkdev.h> -#include <linux/nilfs2_fs.h> +#include <linux/fs_struct.h> +#include <linux/nilfs2_api.h> +#include <linux/nilfs2_ondisk.h> #include "the_nilfs.h" #include "bmap.h" /** * struct nilfs_inode_info - nilfs inode data in memory * @i_flags: inode flags + * @i_type: inode type (combination of flags that inidicate usage) * @i_state: dynamic state flags * @i_bmap: pointer on i_bmap_data * @i_bmap_data: raw block mapping * @i_xattr: <TODO> * @i_dir_start_lookup: page index of last successful search * @i_cno: checkpoint number for GC inode - * @i_btnode_cache: cached pages of b-tree nodes + * @i_assoc_inode: associated inode (B-tree node cache holder or back pointer) * @i_dirty: list for connecting dirty files * @xattr_sem: semaphore for extended attributes processing * @i_bh: buffer contains disk inode @@ -50,13 +39,14 @@ */ struct nilfs_inode_info { __u32 i_flags; + unsigned int i_type; unsigned long i_state; /* Dynamic state flags */ struct nilfs_bmap *i_bmap; struct nilfs_bmap i_bmap_data; __u64 i_xattr; /* sector_t ??? */ __u32 i_dir_start_lookup; __u64 i_cno; /* check point number for GC inode */ - struct address_space i_btnode_cache; + struct inode *i_assoc_inode; struct list_head i_dirty; /* List for connecting dirty files */ #ifdef CONFIG_NILFS_XATTR @@ -69,8 +59,10 @@ struct nilfs_inode_info { */ struct rw_semaphore xattr_sem; #endif - struct buffer_head *i_bh; /* i_bh contains a new or dirty - disk inode */ + struct buffer_head *i_bh; /* + * i_bh contains a new or dirty + * disk inode. + */ struct nilfs_root *i_root; struct inode vfs_inode; }; @@ -86,13 +78,6 @@ NILFS_BMAP_I(const struct nilfs_bmap *bmap) return container_of(bmap, struct nilfs_inode_info, i_bmap_data); } -static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) -{ - struct nilfs_inode_info *ii = - container_of(btnc, struct nilfs_inode_info, i_btnode_cache); - return &ii->vfs_inode; -} - /* * Dynamic state flags of NILFS on-memory inode (i_state) */ @@ -100,13 +85,24 @@ enum { NILFS_I_NEW = 0, /* Inode is newly created */ NILFS_I_DIRTY, /* The file is dirty */ NILFS_I_QUEUED, /* inode is in dirty_files list */ - NILFS_I_BUSY, /* inode is grabbed by a segment - constructor */ + NILFS_I_BUSY, /* + * Inode is grabbed by a segment + * constructor + */ NILFS_I_COLLECTED, /* All dirty blocks are collected */ NILFS_I_UPDATED, /* The file has been written back */ - NILFS_I_INODE_DIRTY, /* write_inode is requested */ + NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ - NILFS_I_GCINODE, /* inode for GC, on memory only */ +}; + +/* + * Flags to identify the usage of on-memory inodes (i_type) + */ +enum { + NILFS_I_TYPE_NORMAL = 0, + NILFS_I_TYPE_GC = 0x0001, /* For data caching during GC */ + NILFS_I_TYPE_BTNC = 0x0002, /* For btree node cache */ + NILFS_I_TYPE_SHADOW = 0x0004, /* For shadowed page cache */ }; /* @@ -117,23 +113,40 @@ enum { NILFS_SB_COMMIT_ALL /* Commit both super blocks */ }; +/** + * define NILFS_MAX_VOLUME_NAME - maximum number of characters (bytes) in a + * file system volume name + * + * Defined by the size of the volume name field in the on-disk superblocks. + * This volume name does not include the terminating NULL byte if the string + * length matches the field size, so use (NILFS_MAX_VOLUME_NAME + 1) for the + * size of the buffer that requires a NULL byte termination. + */ +#define NILFS_MAX_VOLUME_NAME \ + sizeof_field(struct nilfs_super_block, s_volume_name) + /* * Macros to check inode numbers */ -#define NILFS_MDT_INO_BITS \ - ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ - 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ - 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) +#define NILFS_MDT_INO_BITS \ + (BIT(NILFS_DAT_INO) | BIT(NILFS_CPFILE_INO) | \ + BIT(NILFS_SUFILE_INO) | BIT(NILFS_IFILE_INO) | \ + BIT(NILFS_ATIME_INO) | BIT(NILFS_SKETCH_INO)) -#define NILFS_SYS_INO_BITS \ - ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) +#define NILFS_SYS_INO_BITS (BIT(NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) #define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) #define NILFS_MDT_INODE(sb, ino) \ - ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) + ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino))) #define NILFS_VALID_INODE(sb, ino) \ - ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) + ((ino) >= NILFS_FIRST_INO(sb) || \ + ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino)))) + +#define NILFS_PRIVATE_INODE(ino) ({ \ + ino_t __ino = (ino); \ + ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \ + (__ino) != NILFS_SKETCH_INO); }) /** * struct nilfs_transaction_info: context information for synchronization @@ -141,16 +154,16 @@ enum { * @ti_save: Backup of journal_info field of task_struct * @ti_flags: Flags * @ti_count: Nest level - * @ti_garbage: List of inode to be put when releasing semaphore */ struct nilfs_transaction_info { u32 ti_magic; void *ti_save; - /* This should never used. If this happens, - one of other filesystems has a bug. */ + /* + * This should never be used. If it happens, + * one of other filesystems has a bug. + */ unsigned short ti_flags; unsigned short ti_count; - struct list_head ti_garbage; }; /* ti_magic */ @@ -158,8 +171,10 @@ struct nilfs_transaction_info { /* ti_flags */ #define NILFS_TI_DYNAMIC_ALLOC 0x0001 /* Allocated from slab */ -#define NILFS_TI_SYNC 0x0002 /* Force to construct segment at the - end of transaction. */ +#define NILFS_TI_SYNC 0x0002 /* + * Force to construct segment at the + * end of transaction. + */ #define NILFS_TI_GC 0x0004 /* GC context */ #define NILFS_TI_COMMIT 0x0008 /* Change happened or not */ #define NILFS_TI_WRITER 0x0010 /* Constructor context */ @@ -211,6 +226,9 @@ static inline int nilfs_acl_chmod(struct inode *inode) static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) { + if (S_ISLNK(inode->i_mode)) + return 0; + inode->i_mode &= ~current_umask(); return 0; } @@ -236,21 +254,24 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) } /* dir.c */ -extern int nilfs_add_link(struct dentry *, struct inode *); -extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); -extern int nilfs_make_empty(struct inode *, struct inode *); -extern struct nilfs_dir_entry * -nilfs_find_entry(struct inode *, const struct qstr *, struct page **); -extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *); -extern int nilfs_empty_dir(struct inode *); -extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **); -extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, - struct page *, struct inode *); +int nilfs_add_link(struct dentry *, struct inode *); +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino); +int nilfs_make_empty(struct inode *, struct inode *); +struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *, + struct folio **); +int nilfs_delete_entry(struct nilfs_dir_entry *, struct folio *); +int nilfs_empty_dir(struct inode *); +struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct folio **); +int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, + struct folio *folio, struct inode *inode); /* file.c */ extern int nilfs_sync_file(struct file *, loff_t, loff_t, int); /* ioctl.c */ +int nilfs_fileattr_get(struct dentry *dentry, struct file_kattr *m); +int nilfs_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa); long nilfs_ioctl(struct file *, unsigned int, unsigned long); long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, @@ -260,11 +281,11 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, void nilfs_inode_add_blocks(struct inode *inode, int n); void nilfs_inode_sub_blocks(struct inode *inode, int n); extern struct inode *nilfs_new_inode(struct inode *, umode_t); -extern void nilfs_free_inode(struct inode *); extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern void nilfs_set_inode_flags(struct inode *); extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); -extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); +void nilfs_write_inode_common(struct inode *inode, + struct nilfs_inode *raw_inode); struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino); struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, @@ -273,31 +294,77 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, unsigned long ino); extern struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno); -extern void nilfs_update_inode(struct inode *, struct buffer_head *); +int nilfs_attach_btree_node_cache(struct inode *inode); +void nilfs_detach_btree_node_cache(struct inode *inode); +struct inode *nilfs_iget_for_shadow(struct inode *inode); +extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); -extern int nilfs_setattr(struct dentry *, struct iattr *); +extern int nilfs_setattr(struct mnt_idmap *, struct dentry *, + struct iattr *); extern void nilfs_write_failed(struct address_space *mapping, loff_t to); -int nilfs_permission(struct inode *inode, int mask); +int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode, + int mask); int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); -int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); -extern int nilfs_mark_inode_dirty(struct inode *); +int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty); +extern int __nilfs_mark_inode_dirty(struct inode *, int); extern void nilfs_dirty_inode(struct inode *, int flags); int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); +static inline int nilfs_mark_inode_dirty(struct inode *inode) +{ + return __nilfs_mark_inode_dirty(inode, I_DIRTY); +} +static inline int nilfs_mark_inode_dirty_sync(struct inode *inode) +{ + return __nilfs_mark_inode_dirty(inode, I_DIRTY_SYNC); +} /* super.c */ extern struct inode *nilfs_alloc_inode(struct super_block *); -extern void nilfs_destroy_inode(struct inode *); -extern __printf(3, 4) -void nilfs_error(struct super_block *, const char *, const char *, ...); + +__printf(2, 3) +void __nilfs_msg(struct super_block *sb, const char *fmt, ...); extern __printf(3, 4) -void nilfs_warning(struct super_block *, const char *, const char *, ...); +void __nilfs_error(struct super_block *sb, const char *function, + const char *fmt, ...); + +#ifdef CONFIG_PRINTK + +#define nilfs_msg(sb, level, fmt, ...) \ + __nilfs_msg(sb, level fmt, ##__VA_ARGS__) +#define nilfs_error(sb, fmt, ...) \ + __nilfs_error(sb, __func__, fmt, ##__VA_ARGS__) + +#else + +#define nilfs_msg(sb, level, fmt, ...) \ + do { \ + no_printk(level fmt, ##__VA_ARGS__); \ + (void)(sb); \ + } while (0) +#define nilfs_error(sb, fmt, ...) \ + do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __nilfs_error(sb, "", " "); \ + } while (0) + +#endif /* CONFIG_PRINTK */ + +#define nilfs_crit(sb, fmt, ...) \ + nilfs_msg(sb, KERN_CRIT, fmt, ##__VA_ARGS__) +#define nilfs_err(sb, fmt, ...) \ + nilfs_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__) +#define nilfs_warn(sb, fmt, ...) \ + nilfs_msg(sb, KERN_WARNING, fmt, ##__VA_ARGS__) +#define nilfs_info(sb, fmt, ...) \ + nilfs_msg(sb, KERN_INFO, fmt, ##__VA_ARGS__) + extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); -extern int nilfs_store_magic_and_option(struct super_block *, - struct nilfs_super_block *, char *); +extern int nilfs_store_magic(struct super_block *sb, + struct nilfs_super_block *sbp); extern int nilfs_check_feature_compatibility(struct super_block *, struct nilfs_super_block *); extern void nilfs_set_log_cursor(struct nilfs_super_block *, @@ -320,6 +387,14 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *); int nilfs_init_gcinode(struct inode *inode); void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs); +/* sysfs.c */ +int __init nilfs_sysfs_init(void); +void nilfs_sysfs_exit(void); +int nilfs_sysfs_create_device_group(struct super_block *); +void nilfs_sysfs_delete_device_group(struct the_nilfs *); +int nilfs_sysfs_create_snapshot_group(struct nilfs_root *); +void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *); + /* * Inodes and files operations */ @@ -327,6 +402,7 @@ extern const struct file_operations nilfs_dir_operations; extern const struct inode_operations nilfs_file_inode_operations; extern const struct file_operations nilfs_file_operations; extern const struct address_space_operations nilfs_aops; +extern const struct address_space_operations nilfs_buffer_cache_aops; extern const struct inode_operations nilfs_dir_inode_operations; extern const struct inode_operations nilfs_special_inode_operations; extern const struct inode_operations nilfs_symlink_inode_operations; diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 0ba679866e50..56c4da417b6a 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -1,24 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * page.c - buffer/page management specific to NILFS + * Buffer/page management specific to NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net>, - * Seiji Kihara <kihara@osrg.net>. + * Written by Ryusuke Konishi and Seiji Kihara. */ #include <linux/pagemap.h> @@ -35,25 +21,24 @@ #include "mdt.h" -#define NILFS_BUFFER_INHERENT_BITS \ - ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ - (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked)) +#define NILFS_BUFFER_INHERENT_BITS \ + (BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) | \ + BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked)) -static struct buffer_head * -__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, - int blkbits, unsigned long b_state) +static struct buffer_head *__nilfs_get_folio_block(struct folio *folio, + unsigned long block, pgoff_t index, int blkbits, + unsigned long b_state) { unsigned long first_block; - struct buffer_head *bh; + struct buffer_head *bh = folio_buffers(folio); - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << blkbits, b_state); + if (!bh) + bh = create_empty_buffers(folio, 1 << blkbits, b_state); - first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits); - bh = nilfs_page_get_nth_block(page, block - first_block); + first_block = (unsigned long)index << (PAGE_SHIFT - blkbits); + bh = get_nth_bh(bh, block - first_block); - touch_buffer(bh); wait_on_buffer(bh); return bh; } @@ -64,45 +49,45 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, unsigned long b_state) { int blkbits = inode->i_blkbits; - pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); - struct page *page; + pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits); + struct folio *folio; struct buffer_head *bh; - page = grab_cache_page(mapping, index); - if (unlikely(!page)) + folio = filemap_grab_folio(mapping, index); + if (IS_ERR(folio)) return NULL; - bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); + bh = __nilfs_get_folio_block(folio, blkoff, index, blkbits, b_state); if (unlikely(!bh)) { - unlock_page(page); - page_cache_release(page); + folio_unlock(folio); + folio_put(folio); return NULL; } + bh->b_bdev = inode->i_sb->s_bdev; return bh; } /** * nilfs_forget_buffer - discard dirty state - * @inode: owner inode of the buffer * @bh: buffer head of the buffer to be discarded */ void nilfs_forget_buffer(struct buffer_head *bh) { - struct page *page = bh->b_page; + struct folio *folio = bh->b_folio; + const unsigned long clear_bits = + (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | + BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | + BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | + BIT(BH_Delay)); lock_buffer(bh); - clear_buffer_nilfs_volatile(bh); - clear_buffer_nilfs_checked(bh); - clear_buffer_nilfs_redirected(bh); - clear_buffer_dirty(bh); - if (nilfs_page_buffers_clean(page)) - __nilfs_clear_page_dirty(page); - - clear_buffer_uptodate(bh); - clear_buffer_mapped(bh); + set_mask_bits(&bh->b_state, clear_bits, 0); + if (nilfs_folio_buffers_clean(folio)) + __nilfs_clear_folio_dirty(folio); + bh->b_blocknr = -1; - ClearPageUptodate(page); - ClearPageMappedToDisk(page); + folio_clear_uptodate(folio); + folio_clear_mappedtodisk(folio); unlock_buffer(bh); brelse(bh); } @@ -114,81 +99,81 @@ void nilfs_forget_buffer(struct buffer_head *bh) */ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) { - void *kaddr0, *kaddr1; + void *saddr, *daddr; unsigned long bits; - struct page *spage = sbh->b_page, *dpage = dbh->b_page; + struct folio *sfolio = sbh->b_folio, *dfolio = dbh->b_folio; struct buffer_head *bh; - kaddr0 = kmap_atomic(spage); - kaddr1 = kmap_atomic(dpage); - memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); - kunmap_atomic(kaddr1); - kunmap_atomic(kaddr0); + saddr = kmap_local_folio(sfolio, bh_offset(sbh)); + daddr = kmap_local_folio(dfolio, bh_offset(dbh)); + memcpy(daddr, saddr, sbh->b_size); + kunmap_local(daddr); + kunmap_local(saddr); dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; dbh->b_blocknr = sbh->b_blocknr; dbh->b_bdev = sbh->b_bdev; bh = dbh; - bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped)); + bits = sbh->b_state & (BIT(BH_Uptodate) | BIT(BH_Mapped)); while ((bh = bh->b_this_page) != dbh) { lock_buffer(bh); bits &= bh->b_state; unlock_buffer(bh); } - if (bits & (1UL << BH_Uptodate)) - SetPageUptodate(dpage); + if (bits & BIT(BH_Uptodate)) + folio_mark_uptodate(dfolio); else - ClearPageUptodate(dpage); - if (bits & (1UL << BH_Mapped)) - SetPageMappedToDisk(dpage); + folio_clear_uptodate(dfolio); + if (bits & BIT(BH_Mapped)) + folio_set_mappedtodisk(dfolio); else - ClearPageMappedToDisk(dpage); + folio_clear_mappedtodisk(dfolio); } /** - * nilfs_page_buffers_clean - check if a page has dirty buffers or not. - * @page: page to be checked + * nilfs_folio_buffers_clean - Check if a folio has dirty buffers or not. + * @folio: Folio to be checked. * - * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. - * Otherwise, it returns non-zero value. + * Return: false if the folio has dirty buffers, true otherwise. */ -int nilfs_page_buffers_clean(struct page *page) +bool nilfs_folio_buffers_clean(struct folio *folio) { struct buffer_head *bh, *head; - bh = head = page_buffers(page); + bh = head = folio_buffers(folio); do { if (buffer_dirty(bh)) - return 0; + return false; bh = bh->b_this_page; } while (bh != head); - return 1; + return true; } -void nilfs_page_bug(struct page *page) +void nilfs_folio_bug(struct folio *folio) { + struct buffer_head *bh, *head; struct address_space *m; unsigned long ino; - if (unlikely(!page)) { - printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); + if (unlikely(!folio)) { + printk(KERN_CRIT "NILFS_FOLIO_BUG(NULL)\n"); return; } - m = page->mapping; + m = folio->mapping; ino = m ? m->host->i_ino : 0; - printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " + printk(KERN_CRIT "NILFS_FOLIO_BUG(%p): cnt=%d index#=%llu flags=0x%lx " "mapping=%p ino=%lu\n", - page, atomic_read(&page->_count), - (unsigned long long)page->index, page->flags, m, ino); + folio, folio_ref_count(folio), + (unsigned long long)folio->index, folio->flags.f, m, ino); - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; + head = folio_buffers(folio); + if (head) { int i = 0; - bh = head = page_buffers(page); + bh = head; do { printk(KERN_CRIT " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", @@ -200,30 +185,32 @@ void nilfs_page_bug(struct page *page) } /** - * nilfs_copy_page -- copy the page with buffers - * @dst: destination page - * @src: source page - * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. + * nilfs_copy_folio -- copy the folio with buffers + * @dst: destination folio + * @src: source folio + * @copy_dirty: flag whether to copy dirty states on the folio's buffer heads. * - * This function is for both data pages and btnode pages. The dirty flag - * should be treated by caller. The page must not be under i/o. - * Both src and dst page must be locked + * This function is for both data folios and btnode folios. The dirty flag + * should be treated by caller. The folio must not be under i/o. + * Both src and dst folio must be locked */ -static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) +static void nilfs_copy_folio(struct folio *dst, struct folio *src, + bool copy_dirty) { - struct buffer_head *dbh, *dbufs, *sbh, *sbufs; + struct buffer_head *dbh, *dbufs, *sbh; unsigned long mask = NILFS_BUFFER_INHERENT_BITS; - BUG_ON(PageWriteback(dst)); + BUG_ON(folio_test_writeback(dst)); - sbh = sbufs = page_buffers(src); - if (!page_has_buffers(dst)) - create_empty_buffers(dst, sbh->b_size, 0); + sbh = folio_buffers(src); + dbh = folio_buffers(dst); + if (!dbh) + dbh = create_empty_buffers(dst, sbh->b_size, 0); if (copy_dirty) - mask |= (1UL << BH_Dirty); + mask |= BIT(BH_Dirty); - dbh = dbufs = page_buffers(dst); + dbufs = dbh; do { lock_buffer(sbh); lock_buffer(dbh); @@ -234,16 +221,16 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) dbh = dbh->b_this_page; } while (dbh != dbufs); - copy_highpage(dst, src); + folio_copy(dst, src); - if (PageUptodate(src) && !PageUptodate(dst)) - SetPageUptodate(dst); - else if (!PageUptodate(src) && PageUptodate(dst)) - ClearPageUptodate(dst); - if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) - SetPageMappedToDisk(dst); - else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) - ClearPageMappedToDisk(dst); + if (folio_test_uptodate(src) && !folio_test_uptodate(dst)) + folio_mark_uptodate(dst); + else if (!folio_test_uptodate(src) && folio_test_uptodate(dst)) + folio_clear_uptodate(dst); + if (folio_test_mappedtodisk(src) && !folio_test_mappedtodisk(dst)) + folio_set_mappedtodisk(dst); + else if (!folio_test_mappedtodisk(src) && folio_test_mappedtodisk(dst)) + folio_clear_mappedtodisk(dst); do { unlock_buffer(sbh); @@ -256,43 +243,43 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) int nilfs_copy_dirty_pages(struct address_space *dmap, struct address_space *smap) { - struct pagevec pvec; + struct folio_batch fbatch; unsigned int i; pgoff_t index = 0; int err = 0; - pagevec_init(&pvec, 0); + folio_batch_init(&fbatch); repeat: - if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) + if (!filemap_get_folios_tag(smap, &index, (pgoff_t)-1, + PAGECACHE_TAG_DIRTY, &fbatch)) return 0; - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i], *dpage; + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i], *dfolio; - lock_page(page); - if (unlikely(!PageDirty(page))) - NILFS_PAGE_BUG(page, "inconsistent dirty state"); + folio_lock(folio); + if (unlikely(!folio_test_dirty(folio))) + NILFS_FOLIO_BUG(folio, "inconsistent dirty state"); - dpage = grab_cache_page(dmap, page->index); - if (unlikely(!dpage)) { + dfolio = filemap_grab_folio(dmap, folio->index); + if (IS_ERR(dfolio)) { /* No empty page is added to the page cache */ - err = -ENOMEM; - unlock_page(page); + folio_unlock(folio); + err = PTR_ERR(dfolio); break; } - if (unlikely(!page_has_buffers(page))) - NILFS_PAGE_BUG(page, + if (unlikely(!folio_buffers(folio))) + NILFS_FOLIO_BUG(folio, "found empty page in dat page cache"); - nilfs_copy_page(dpage, page, 1); - __set_page_dirty_nobuffers(dpage); + nilfs_copy_folio(dfolio, folio, true); + filemap_dirty_folio(folio_mapping(dfolio), dfolio); - unlock_page(dpage); - page_cache_release(dpage); - unlock_page(page); + folio_unlock(dfolio); + folio_put(dfolio); + folio_unlock(folio); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); if (likely(!err)) @@ -305,66 +292,63 @@ repeat: * @dmap: destination page cache * @smap: source page cache * - * No pages must no be added to the cache during this process. + * No pages must be added to the cache during this process. * This must be ensured by the caller. */ void nilfs_copy_back_pages(struct address_space *dmap, struct address_space *smap) { - struct pagevec pvec; + struct folio_batch fbatch; unsigned int i, n; - pgoff_t index = 0; - int err; + pgoff_t start = 0; - pagevec_init(&pvec, 0); + folio_batch_init(&fbatch); repeat: - n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE); + n = filemap_get_folios(smap, &start, ~0UL, &fbatch); if (!n) return; - index = pvec.pages[n - 1]->index + 1; - - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i], *dpage; - pgoff_t offset = page->index; - - lock_page(page); - dpage = find_lock_page(dmap, offset); - if (dpage) { - /* override existing page on the destination cache */ - WARN_ON(PageDirty(dpage)); - nilfs_copy_page(dpage, page, 0); - unlock_page(dpage); - page_cache_release(dpage); - } else { - struct page *page2; - /* move the page to the destination cache */ - spin_lock_irq(&smap->tree_lock); - page2 = radix_tree_delete(&smap->page_tree, offset); - WARN_ON(page2 != page); + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i], *dfolio; + pgoff_t index = folio->index; + + folio_lock(folio); + dfolio = filemap_lock_folio(dmap, index); + if (!IS_ERR(dfolio)) { + /* overwrite existing folio in the destination cache */ + WARN_ON(folio_test_dirty(dfolio)); + nilfs_copy_folio(dfolio, folio, false); + folio_unlock(dfolio); + folio_put(dfolio); + /* Do we not need to remove folio from smap here? */ + } else { + struct folio *f; + /* move the folio to the destination cache */ + xa_lock_irq(&smap->i_pages); + f = __xa_erase(&smap->i_pages, index); + WARN_ON(folio != f); smap->nrpages--; - spin_unlock_irq(&smap->tree_lock); - - spin_lock_irq(&dmap->tree_lock); - err = radix_tree_insert(&dmap->page_tree, offset, page); - if (unlikely(err < 0)) { - WARN_ON(err == -EEXIST); - page->mapping = NULL; - page_cache_release(page); /* for cache */ + xa_unlock_irq(&smap->i_pages); + + xa_lock_irq(&dmap->i_pages); + f = __xa_store(&dmap->i_pages, index, folio, GFP_NOFS); + if (unlikely(f)) { + /* Probably -ENOMEM */ + folio->mapping = NULL; + folio_put(folio); } else { - page->mapping = dmap; + folio->mapping = dmap; dmap->nrpages++; - if (PageDirty(page)) - radix_tree_tag_set(&dmap->page_tree, - offset, - PAGECACHE_TAG_DIRTY); + if (folio_test_dirty(folio)) + __xa_set_mark(&dmap->i_pages, index, + PAGECACHE_TAG_DIRTY); } - spin_unlock_irq(&dmap->tree_lock); + xa_unlock_irq(&dmap->i_pages); } - unlock_page(page); + folio_unlock(folio); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); goto repeat; @@ -373,83 +357,101 @@ repeat: /** * nilfs_clear_dirty_pages - discard dirty pages in address space * @mapping: address space with dirty pages for discarding - * @silent: suppress [true] or print [false] warning messages */ -void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) +void nilfs_clear_dirty_pages(struct address_space *mapping) { - struct pagevec pvec; + struct folio_batch fbatch; unsigned int i; pgoff_t index = 0; - pagevec_init(&pvec, 0); + folio_batch_init(&fbatch); + + while (filemap_get_folios_tag(mapping, &index, (pgoff_t)-1, + PAGECACHE_TAG_DIRTY, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; + + folio_lock(folio); - while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) { - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i]; + /* + * This folio may have been removed from the address + * space by truncation or invalidation when the lock + * was acquired. Skip processing in that case. + */ + if (likely(folio->mapping == mapping)) + nilfs_clear_folio_dirty(folio); - lock_page(page); - nilfs_clear_dirty_page(page, silent); - unlock_page(page); + folio_unlock(folio); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); } } /** - * nilfs_clear_dirty_page - discard dirty page - * @page: dirty page that will be discarded - * @silent: suppress [true] or print [false] warning messages + * nilfs_clear_folio_dirty - discard dirty folio + * @folio: dirty folio that will be discarded + * + * nilfs_clear_folio_dirty() clears working states including dirty state for + * the folio and its buffers. If the folio has buffers, clear only if it is + * confirmed that none of the buffer heads are busy (none have valid + * references and none are locked). */ -void nilfs_clear_dirty_page(struct page *page, bool silent) +void nilfs_clear_folio_dirty(struct folio *folio) { - struct inode *inode = page->mapping->host; - struct super_block *sb = inode->i_sb; + struct buffer_head *bh, *head; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); - if (!silent) { - nilfs_warning(sb, __func__, - "discard page: offset %lld, ino %lu", - page_offset(page), inode->i_ino); - } + head = folio_buffers(folio); + if (head) { + const unsigned long clear_bits = + (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | + BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | + BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | + BIT(BH_Delay)); + bool busy, invalidated = false; - ClearPageUptodate(page); - ClearPageMappedToDisk(page); +recheck_buffers: + busy = false; + bh = head; + do { + if (atomic_read(&bh->b_count) | buffer_locked(bh)) { + busy = true; + break; + } + } while (bh = bh->b_this_page, bh != head); - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; + if (busy) { + if (invalidated) + return; + invalidate_bh_lrus(); + invalidated = true; + goto recheck_buffers; + } - bh = head = page_buffers(page); + bh = head; do { lock_buffer(bh); - if (!silent) { - nilfs_warning(sb, __func__, - "discard block %llu, size %zu", - (u64)bh->b_blocknr, bh->b_size); - } - clear_buffer_dirty(bh); - clear_buffer_nilfs_volatile(bh); - clear_buffer_nilfs_checked(bh); - clear_buffer_nilfs_redirected(bh); - clear_buffer_uptodate(bh); - clear_buffer_mapped(bh); + set_mask_bits(&bh->b_state, clear_bits, 0); unlock_buffer(bh); } while (bh = bh->b_this_page, bh != head); } - __nilfs_clear_page_dirty(page); + folio_clear_uptodate(folio); + folio_clear_mappedtodisk(folio); + folio_clear_checked(folio); + __nilfs_clear_folio_dirty(folio); } -unsigned nilfs_page_count_clean_buffers(struct page *page, - unsigned from, unsigned to) +unsigned int nilfs_page_count_clean_buffers(struct folio *folio, + unsigned int from, unsigned int to) { - unsigned block_start, block_end; + unsigned int block_start, block_end; struct buffer_head *bh, *head; - unsigned nc = 0; + unsigned int nc = 0; - for (bh = head = page_buffers(page), block_start = 0; + for (bh = head = folio_buffers(folio), block_start = 0; bh != head || !block_start; block_start = block_end, bh = bh->b_this_page) { block_end = block_start + bh->b_size; @@ -459,45 +461,33 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, return nc; } -void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, - struct backing_dev_info *bdi) -{ - mapping->host = inode; - mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_NOFS); - mapping->private_data = NULL; - mapping->backing_dev_info = bdi; - mapping->a_ops = &empty_aops; -} - /* * NILFS2 needs clear_page_dirty() in the following two cases: * - * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears - * page dirty flags when it copies back pages from the shadow cache - * (gcdat->{i_mapping,i_btnode_cache}) to its original cache - * (dat->{i_mapping,i_btnode_cache}). + * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty + * flag of pages when it copies back pages from shadow cache to the + * original cache. * * 2) Some B-tree operations like insertion or deletion may dispose buffers * in dirty state, and this needs to cancel the dirty state of their pages. */ -int __nilfs_clear_page_dirty(struct page *page) +void __nilfs_clear_folio_dirty(struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; if (mapping) { - spin_lock_irq(&mapping->tree_lock); - if (test_bit(PG_dirty, &page->flags)) { - radix_tree_tag_clear(&mapping->page_tree, - page_index(page), + xa_lock_irq(&mapping->i_pages); + if (folio_test_dirty(folio)) { + __xa_clear_mark(&mapping->i_pages, folio->index, PAGECACHE_TAG_DIRTY); - spin_unlock_irq(&mapping->tree_lock); - return clear_page_dirty_for_io(page); + xa_unlock_irq(&mapping->i_pages); + folio_clear_dirty_for_io(folio); + return; } - spin_unlock_irq(&mapping->tree_lock); - return 0; + xa_unlock_irq(&mapping->i_pages); + return; } - return TestClearPageDirty(page); + folio_clear_dirty(folio); } /** @@ -509,48 +499,44 @@ int __nilfs_clear_page_dirty(struct page *page) * This function searches an extent of buffers marked "delayed" which * starts from a block offset equal to or larger than @start_blk. If * such an extent was found, this will store the start offset in - * @blkoff and return its length in blocks. Otherwise, zero is - * returned. + * @blkoff and return its length in blocks. + * + * Return: Length in blocks of found extent, 0 otherwise. */ unsigned long nilfs_find_uncommitted_extent(struct inode *inode, sector_t start_blk, sector_t *blkoff) { - unsigned int i; + unsigned int i, nr_folios; pgoff_t index; - unsigned int nblocks_in_page; unsigned long length = 0; - sector_t b; - struct pagevec pvec; - struct page *page; + struct folio_batch fbatch; + struct folio *folio; if (inode->i_mapping->nrpages == 0) return 0; - index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); - nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits); + index = start_blk >> (PAGE_SHIFT - inode->i_blkbits); - pagevec_init(&pvec, 0); + folio_batch_init(&fbatch); repeat: - pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE, - pvec.pages); - if (pvec.nr == 0) + nr_folios = filemap_get_folios_contig(inode->i_mapping, &index, ULONG_MAX, + &fbatch); + if (nr_folios == 0) return length; - if (length > 0 && pvec.pages[0]->index > index) - goto out; - - b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); i = 0; do { - page = pvec.pages[i]; + folio = fbatch.folios[i]; - lock_page(page); - if (page_has_buffers(page)) { + folio_lock(folio); + if (folio_buffers(folio)) { struct buffer_head *bh, *head; + sector_t b; - bh = head = page_buffers(page); + b = folio->index << (PAGE_SHIFT - inode->i_blkbits); + bh = head = folio_buffers(folio); do { if (b < start_blk) continue; @@ -565,21 +551,17 @@ repeat: } else { if (length > 0) goto out_locked; - - b += nblocks_in_page; } - unlock_page(page); + folio_unlock(folio); - } while (++i < pagevec_count(&pvec)); + } while (++i < nr_folios); - index = page->index + 1; - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); goto repeat; out_locked: - unlock_page(page); -out: - pagevec_release(&pvec); + folio_unlock(folio); + folio_batch_release(&fbatch); return length; } diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index ef30c5c2426f..136cd1c143c9 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * page.h - buffer/page management specific to NILFS + * Buffer/page management specific to NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net>, - * Seiji Kihara <kihara@osrg.net>. + * Written by Ryusuke Konishi and Seiji Kihara. */ #ifndef _NILFS_PAGE_H @@ -44,38 +30,26 @@ BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ -int __nilfs_clear_page_dirty(struct page *); +void __nilfs_clear_folio_dirty(struct folio *); struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, unsigned long, unsigned long); void nilfs_forget_buffer(struct buffer_head *); void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); -int nilfs_page_buffers_clean(struct page *); -void nilfs_page_bug(struct page *); +bool nilfs_folio_buffers_clean(struct folio *); +void nilfs_folio_bug(struct folio *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_dirty_page(struct page *, bool); -void nilfs_clear_dirty_pages(struct address_space *, bool); -void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, - struct backing_dev_info *bdi); -unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); +void nilfs_clear_folio_dirty(struct folio *folio); +void nilfs_clear_dirty_pages(struct address_space *mapping); +unsigned int nilfs_page_count_clean_buffers(struct folio *folio, + unsigned int from, unsigned int to); unsigned long nilfs_find_uncommitted_extent(struct inode *inode, sector_t start_blk, sector_t *blkoff); -#define NILFS_PAGE_BUG(page, m, a...) \ - do { nilfs_page_bug(page); BUG(); } while (0) - -static inline struct buffer_head * -nilfs_page_get_nth_block(struct page *page, unsigned int count) -{ - struct buffer_head *bh = page_buffers(page); - - while (count-- > 0) - bh = bh->b_this_page; - get_bh(bh); - return bh; -} +#define NILFS_FOLIO_BUG(folio, m, a...) \ + do { nilfs_folio_bug(folio); BUG(); } while (0) #endif /* _NILFS_PAGE_H */ diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index ff00a0b7acb9..a9c61d0492cb 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * recovery.c - NILFS recovery logic + * NILFS recovery logic * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. */ #include <linux/buffer_head.h> @@ -47,8 +34,10 @@ enum { /* work structure for recovery */ struct nilfs_recovery_block { - ino_t ino; /* Inode number of the file that this block - belongs to */ + ino_t ino; /* + * Inode number of the file that this block + * belongs to + */ sector_t blocknr; /* block number */ __u64 vblocknr; /* virtual block number */ unsigned long blkoff; /* File offset of the data block (per block) */ @@ -56,38 +45,37 @@ struct nilfs_recovery_block { }; -static int nilfs_warn_segment_error(int err) +static int nilfs_warn_segment_error(struct super_block *sb, int err) { + const char *msg = NULL; + switch (err) { case NILFS_SEG_FAIL_IO: - printk(KERN_WARNING - "NILFS warning: I/O error on loading last segment\n"); + nilfs_err(sb, "I/O error reading segment"); return -EIO; case NILFS_SEG_FAIL_MAGIC: - printk(KERN_WARNING - "NILFS warning: Segment magic number invalid\n"); + msg = "Magic number mismatch"; break; case NILFS_SEG_FAIL_SEQ: - printk(KERN_WARNING - "NILFS warning: Sequence number mismatch\n"); + msg = "Sequence number mismatch"; break; case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT: - printk(KERN_WARNING - "NILFS warning: Checksum error in super root\n"); + msg = "Checksum error in super root"; break; case NILFS_SEG_FAIL_CHECKSUM_FULL: - printk(KERN_WARNING - "NILFS warning: Checksum error in segment payload\n"); + msg = "Checksum error in segment payload"; break; case NILFS_SEG_FAIL_CONSISTENCY: - printk(KERN_WARNING - "NILFS warning: Inconsistent segment\n"); + msg = "Inconsistency found"; break; case NILFS_SEG_NO_SUPER_ROOT: - printk(KERN_WARNING - "NILFS warning: No super root in the last segment\n"); + msg = "No super root in the last segment"; break; + default: + nilfs_err(sb, "unrecognized segment error %d", err); + return -EINVAL; } + nilfs_warn(sb, "invalid segment: %s", msg); return -EINVAL; } @@ -100,6 +88,8 @@ static int nilfs_warn_segment_error(int err) * @check_bytes: number of bytes to be checked * @start: DBN of start block * @nblock: number of blocks to be checked + * + * Return: 0 on success, or %-EIO if an I/O error occurs. */ static int nilfs_compute_checksum(struct the_nilfs *nilfs, struct buffer_head *bhs, u32 *sum, @@ -138,6 +128,11 @@ static int nilfs_compute_checksum(struct the_nilfs *nilfs, * @sr_block: disk block number of the super root block * @pbh: address of a buffer_head pointer to return super root buffer * @check: CRC check flag + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Super root block corrupted. + * * %-EIO - I/O error. */ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, struct buffer_head **pbh, int check) @@ -156,7 +151,7 @@ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, sr = (struct nilfs_super_root *)bh_sr->b_data; if (check) { - unsigned bytes = le16_to_cpu(sr->sr_bytes); + unsigned int bytes = le16_to_cpu(sr->sr_bytes); if (bytes == 0 || bytes > nilfs->ns_blocksize) { ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT; @@ -180,7 +175,7 @@ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, brelse(bh_sr); failed: - return nilfs_warn_segment_error(ret); + return nilfs_warn_segment_error(nilfs->ns_sb, ret); } /** @@ -188,6 +183,8 @@ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, * @nilfs: nilfs object * @start_blocknr: start block number of the log * @sum: pointer to return segment summary structure + * + * Return: Buffer head pointer, or NULL if an I/O error occurs. */ static struct buffer_head * nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr, @@ -207,6 +204,13 @@ nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr, * @seg_seq: sequence number of segment * @bh_sum: buffer head of summary block * @sum: segment summary struct + * + * Return: 0 on success, or one of the following internal codes on failure: + * * %NILFS_SEG_FAIL_MAGIC - Magic number mismatch. + * * %NILFS_SEG_FAIL_SEQ - Sequence number mismatch. + * * %NIFLS_SEG_FAIL_CONSISTENCY - Block count out of range. + * * %NILFS_SEG_FAIL_IO - I/O error. + * * %NILFS_SEG_FAIL_CHECKSUM_FULL - Full log checksum verification failed. */ static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq, struct buffer_head *bh_sum, @@ -250,6 +254,9 @@ out: * @pbh: the current buffer head on summary blocks [in, out] * @offset: the current byte offset on summary blocks [in, out] * @bytes: byte size of the item to be read + * + * Return: Kernel space address of current segment summary entry, or + * NULL if an I/O error occurs. */ static void *nilfs_read_summary_info(struct the_nilfs *nilfs, struct buffer_head **pbh, @@ -312,6 +319,11 @@ static void nilfs_skip_summary_info(struct the_nilfs *nilfs, * @start_blocknr: start block number of the log * @sum: log summary information * @head: list head to add nilfs_recovery_block struct + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr, struct nilfs_segment_summary *sum, @@ -445,8 +457,17 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, * The next segment is invalidated by this recovery. */ err = nilfs_sufile_free(sufile, segnum[1]); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "checkpoint log inconsistency at block %llu (segment %llu): next segment %llu is unallocated", + (unsigned long long)nilfs->ns_last_pseg, + (unsigned long long)nilfs->ns_segnum, + (unsigned long long)segnum[1]); + err = -EINVAL; + } goto failed; + } for (i = 1; i < 4; i++) { err = nilfs_segment_list_add(head, segnum[i]); @@ -484,18 +505,16 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, struct nilfs_recovery_block *rb, - struct page *page) + loff_t pos, struct folio *folio) { struct buffer_head *bh_org; - void *kaddr; + size_t from = offset_in_folio(folio, pos); bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); if (unlikely(!bh_org)) return -EIO; - kaddr = kmap_atomic(page); - memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); - kunmap_atomic(kaddr); + memcpy_to_folio(folio, from, bh_org->b_data, bh_org->b_size); brelse(bh_org); return 0; } @@ -508,8 +527,8 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, { struct inode *inode; struct nilfs_recovery_block *rb, *n; - unsigned blocksize = nilfs->ns_blocksize; - struct page *page; + unsigned int blocksize = nilfs->ns_blocksize; + struct folio *folio; loff_t pos; int err = 0, err2 = 0; @@ -523,42 +542,41 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, pos = rb->blkoff << inode->i_blkbits; err = block_write_begin(inode->i_mapping, pos, blocksize, - 0, &page, nilfs_get_block); + &folio, nilfs_get_block); if (unlikely(err)) { loff_t isize = inode->i_size; + if (pos + blocksize > isize) nilfs_write_failed(inode->i_mapping, pos + blocksize); goto failed_inode; } - err = nilfs_recovery_copy_block(nilfs, rb, page); + err = nilfs_recovery_copy_block(nilfs, rb, pos, folio); if (unlikely(err)) - goto failed_page; + goto failed_folio; err = nilfs_set_file_dirty(inode, 1); if (unlikely(err)) - goto failed_page; + goto failed_folio; - block_write_end(NULL, inode->i_mapping, pos, blocksize, - blocksize, page, NULL); + block_write_end(pos, blocksize, blocksize, folio); - unlock_page(page); - page_cache_release(page); + folio_unlock(folio); + folio_put(folio); (*nr_salvaged_blocks)++; goto next; - failed_page: - unlock_page(page); - page_cache_release(page); + failed_folio: + folio_unlock(folio); + folio_put(folio); failed_inode: - printk(KERN_WARNING - "NILFS warning: error recovering data block " - "(err=%d, ino=%lu, block-offset=%llu)\n", - err, (unsigned long)rb->ino, - (unsigned long long)rb->blkoff); + nilfs_warn(sb, + "error %d recovering data block (ino=%lu, block-offset=%llu)", + err, (unsigned long)rb->ino, + (unsigned long long)rb->blkoff); if (!err2) err2 = err; next: @@ -574,7 +592,14 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, * checkpoint * @nilfs: nilfs object * @sb: super block instance + * @root: NILFS root instance * @ri: pointer to a nilfs_recovery_info + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Log format error. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, struct super_block *sb, @@ -582,7 +607,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, struct nilfs_recovery_info *ri) { struct buffer_head *bh_sum = NULL; - struct nilfs_segment_summary *sum; + struct nilfs_segment_summary *sum = NULL; sector_t pseg_start; sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ unsigned long nsalvaged_blocks = 0; @@ -638,7 +663,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, !(flags & NILFS_SS_SYNDT)) goto try_next_pseg; state = RF_DSYNC_ST; - /* Fall through */ + fallthrough; case RF_DSYNC_ST: if (!(flags & NILFS_SS_SYNDT)) goto confused; @@ -681,8 +706,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, } if (nsalvaged_blocks) { - printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n", - sb->s_id, nsalvaged_blocks); + nilfs_info(sb, "salvaged %lu blocks", nsalvaged_blocks); ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; } out: @@ -693,10 +717,9 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, confused: err = -EINVAL; failed: - printk(KERN_ERR - "NILFS (device %s): Error roll-forwarding " - "(err=%d, pseg block=%llu). ", - sb->s_id, err, (unsigned long long)pseg_start); + nilfs_err(sb, + "error %d roll-forwarding partial segment at blocknr = %llu", + err, (unsigned long long)pseg_start); goto out; } @@ -711,35 +734,62 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, return; bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize); - BUG_ON(!bh); + if (WARN_ON(!bh)) + return; /* should never happen */ + + lock_buffer(bh); memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); set_buffer_dirty(bh); + unlock_buffer(bh); + err = sync_dirty_buffer(bh); if (unlikely(err)) - printk(KERN_WARNING - "NILFS warning: buffer sync write failed during " - "post-cleaning of recovery.\n"); + nilfs_warn(nilfs->ns_sb, + "buffer sync write failed during post-cleaning of recovery."); brelse(bh); } /** + * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery + * @nilfs: nilfs object + */ +static void nilfs_abort_roll_forward(struct the_nilfs *nilfs) +{ + struct nilfs_inode_info *ii, *n; + LIST_HEAD(head); + + /* Abandon inodes that have read recovery data */ + spin_lock(&nilfs->ns_inode_lock); + list_splice_init(&nilfs->ns_dirty_files, &head); + spin_unlock(&nilfs->ns_inode_lock); + if (list_empty(&head)) + return; + + set_nilfs_purging(nilfs); + list_for_each_entry_safe(ii, n, &head, i_dirty) { + spin_lock(&nilfs->ns_inode_lock); + list_del_init(&ii->i_dirty); + spin_unlock(&nilfs->ns_inode_lock); + + iput(&ii->vfs_inode); + } + clear_nilfs_purging(nilfs); +} + +/** * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint * @nilfs: nilfs object * @sb: super block instance * @ri: pointer to a nilfs_recovery_info struct to store search results. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-EINVAL - Inconsistent filesystem state. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Inconsistent filesystem state. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. */ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb, @@ -753,8 +803,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root); if (unlikely(err)) { - printk(KERN_ERR - "NILFS: error loading the latest checkpoint.\n"); + nilfs_err(sb, "error %d loading the latest checkpoint", err); return err; } @@ -765,8 +814,8 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri); if (unlikely(err)) { - printk(KERN_ERR "NILFS: Error preparing segments for " - "recovery.\n"); + nilfs_err(sb, "error %d preparing segment for recovery", + err); goto failed; } @@ -779,17 +828,21 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, nilfs_detach_log_writer(sb); if (unlikely(err)) { - printk(KERN_ERR "NILFS: Oops! recovery failed. " - "(err=%d)\n", err); - goto failed; + nilfs_err(sb, "error %d writing segment for recovery", + err); + goto put_root; } nilfs_finish_roll_forward(nilfs, ri); } - failed: +put_root: nilfs_put_root(root); return err; + +failed: + nilfs_abort_roll_forward(nilfs); + goto put_root; } /** @@ -801,20 +854,17 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, * segment pointed by the superblock. It sets up struct the_nilfs through * this search. It fills nilfs_recovery_info (ri) required for recovery. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-EINVAL - No valid segment found - * - * %-EIO - I/O error - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - No valid segment found. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_recovery_info *ri) { struct buffer_head *bh_sum = NULL; - struct nilfs_segment_summary *sum; + struct nilfs_segment_summary *sum = NULL; sector_t pseg_start, pseg_end, sr_pseg_start = 0; sector_t seg_start, seg_end; /* range of full segment (block number) */ sector_t b, end; @@ -872,9 +922,11 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, flags = le16_to_cpu(sum->ss_flags); if (!(flags & NILFS_SS_SR) && !scan_newer) { - /* This will never happen because a superblock - (last_segment) always points to a pseg - having a super root. */ + /* + * This will never happen because a superblock + * (last_segment) always points to a pseg with + * a super root. + */ ret = NILFS_SEG_FAIL_CONSISTENCY; goto failed; } @@ -960,5 +1012,5 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, failed: brelse(bh_sum); nilfs_dispose_segment_list(&segments); - return (ret < 0) ? ret : nilfs_warn_segment_error(ret); + return ret < 0 ? ret : nilfs_warn_segment_error(nilfs->ns_sb, ret); } diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index dc9a913784ab..a8bdf3d318ea 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * segbuf.c - NILFS segment buffer + * NILFS segment buffer * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. * */ @@ -114,6 +101,12 @@ int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf) if (unlikely(!bh)) return -ENOMEM; + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); nilfs_segbuf_add_segsum_buffer(segbuf, bh); return 0; } @@ -133,8 +126,8 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf, return 0; } -int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags, - time_t ctime, __u64 cno) +int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned int flags, + time64_t ctime, __u64 cno) { int err; @@ -212,7 +205,6 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, { struct buffer_head *bh; struct nilfs_segment_summary *raw_sum; - void *kaddr; u32 crc; bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head, @@ -227,9 +219,13 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, crc = crc32_le(crc, bh->b_data, bh->b_size); } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - kaddr = kmap_atomic(bh->b_page); - crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); - kunmap_atomic(kaddr); + size_t offset = offset_in_folio(bh->b_folio, bh->b_data); + unsigned char *from; + + /* Do not support block sizes larger than PAGE_SIZE */ + from = kmap_local_folio(bh->b_folio, offset); + crc = crc32_le(crc, from, bh->b_size); + kunmap_local(from); } raw_sum->ss_datasum = cpu_to_le32(crc); } @@ -240,7 +236,7 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf, { struct nilfs_super_root *raw_sr; struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info; - unsigned srsize; + unsigned int srsize; u32 crc; raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; @@ -338,18 +334,11 @@ void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed) /* * BIO operations */ -static void nilfs_end_bio_write(struct bio *bio, int err) +static void nilfs_end_bio_write(struct bio *bio) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct nilfs_segment_buffer *segbuf = bio->bi_private; - if (err == -EOPNOTSUPP) { - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - bio_put(bio); - /* to be detected by submit_seg_bio() */ - } - - if (!uptodate) + if (bio->bi_status) atomic_inc(&segbuf->sb_err); bio_put(bio); @@ -357,69 +346,20 @@ static void nilfs_end_bio_write(struct bio *bio, int err) } static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi, int mode) + struct nilfs_write_info *wi) { struct bio *bio = wi->bio; - int err; - - if (segbuf->sb_nbio > 0 && - bdi_write_congested(segbuf->sb_super->s_bdi)) { - wait_for_completion(&segbuf->sb_bio_event); - segbuf->sb_nbio--; - if (unlikely(atomic_read(&segbuf->sb_err))) { - bio_put(bio); - err = -EIO; - goto failed; - } - } bio->bi_end_io = nilfs_end_bio_write; bio->bi_private = segbuf; - bio_get(bio); - submit_bio(mode, bio); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) { - bio_put(bio); - err = -EOPNOTSUPP; - goto failed; - } + submit_bio(bio); segbuf->sb_nbio++; - bio_put(bio); wi->bio = NULL; wi->rest_blocks -= wi->end - wi->start; wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); wi->start = wi->end; return 0; - - failed: - wi->bio = NULL; - return err; -} - -/** - * nilfs_alloc_seg_bio - allocate a new bio for writing log - * @nilfs: nilfs object - * @start: start block number of the bio - * @nr_vecs: request size of page vector. - * - * Return Value: On success, pointer to the struct bio is returned. - * On error, NULL is returned. - */ -static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start, - int nr_vecs) -{ - struct bio *bio; - - bio = bio_alloc(GFP_NOIO, nr_vecs); - if (bio == NULL) { - while (!bio && (nr_vecs >>= 1)) - bio = bio_alloc(GFP_NOIO, nr_vecs); - } - if (likely(bio)) { - bio->bi_bdev = nilfs->ns_bdev; - bio->bi_sector = start << (nilfs->ns_blocksize_bits - 9); - } - return bio; } static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, @@ -427,7 +367,7 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, { wi->bio = NULL; wi->rest_blocks = segbuf->sb_sum.nblocks; - wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev); + wi->max_pages = BIO_MAX_VECS; wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); wi->start = wi->end = 0; wi->blocknr = segbuf->sb_pseg_start; @@ -435,26 +375,26 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, struct nilfs_write_info *wi, - struct buffer_head *bh, int mode) + struct buffer_head *bh) { - int len, err; + int err; BUG_ON(wi->nr_vecs <= 0); repeat: if (!wi->bio) { - wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end, - wi->nr_vecs); - if (unlikely(!wi->bio)) - return -ENOMEM; + wi->bio = bio_alloc(wi->nilfs->ns_bdev, wi->nr_vecs, + REQ_OP_WRITE, GFP_NOIO); + wi->bio->bi_iter.bi_sector = (wi->blocknr + wi->end) << + (wi->nilfs->ns_blocksize_bits - 9); } - len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh)); - if (len == bh->b_size) { + if (bio_add_folio(wi->bio, bh->b_folio, bh->b_size, + offset_in_folio(bh->b_folio, bh->b_data))) { wi->end++; return 0; } /* bio is FULL */ - err = nilfs_segbuf_submit_bio(segbuf, wi, mode); + err = nilfs_segbuf_submit_bio(segbuf, wi); /* never submit current bh */ if (likely(!err)) goto repeat; @@ -466,31 +406,26 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, * @segbuf: buffer storing a log to be written * @nilfs: nilfs object * - * Return Value: On Success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error - * - * %-ENOMEM - Insufficient memory available. + * Return: Always 0. */ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, struct the_nilfs *nilfs) { struct nilfs_write_info wi; struct buffer_head *bh; - int res = 0, rw = WRITE; + int res = 0; wi.nilfs = nilfs; nilfs_segbuf_prepare_write(segbuf, &wi); list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh); if (unlikely(res)) goto failed_bio; } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh); if (unlikely(res)) goto failed_bio; } @@ -500,8 +435,8 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * Last BIO is always sent through the following * submission. */ - rw |= REQ_SYNC; - res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); + wi.bio->bi_opf |= REQ_SYNC; + res = nilfs_segbuf_submit_bio(segbuf, &wi); } failed_bio: @@ -512,10 +447,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * nilfs_segbuf_wait - wait for completion of requested BIOs * @segbuf: segment buffer * - * Return Value: On Success, 0 is returned. On Error, one of the following - * negative error code is returned. - * - * %-EIO - I/O error + * Return: 0 on success, or %-EIO if I/O error is detected. */ static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf) { @@ -529,7 +461,11 @@ static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf) } while (--segbuf->sb_nbio > 0); if (unlikely(atomic_read(&segbuf->sb_err) > 0)) { - printk(KERN_ERR "NILFS: IO error writing segment\n"); + nilfs_err(segbuf->sb_super, + "I/O error writing log (start-blocknr=%llu, block-count=%lu) in segment %llu", + (unsigned long long)segbuf->sb_pseg_start, + segbuf->sb_sum.nblocks, + (unsigned long long)segbuf->sb_segnum); err = -EIO; } return err; diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index b04f08cc2397..e20091ededba 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * segbuf.h - NILFS Segment buffer prototypes and definitions + * NILFS Segment buffer prototypes and definitions * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. * */ #ifndef _NILFS_SEGBUF_H @@ -50,7 +37,7 @@ struct nilfs_segsum_info { unsigned long nfileblk; u64 seg_seq; __u64 cno; - time_t ctime; + time64_t ctime; sector_t next; }; @@ -82,7 +69,7 @@ struct nilfs_segment_buffer { __u64 sb_nextnum; sector_t sb_fseg_start, sb_fseg_end; sector_t sb_pseg_start; - unsigned sb_rest_blocks; + unsigned int sb_rest_blocks; /* Buffers */ struct list_head sb_segsum_buffers; @@ -124,7 +111,8 @@ void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf, struct nilfs_segment_buffer *prev); void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, struct the_nilfs *); -int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t, __u64); +int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned int, time64_t, + __u64); int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *); int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, struct buffer_head **); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index bd88a7461063..deee16bc9d4e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1,29 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * segment.c - NILFS segment constructor. + * NILFS segment constructor. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. * */ #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/bitops.h> #include <linux/bio.h> #include <linux/completion.h> #include <linux/blkdev.h> @@ -33,6 +21,8 @@ #include <linux/crc32.h> #include <linux/pagevec.h> #include <linux/slab.h> +#include <linux/sched/signal.h> + #include "nilfs.h" #include "btnode.h" #include "page.h" @@ -48,18 +38,26 @@ */ #define SC_N_INODEVEC 16 /* Size of locally allocated inode vector */ -#define SC_MAX_SEGDELTA 64 /* Upper limit of the number of segments - appended in collection retry loop */ +#define SC_MAX_SEGDELTA 64 /* + * Upper limit of the number of segments + * appended in collection retry loop + */ /* Construction mode */ enum { SC_LSEG_SR = 1, /* Make a logical segment having a super root */ - SC_LSEG_DSYNC, /* Flush data blocks of a given file and make - a logical segment without a super root */ - SC_FLUSH_FILE, /* Flush data files, leads to segment writes without - creating a checkpoint */ - SC_FLUSH_DAT, /* Flush DAT file. This also creates segments without - a checkpoint */ + SC_LSEG_DSYNC, /* + * Flush data blocks of a given file and make + * a logical segment without a super root. + */ + SC_FLUSH_FILE, /* + * Flush data files, leads to segment writes without + * creating a checkpoint. + */ + SC_FLUSH_DAT, /* + * Flush DAT file. This also creates segments + * without a checkpoint. + */ }; /* Stage numbers of dirty block collection */ @@ -76,6 +74,36 @@ enum { NILFS_ST_DONE, }; +#define CREATE_TRACE_POINTS +#include <trace/events/nilfs2.h> + +/* + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are + * wrapper functions of stage count (nilfs_sc_info->sc_stage.scnt). Users of + * the variable must use them because transition of stage count must involve + * trace events (trace_nilfs2_collection_stage_transition). + * + * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't + * produce tracepoint events. It is provided just for making the intention + * clear. + */ +static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci) +{ + sci->sc_stage.scnt++; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt) +{ + sci->sc_stage.scnt = next_scnt; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci) +{ + return sci->sc_stage.scnt; +} + /* State flags of collection */ #define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ #define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ @@ -106,16 +134,12 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int); static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *); static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int); -#define nilfs_cnt32_gt(a, b) \ - (typecheck(__u32, a) && typecheck(__u32, b) && \ - ((__s32)(b) - (__s32)(a) < 0)) #define nilfs_cnt32_ge(a, b) \ (typecheck(__u32, a) && typecheck(__u32, b) && \ - ((__s32)(a) - (__s32)(b) >= 0)) -#define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a) -#define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a) + ((__s32)((a) - (b)) >= 0)) -static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) +static int nilfs_prepare_segment_lock(struct super_block *sb, + struct nilfs_transaction_info *ti) { struct nilfs_transaction_info *cur_ti = current->journal_info; void *save = NULL; @@ -123,17 +147,14 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) if (cur_ti) { if (cur_ti->ti_magic == NILFS_TI_MAGIC) return ++cur_ti->ti_count; - else { - /* - * If journal_info field is occupied by other FS, - * it is saved and will be restored on - * nilfs_transaction_commit(). - */ - printk(KERN_WARNING - "NILFS warning: journal info from a different " - "FS\n"); - save = current->journal_info; - } + + /* + * If journal_info field is occupied by other FS, + * it is saved and will be restored on + * nilfs_transaction_commit(). + */ + nilfs_warn(sb, "journal info from a different FS"); + save = current->journal_info; } if (!ti) { ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS); @@ -170,24 +191,29 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) * When @vacancy_check flag is set, this function will check the amount of * free space, and will wait for the GC to reclaim disk space if low capacity. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. - * - * %-ENOSPC - No space left on device + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (if checking free space). */ int nilfs_transaction_begin(struct super_block *sb, struct nilfs_transaction_info *ti, int vacancy_check) { struct the_nilfs *nilfs; - int ret = nilfs_prepare_segment_lock(ti); + int ret = nilfs_prepare_segment_lock(sb, ti); + struct nilfs_transaction_info *trace_ti; if (unlikely(ret < 0)) return ret; - if (ret > 0) + if (ret > 0) { + trace_ti = current->journal_info; + + trace_nilfs2_transaction_transition(sb, trace_ti, + trace_ti->ti_count, trace_ti->ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; + } sb_start_intwrite(sb); @@ -198,6 +224,11 @@ int nilfs_transaction_begin(struct super_block *sb, ret = -ENOSPC; goto failed; } + + trace_ti = current->journal_info; + trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count, + trace_ti->ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; failed: @@ -219,6 +250,8 @@ int nilfs_transaction_begin(struct super_block *sb, * nilfs_transaction_commit() sets a timer to start the segment * constructor. If a sync flag is set, it starts construction * directly. + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_transaction_commit(struct super_block *sb) { @@ -230,6 +263,8 @@ int nilfs_transaction_commit(struct super_block *sb) ti->ti_flags |= NILFS_TI_COMMIT; if (ti->ti_count > 0) { ti->ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); return 0; } if (nilfs->ns_writer) { @@ -241,6 +276,9 @@ int nilfs_transaction_commit(struct super_block *sb) nilfs_segctor_do_flush(sci, 0); } up_read(&nilfs->ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); + current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_SYNC) @@ -259,10 +297,15 @@ void nilfs_transaction_abort(struct super_block *sb) BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); if (ti->ti_count > 0) { ti->ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); return; } up_read(&nilfs->ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); + current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); @@ -274,7 +317,7 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb) struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_sc_info *sci = nilfs->ns_writer; - if (!sci || !sci->sc_flush_request) + if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request) return; set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); @@ -305,10 +348,12 @@ static void nilfs_transaction_lock(struct super_block *sb, ti->ti_count = 0; ti->ti_save = cur_ti; ti->ti_magic = NILFS_TI_MAGIC; - INIT_LIST_HEAD(&ti->ti_garbage); current->journal_info = ti; for (;;) { + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK); + down_write(&nilfs->ns_segctor_sem); if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) break; @@ -316,10 +361,13 @@ static void nilfs_transaction_lock(struct super_block *sb, nilfs_segctor_do_immediate_flush(sci); up_write(&nilfs->ns_segctor_sem); - yield(); + cond_resched(); } if (gcflag) ti->ti_flags |= NILFS_TI_GC; + + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_LOCK); } static void nilfs_transaction_unlock(struct super_block *sb) @@ -332,16 +380,17 @@ static void nilfs_transaction_unlock(struct super_block *sb) up_write(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; - if (!list_empty(&ti->ti_garbage)) - nilfs_dispose_list(nilfs, &ti->ti_garbage, 0); + + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK); } static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, struct nilfs_segsum_pointer *ssp, - unsigned bytes) + unsigned int bytes) { struct nilfs_segment_buffer *segbuf = sci->sc_curseg; - unsigned blocksize = sci->sc_super->s_blocksize; + unsigned int blocksize = sci->sc_super->s_blocksize; void *p; if (unlikely(ssp->offset + bytes > blocksize)) { @@ -358,13 +407,15 @@ static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, /** * nilfs_segctor_reset_segment_buffer - reset the current segment buffer * @sci: nilfs_sc_info + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) { struct nilfs_segment_buffer *segbuf = sci->sc_curseg; struct buffer_head *sumbh; - unsigned sumbytes; - unsigned flags = 0; + unsigned int sumbytes; + unsigned int flags = 0; int err; if (nilfs_doing_gc()) @@ -381,12 +432,32 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) return 0; } +/** + * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area + * @sci: segment constructor object + * + * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of + * the current segment summary block. + */ +static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci) +{ + struct nilfs_segsum_pointer *ssp; + + ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr; + if (ssp->offset < ssp->bh->b_size) + memset(ssp->bh->b_data + ssp->offset, 0, + ssp->bh->b_size - ssp->offset); +} + static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) { sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs)) - return -E2BIG; /* The current segment is filled up - (internal code) */ + return -E2BIG; /* + * The current segment is filled up + * (internal code) + */ + nilfs_segctor_zeropad_segsum(sci); sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg); return nilfs_segctor_reset_segment_buffer(sci); } @@ -413,9 +484,9 @@ static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci) */ static int nilfs_segctor_segsum_block_required( struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp, - unsigned binfo_size) + unsigned int binfo_size) { - unsigned blocksize = sci->sc_super->s_blocksize; + unsigned int blocksize = sci->sc_super->s_blocksize; /* Size of finfo and binfo is enough small against blocksize */ return ssp->offset + binfo_size + @@ -450,7 +521,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, ii = NILFS_I(inode); - if (test_bit(NILFS_I_GCINODE, &ii->i_state)) + if (ii->i_type & NILFS_I_TYPE_GC) cno = ii->i_cno; else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) cno = 0; @@ -474,7 +545,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, struct buffer_head *bh, struct inode *inode, - unsigned binfo_size) + unsigned int binfo_size) { struct nilfs_segment_buffer *segbuf; int required, err = 0; @@ -491,6 +562,7 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, goto retry; } if (unlikely(required)) { + nilfs_segctor_zeropad_segsum(sci); err = nilfs_segbuf_extend_segsum(segbuf); if (unlikely(err)) goto failed; @@ -558,7 +630,7 @@ static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci, *vblocknr = binfo->bi_v.bi_vblocknr; } -static struct nilfs_sc_operations nilfs_sc_file_ops = { +static const struct nilfs_sc_operations nilfs_sc_file_ops = { .collect_data = nilfs_collect_file_data, .collect_node = nilfs_collect_file_node, .collect_bmap = nilfs_collect_file_bmap, @@ -607,7 +679,7 @@ static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci, *binfo_dat = binfo->bi_dat; } -static struct nilfs_sc_operations nilfs_sc_dat_ops = { +static const struct nilfs_sc_operations nilfs_sc_dat_ops = { .collect_data = nilfs_collect_dat_data, .collect_node = nilfs_collect_file_node, .collect_bmap = nilfs_collect_dat_bmap, @@ -615,7 +687,7 @@ static struct nilfs_sc_operations nilfs_sc_dat_ops = { .write_node_binfo = nilfs_write_dat_node_binfo, }; -static struct nilfs_sc_operations nilfs_sc_dsync_ops = { +static const struct nilfs_sc_operations nilfs_sc_dsync_ops = { .collect_data = nilfs_collect_file_data, .collect_node = NULL, .collect_bmap = NULL, @@ -629,7 +701,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, loff_t start, loff_t end) { struct address_space *mapping = inode->i_mapping; - struct pagevec pvec; + struct folio_batch fbatch; pgoff_t index = 0, last = ULONG_MAX; size_t ndirties = 0; int i; @@ -643,41 +715,46 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, index = start >> PAGE_SHIFT; last = end >> PAGE_SHIFT; } - pagevec_init(&pvec, 0); + folio_batch_init(&fbatch); repeat: if (unlikely(index > last) || - !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - min_t(pgoff_t, last - index, - PAGEVEC_SIZE - 1) + 1)) + !filemap_get_folios_tag(mapping, &index, last, + PAGECACHE_TAG_DIRTY, &fbatch)) return ndirties; - for (i = 0; i < pagevec_count(&pvec); i++) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { struct buffer_head *bh, *head; - struct page *page = pvec.pages[i]; - - if (unlikely(page->index > last)) - break; + struct folio *folio = fbatch.folios[i]; - lock_page(page); - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << inode->i_blkbits, 0); - unlock_page(page); + folio_lock(folio); + if (unlikely(folio->mapping != mapping)) { + /* Exclude folios removed from the address space */ + folio_unlock(folio); + continue; + } + head = folio_buffers(folio); + if (!head) + head = create_empty_buffers(folio, + i_blocksize(inode), 0); - bh = head = page_buffers(page); + bh = head; do { - if (!buffer_dirty(bh)) + if (!buffer_dirty(bh) || buffer_async_write(bh)) continue; get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); ndirties++; if (unlikely(ndirties >= nlimit)) { - pagevec_release(&pvec); + folio_unlock(folio); + folio_batch_release(&fbatch); cond_resched(); return ndirties; } } while (bh = bh->b_this_page, bh != head); + + folio_unlock(folio); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); goto repeat; } @@ -686,20 +763,23 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, struct list_head *listp) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct address_space *mapping = &ii->i_btnode_cache; - struct pagevec pvec; + struct inode *btnc_inode = ii->i_assoc_inode; + struct folio_batch fbatch; struct buffer_head *bh, *head; unsigned int i; pgoff_t index = 0; - pagevec_init(&pvec, 0); + if (!btnc_inode) + return; + folio_batch_init(&fbatch); - while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) { - for (i = 0; i < pagevec_count(&pvec); i++) { - bh = head = page_buffers(pvec.pages[i]); + while (filemap_get_folios_tag(btnc_inode->i_mapping, &index, + (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + bh = head = folio_buffers(fbatch.folios[i]); do { - if (buffer_dirty(bh)) { + if (buffer_dirty(bh) && + !buffer_async_write(bh)) { get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -707,7 +787,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, bh = bh->b_this_page; } while (bh != head); } - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); } } @@ -717,7 +797,7 @@ static void nilfs_dispose_list(struct the_nilfs *nilfs, { struct nilfs_inode_info *ii, *n; struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii; - unsigned nv = 0; + unsigned int nv = 0; while (!list_empty(head)) { spin_lock(&nilfs->ns_inode_lock); @@ -745,6 +825,15 @@ static void nilfs_dispose_list(struct the_nilfs *nilfs, } } +static void nilfs_iput_work_func(struct work_struct *work) +{ + struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info, + sc_iput_work); + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; + + nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0); +} + static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, struct nilfs_root *root) { @@ -795,68 +884,6 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) nilfs_mdt_clear_dirty(nilfs->ns_dat); } -static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) -{ - struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - struct buffer_head *bh_cp; - struct nilfs_checkpoint *raw_cp; - int err; - - /* XXX: this interface will be changed */ - err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1, - &raw_cp, &bh_cp); - if (likely(!err)) { - /* The following code is duplicated with cpfile. But, it is - needed to collect the checkpoint even if it was not newly - created */ - mark_buffer_dirty(bh_cp); - nilfs_mdt_mark_dirty(nilfs->ns_cpfile); - nilfs_cpfile_put_checkpoint( - nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); - } else - WARN_ON(err == -EINVAL || err == -ENOENT); - - return err; -} - -static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) -{ - struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - struct buffer_head *bh_cp; - struct nilfs_checkpoint *raw_cp; - int err; - - err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, - &raw_cp, &bh_cp); - if (unlikely(err)) { - WARN_ON(err == -EINVAL || err == -ENOENT); - goto failed_ibh; - } - raw_cp->cp_snapshot_list.ssl_next = 0; - raw_cp->cp_snapshot_list.ssl_prev = 0; - raw_cp->cp_inodes_count = - cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count)); - raw_cp->cp_blocks_count = - cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count)); - raw_cp->cp_nblk_inc = - cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); - raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); - raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); - - if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) - nilfs_checkpoint_clear_minor(raw_cp); - else - nilfs_checkpoint_set_minor(raw_cp); - - nilfs_write_inode_common(sci->sc_root->ifile, - &raw_cp->cp_ifile_inode, 1); - nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); - return 0; - - failed_ibh: - return err; -} - static void nilfs_fill_in_file_bmap(struct inode *ifile, struct nilfs_inode_info *ii) @@ -870,7 +897,7 @@ static void nilfs_fill_in_file_bmap(struct inode *ifile, raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino, ibh); nilfs_bmap_write(ii->i_bmap, raw_inode); - nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh); + nilfs_ifile_unmap_inode(raw_inode); } } @@ -884,31 +911,64 @@ static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci) } } +/** + * nilfs_write_root_mdt_inode - export root metadata inode information to + * the on-disk inode + * @inode: inode object of the root metadata file + * @raw_inode: on-disk inode + * + * nilfs_write_root_mdt_inode() writes inode information and bmap data of + * @inode to the inode area of the metadata file allocated on the super root + * block created to finalize the log. Since super root blocks are configured + * each time, this function zero-fills the unused area of @raw_inode. + */ +static void nilfs_write_root_mdt_inode(struct inode *inode, + struct nilfs_inode *raw_inode) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + + nilfs_write_inode_common(inode, raw_inode); + + /* zero-fill unused portion of raw_inode */ + raw_inode->i_xattr = 0; + raw_inode->i_pad = 0; + memset((void *)raw_inode + sizeof(*raw_inode), 0, + nilfs->ns_inode_size - sizeof(*raw_inode)); + + nilfs_bmap_write(NILFS_I(inode)->i_bmap, raw_inode); +} + static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; - unsigned isz, srsz; + unsigned int isz, srsz; bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; + + lock_buffer(bh_sr); raw_sr = (struct nilfs_super_root *)bh_sr->b_data; isz = nilfs->ns_inode_size; srsz = NILFS_SR_BYTES(isz); + raw_sr->sr_sum = 0; /* Ensure initialization within this update */ raw_sr->sr_bytes = cpu_to_le16(srsz); raw_sr->sr_nongc_ctime = cpu_to_le64(nilfs_doing_gc() ? nilfs->ns_nongc_ctime : sci->sc_seg_ctime); raw_sr->sr_flags = 0; - nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr + - NILFS_SR_DAT_OFFSET(isz), 1); - nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + - NILFS_SR_CPFILE_OFFSET(isz), 1); - nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + - NILFS_SR_SUFILE_OFFSET(isz), 1); + nilfs_write_root_mdt_inode(nilfs->ns_dat, (void *)raw_sr + + NILFS_SR_DAT_OFFSET(isz)); + nilfs_write_root_mdt_inode(nilfs->ns_cpfile, (void *)raw_sr + + NILFS_SR_CPFILE_OFFSET(isz)); + nilfs_write_root_mdt_inode(nilfs->ns_sufile, (void *)raw_sr + + NILFS_SR_SUFILE_OFFSET(isz)); + memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz); + set_buffer_uptodate(bh_sr); + unlock_buffer(bh_sr); } static void nilfs_redirty_inodes(struct list_head *head) @@ -929,7 +989,7 @@ static void nilfs_drop_collected_inodes(struct list_head *head) if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state)) continue; - clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state); + clear_bit(NILFS_I_INODE_SYNC, &ii->i_state); set_bit(NILFS_I_UPDATED, &ii->i_state); } } @@ -974,7 +1034,7 @@ static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci) static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci, struct inode *inode, - struct nilfs_sc_operations *sc_ops) + const struct nilfs_sc_operations *sc_ops) { LIST_HEAD(data_buffers); LIST_HEAD(node_buffers); @@ -1046,15 +1106,68 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, return err; } +/** + * nilfs_free_segments - free the segments given by an array of segment numbers + * @nilfs: nilfs object + * @segnumv: array of segment numbers to be freed + * @nsegs: number of segments to be freed in @segnumv + * + * nilfs_free_segments() wraps nilfs_sufile_freev() and + * nilfs_sufile_cancel_freev(), and edits the segment usage metadata file + * (sufile) to free all segments given by @segnumv and @nsegs at once. If + * it fails midway, it cancels the changes so that none of the segments are + * freed. If @nsegs is 0, this function does nothing. + * + * The freeing of segments is not finalized until the writing of a log with + * a super root block containing this sufile change is complete, and it can + * be canceled with nilfs_sufile_cancel_freev() until then. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid segment number. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + */ +static int nilfs_free_segments(struct the_nilfs *nilfs, __u64 *segnumv, + size_t nsegs) +{ + size_t ndone; + int ret; + + if (!nsegs) + return 0; + + ret = nilfs_sufile_freev(nilfs->ns_sufile, segnumv, nsegs, &ndone); + if (unlikely(ret)) { + nilfs_sufile_cancel_freev(nilfs->ns_sufile, segnumv, ndone, + NULL); + /* + * If a segment usage of the segments to be freed is in a + * hole block, nilfs_sufile_freev() will return -ENOENT. + * In this case, -EINVAL should be returned to the caller + * since there is something wrong with the given segment + * number array. This error can only occur during GC, so + * there is no need to worry about it propagating to other + * callers (such as fsync). + */ + if (ret == -ENOENT) { + nilfs_err(nilfs->ns_sb, + "The segment usage entry %llu to be freed is invalid (in a hole)", + (unsigned long long)segnumv[ndone]); + ret = -EINVAL; + } + } + return ret; +} + static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) { struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct list_head *head; struct nilfs_inode_info *ii; - size_t ndone; int err = 0; - switch (sci->sc_stage.scnt) { + switch (nilfs_sc_cstage_get(sci)) { case NILFS_ST_INIT: /* Pre-processes */ sci->sc_stage.flags = 0; @@ -1063,7 +1176,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_nblk_inc = 0; sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN; if (mode == SC_LSEG_DSYNC) { - sci->sc_stage.scnt = NILFS_ST_DSYNC; + nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC); goto dsync_mode; } } @@ -1071,10 +1184,11 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.dirty_file_ptr = NULL; sci->sc_stage.gc_inode_ptr = NULL; if (mode == SC_FLUSH_DAT) { - sci->sc_stage.scnt = NILFS_ST_DAT; + nilfs_sc_cstage_set(sci, NILFS_ST_DAT); goto dat_stage; } - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); + fallthrough; case NILFS_ST_GC: if (nilfs_doing_gc()) { head = &sci->sc_gc_inodes; @@ -1095,7 +1209,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) } sci->sc_stage.gc_inode_ptr = NULL; } - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); + fallthrough; case NILFS_ST_FILE: head = &sci->sc_dirty_files; ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head, @@ -1117,45 +1232,44 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) } sci->sc_stage.dirty_file_ptr = NULL; if (mode == SC_FLUSH_FILE) { - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; } - sci->sc_stage.scnt++; + nilfs_sc_cstage_inc(sci); sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; - /* Fall through */ + fallthrough; case NILFS_ST_IFILE: err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile, &nilfs_sc_file_ops); if (unlikely(err)) break; - sci->sc_stage.scnt++; + nilfs_sc_cstage_inc(sci); /* Creating a checkpoint */ - err = nilfs_segctor_create_checkpoint(sci); + err = nilfs_cpfile_create_checkpoint(nilfs->ns_cpfile, + nilfs->ns_cno); if (unlikely(err)) break; - /* Fall through */ + fallthrough; case NILFS_ST_CPFILE: err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile, &nilfs_sc_file_ops); if (unlikely(err)) break; - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); + fallthrough; case NILFS_ST_SUFILE: - err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, - sci->sc_nfreesegs, &ndone); - if (unlikely(err)) { - nilfs_sufile_cancel_freev(nilfs->ns_sufile, - sci->sc_freesegs, ndone, - NULL); + err = nilfs_free_segments(nilfs, sci->sc_freesegs, + sci->sc_nfreesegs); + if (unlikely(err)) break; - } sci->sc_stage.flags |= NILFS_CF_SUFREED; err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, &nilfs_sc_file_ops); if (unlikely(err)) break; - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); + fallthrough; case NILFS_ST_DAT: dat_stage: err = nilfs_segctor_scan_file(sci, nilfs->ns_dat, @@ -1163,10 +1277,11 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) break; if (mode == SC_FLUSH_DAT) { - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; } - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); + fallthrough; case NILFS_ST_SR: if (mode == SC_LSEG_SR) { /* Appending a super root */ @@ -1176,7 +1291,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) } /* End of a logical segment */ sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; case NILFS_ST_DSYNC: dsync_mode: @@ -1189,7 +1304,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) break; sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; case NILFS_ST_DONE: return 0; @@ -1205,6 +1320,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) * nilfs_segctor_begin_construction - setup segment buffer to make a new log * @sci: nilfs_sc_info * @nilfs: nilfs object + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) @@ -1337,8 +1454,10 @@ static void nilfs_free_incomplete_logs(struct list_head *logs, if (atomic_read(&segbuf->sb_err)) { /* Case 1: The first segment failed */ if (segbuf->sb_pseg_start != segbuf->sb_fseg_start) - /* Case 1a: Partial segment appended into an existing - segment */ + /* + * Case 1a: Partial segment appended into an existing + * segment + */ nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start, segbuf->sb_fseg_end); else /* Case 1b: New full segment */ @@ -1434,25 +1553,29 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, goto failed; /* The current segment is filled up */ - if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) + if (mode != SC_LSEG_SR || + nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE) break; nilfs_clear_logs(&sci->sc_segbufs); - err = nilfs_segctor_extend_segments(sci, nilfs, nadd); - if (unlikely(err)) - return err; - if (sci->sc_stage.flags & NILFS_CF_SUFREED) { err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, sci->sc_freesegs, sci->sc_nfreesegs, NULL); WARN_ON(err); /* do not happen */ + sci->sc_stage.flags &= ~NILFS_CF_SUFREED; } + + err = nilfs_segctor_extend_segments(sci, nilfs, nadd); + if (unlikely(err)) + return err; + nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); sci->sc_stage = prev_stage; } + nilfs_segctor_zeropad_segsum(sci); nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile); return 0; @@ -1478,7 +1601,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, sector_t blocknr; unsigned long nfinfo = segbuf->sb_sum.nfinfo; unsigned long nblocks = 0, ndatablk = 0; - struct nilfs_sc_operations *sc_op = NULL; + const struct nilfs_sc_operations *sc_op = NULL; struct nilfs_segsum_pointer ssp; struct nilfs_finfo *finfo = NULL; union nilfs_binfo binfo; @@ -1503,7 +1626,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, nblocks = le32_to_cpu(finfo->fi_nblocks); ndatablk = le32_to_cpu(finfo->fi_ndatablk); - inode = bh->b_page->mapping->host; + inode = bh->b_folio->mapping->host; if (mode == SC_LSEG_DSYNC) sc_op = &nilfs_sc_dsync_ops; @@ -1556,65 +1679,95 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) return 0; } -static void nilfs_begin_page_io(struct page *page) +static void nilfs_begin_folio_io(struct folio *folio) { - if (!page || PageWriteback(page)) - /* For split b-tree node pages, this function may be called - twice. We ignore the 2nd or later calls by this check. */ + if (!folio || folio_test_writeback(folio)) + /* + * For split b-tree node pages, this function may be called + * twice. We ignore the 2nd or later calls by this check. + */ return; - lock_page(page); - clear_page_dirty_for_io(page); - set_page_writeback(page); - unlock_page(page); + folio_lock(folio); + folio_clear_dirty_for_io(folio); + folio_start_writeback(folio); + folio_unlock(folio); } -static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) +/** + * nilfs_prepare_write_logs - prepare to write logs + * @logs: logs to prepare for writing + * @seed: checksum seed value + * + * nilfs_prepare_write_logs() adds checksums and prepares the block + * buffers/folios for writing logs. In order to stabilize folios of + * memory-mapped file blocks by putting them in writeback state before + * calculating the checksums, first prepare to write payload blocks other + * than segment summary and super root blocks in which the checksums will + * be embedded. + */ +static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) { struct nilfs_segment_buffer *segbuf; - struct page *bd_page = NULL, *fs_page = NULL; - - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; + struct folio *bd_folio = NULL, *fs_folio = NULL; + struct buffer_head *bh; - list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + /* Prepare to write payload blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh->b_page != bd_page) { - if (bd_page) { - lock_page(bd_page); - clear_page_dirty_for_io(bd_page); - set_page_writeback(bd_page); - unlock_page(bd_page); - } - bd_page = bh->b_page; + if (bh == segbuf->sb_super_root) + break; + set_buffer_async_write(bh); + if (bh->b_folio != fs_folio) { + nilfs_begin_folio_io(fs_folio); + fs_folio = bh->b_folio; } } + } + nilfs_begin_folio_io(fs_folio); - list_for_each_entry(bh, &segbuf->sb_payload_buffers, + nilfs_add_checksums_on_logs(logs, seed); + + /* Prepare to write segment summary blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - if (bh == segbuf->sb_super_root) { - if (bh->b_page != bd_page) { - lock_page(bd_page); - clear_page_dirty_for_io(bd_page); - set_page_writeback(bd_page); - unlock_page(bd_page); - bd_page = bh->b_page; - } - break; - } - if (bh->b_page != fs_page) { - nilfs_begin_page_io(fs_page); - fs_page = bh->b_page; + mark_buffer_dirty(bh); + if (bh->b_folio == bd_folio) + continue; + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); } + bd_folio = bh->b_folio; } } - if (bd_page) { - lock_page(bd_page); - clear_page_dirty_for_io(bd_page); - set_page_writeback(bd_page); - unlock_page(bd_page); + + /* Prepare to write super root block */ + bh = NILFS_LAST_SEGBUF(logs)->sb_super_root; + if (bh) { + mark_buffer_dirty(bh); + if (bh->b_folio != bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + bd_folio = bh->b_folio; + } + } + + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); } - nilfs_begin_page_io(fs_page); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, @@ -1627,17 +1780,18 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci, return ret; } -static void nilfs_end_page_io(struct page *page, int err) +static void nilfs_end_folio_io(struct folio *folio, int err) { - if (!page) + if (!folio) return; - if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) { + if (buffer_nilfs_node(folio_buffers(folio)) && + !folio_test_writeback(folio)) { /* * For b-tree node pages, this function may be called twice * or more because they might be split in a segment. */ - if (PageDirty(page)) { + if (folio_test_dirty(folio)) { /* * For pages holding split b-tree node buffers, dirty * flag on the buffers may be cleared discretely. @@ -1645,30 +1799,24 @@ static void nilfs_end_page_io(struct page *page, int err) * remaining buffers, and it must be cancelled if * all the buffers get cleaned later. */ - lock_page(page); - if (nilfs_page_buffers_clean(page)) - __nilfs_clear_page_dirty(page); - unlock_page(page); + folio_lock(folio); + if (nilfs_folio_buffers_clean(folio)) + __nilfs_clear_folio_dirty(folio); + folio_unlock(folio); } return; } - if (!err) { - if (!nilfs_page_buffers_clean(page)) - __set_page_dirty_nobuffers(page); - ClearPageError(page); - } else { - __set_page_dirty_nobuffers(page); - SetPageError(page); - } + if (err || !nilfs_folio_buffers_clean(folio)) + filemap_dirty_folio(folio->mapping, folio); - end_page_writeback(page); + folio_end_writeback(folio); } static void nilfs_abort_logs(struct list_head *logs, int err) { struct nilfs_segment_buffer *segbuf; - struct page *bd_page = NULL, *fs_page = NULL; + struct folio *bd_folio = NULL, *fs_folio = NULL; struct buffer_head *bh; if (list_empty(logs)) @@ -1677,32 +1825,35 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - if (bh->b_page != bd_page) { - if (bd_page) - end_page_writeback(bd_page); - bd_page = bh->b_page; + clear_buffer_uptodate(bh); + if (bh->b_folio != bd_folio) { + if (bd_folio) + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { if (bh == segbuf->sb_super_root) { - if (bh->b_page != bd_page) { - end_page_writeback(bd_page); - bd_page = bh->b_page; + clear_buffer_uptodate(bh); + if (bh->b_folio != bd_folio) { + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } break; } - if (bh->b_page != fs_page) { - nilfs_end_page_io(fs_page, err); - fs_page = bh->b_page; + clear_buffer_async_write(bh); + if (bh->b_folio != fs_folio) { + nilfs_end_folio_io(fs_folio, err); + fs_folio = bh->b_folio; } } } - if (bd_page) - end_page_writeback(bd_page); + if (bd_folio) + folio_end_writeback(bd_folio); - nilfs_end_page_io(fs_page, err); + nilfs_end_folio_io(fs_folio, err); } static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, @@ -1716,6 +1867,9 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, nilfs_abort_logs(&logs, ret ? : err); list_splice_tail_init(&sci->sc_segbufs, &logs); + if (list_empty(&logs)) + return; /* if the first segment buffer preparation failed */ + nilfs_cancel_segusage(&logs, nilfs->ns_sufile); nilfs_free_incomplete_logs(&logs, nilfs); @@ -1744,7 +1898,7 @@ static void nilfs_set_next_segment(struct the_nilfs *nilfs, static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) { struct nilfs_segment_buffer *segbuf; - struct page *bd_page = NULL, *fs_page = NULL; + struct folio *bd_folio = NULL, *fs_folio = NULL; struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int update_sr = false; @@ -1755,41 +1909,45 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); - if (bh->b_page != bd_page) { - if (bd_page) - end_page_writeback(bd_page); - bd_page = bh->b_page; + if (bh->b_folio != bd_folio) { + if (bd_folio) + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } } /* - * We assume that the buffers which belong to the same page + * We assume that the buffers which belong to the same folio * continue over the buffer list. - * Under this assumption, the last BHs of pages is - * identifiable by the discontinuity of bh->b_page - * (page != fs_page). + * Under this assumption, the last BHs of folios is + * identifiable by the discontinuity of bh->b_folio + * (folio != fs_folio). * * For B-tree node blocks, however, this assumption is not - * guaranteed. The cleanup code of B-tree node pages needs + * guaranteed. The cleanup code of B-tree node folios needs * special care. */ list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); - clear_buffer_delay(bh); - clear_buffer_nilfs_volatile(bh); - clear_buffer_nilfs_redirected(bh); + const unsigned long set_bits = BIT(BH_Uptodate); + const unsigned long clear_bits = + (BIT(BH_Dirty) | BIT(BH_Async_Write) | + BIT(BH_Delay) | BIT(BH_NILFS_Volatile) | + BIT(BH_NILFS_Redirected)); + if (bh == segbuf->sb_super_root) { - if (bh->b_page != bd_page) { - end_page_writeback(bd_page); - bd_page = bh->b_page; + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + if (bh->b_folio != bd_folio) { + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } update_sr = true; break; } - if (bh->b_page != fs_page) { - nilfs_end_page_io(fs_page, 0); - fs_page = bh->b_page; + set_mask_bits(&bh->b_state, clear_bits, set_bits); + if (bh->b_folio != fs_folio) { + nilfs_end_folio_io(fs_folio, 0); + fs_folio = bh->b_folio; } } @@ -1803,13 +1961,13 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) } } /* - * Since pages may continue over multiple segment buffers, - * end of the last page must be checked outside of the loop. + * Since folios may continue over multiple segment buffers, + * end of the last folio must be checked outside of the loop. */ - if (bd_page) - end_page_writeback(bd_page); + if (bd_folio) + folio_end_writeback(bd_folio); - nilfs_end_page_io(fs_page, 0); + nilfs_end_folio_io(fs_folio, 0); nilfs_drop_collected_inodes(&sci->sc_dirty_files); @@ -1824,6 +1982,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_set_next_segment(nilfs, segbuf); if (update_sr) { + nilfs->ns_flushed_device = 0; nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, segbuf->sb_sum.seg_seq, nilfs->ns_cno++); @@ -1864,12 +2023,11 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, err = nilfs_ifile_get_inode_block( ifile, ii->vfs_inode.i_ino, &ibh); if (unlikely(err)) { - nilfs_warning(sci->sc_super, __func__, - "failed to get inode block.\n"); + nilfs_warn(sci->sc_super, + "log writer: error %d getting inode block (ino=%lu)", + err, ii->vfs_inode.i_ino); return err; } - mark_buffer_dirty(ibh); - nilfs_mdt_mark_dirty(ifile); spin_lock(&nilfs->ns_inode_lock); if (likely(!ii->i_bh)) ii->i_bh = ibh; @@ -1878,6 +2036,10 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, goto retry; } + // Always redirty the buffer to avoid race condition + mark_buffer_dirty(ii->i_bh); + nilfs_mdt_mark_dirty(ifile); + clear_bit(NILFS_I_QUEUED, &ii->i_state); set_bit(NILFS_I_BUSY, &ii->i_state); list_move_tail(&ii->i_dirty, &sci->sc_dirty_files); @@ -1890,8 +2052,9 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { - struct nilfs_transaction_info *ti = current->journal_info; struct nilfs_inode_info *ii, *n; + int during_mount = !(sci->sc_super->s_flags & SB_ACTIVE); + int defer_iput = false; spin_lock(&nilfs->ns_inode_lock); list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { @@ -1902,9 +2065,24 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, clear_bit(NILFS_I_BUSY, &ii->i_state); brelse(ii->i_bh); ii->i_bh = NULL; - list_move_tail(&ii->i_dirty, &ti->ti_garbage); + list_del_init(&ii->i_dirty); + if (!ii->vfs_inode.i_nlink || during_mount) { + /* + * Defer calling iput() to avoid deadlocks if + * i_nlink == 0 or mount is not yet finished. + */ + list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); + defer_iput = true; + } else { + spin_unlock(&nilfs->ns_inode_lock); + iput(&ii->vfs_inode); + spin_lock(&nilfs->ns_inode_lock); + } } spin_unlock(&nilfs->ns_inode_lock); + + if (defer_iput) + schedule_work(&sci->sc_iput_work); } /* @@ -1915,7 +2093,10 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int err; - sci->sc_stage.scnt = NILFS_ST_INIT; + if (sb_rdonly(sci->sc_super)) + return -EROFS; + + nilfs_sc_cstage_set(sci, NILFS_ST_INIT); sci->sc_cno = nilfs->ns_cno; err = nilfs_segctor_collect_dirty_files(sci, nilfs); @@ -1933,17 +2114,17 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) err = nilfs_segctor_begin_construction(sci, nilfs); if (unlikely(err)) - goto out; + goto failed; /* Update time stamp */ - sci->sc_seg_ctime = get_seconds(); + sci->sc_seg_ctime = ktime_get_real_seconds(); err = nilfs_segctor_collect(sci, nilfs, mode); if (unlikely(err)) goto failed; /* Avoid empty segment */ - if (sci->sc_stage.scnt == NILFS_ST_DONE && + if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE && nilfs_segbuf_empty(sci->sc_curseg)) { nilfs_segctor_abort_construction(sci, nilfs, 1); goto out; @@ -1957,8 +2138,12 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_fill_in_file_bmap(sci); if (mode == SC_LSEG_SR && - sci->sc_stage.scnt >= NILFS_ST_CPFILE) { - err = nilfs_segctor_fill_in_checkpoint(sci); + nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) { + err = nilfs_cpfile_finalize_checkpoint( + nilfs->ns_cpfile, nilfs->ns_cno, sci->sc_root, + sci->sc_nblk_inc + sci->sc_nblk_this_inc, + sci->sc_seg_ctime, + !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)); if (unlikely(err)) goto failed_to_write; @@ -1967,17 +2152,14 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - nilfs_segctor_prepare_write(sci); - - nilfs_add_checksums_on_logs(&sci->sc_segbufs, - nilfs->ns_crc_seed); + nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) goto failed_to_write; - if (sci->sc_stage.scnt == NILFS_ST_DONE || - nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { + if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE || + nilfs->ns_blocksize_bits != PAGE_SHIFT) { /* * At this point, we avoid double buffering * for blocksize < pagesize because page dirty @@ -1989,17 +2171,16 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (err) goto failed_to_write; } - } while (sci->sc_stage.scnt != NILFS_ST_DONE); + } while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE); out: nilfs_segctor_drop_written_files(sci, nilfs); return err; failed_to_write: - if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) - nilfs_redirty_inodes(&sci->sc_dirty_files); - failed: + if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE) + nilfs_redirty_inodes(&sci->sc_dirty_files); if (nilfs_doing_gc()) nilfs_redirty_inodes(&sci->sc_gc_inodes); nilfs_segctor_abort_construction(sci, nilfs, err); @@ -2018,8 +2199,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) { spin_lock(&sci->sc_state_lock); if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { - sci->sc_timer.expires = jiffies + sci->sc_interval; - add_timer(&sci->sc_timer); + if (sci->sc_task) { + sci->sc_timer.expires = jiffies + sci->sc_interval; + add_timer(&sci->sc_timer); + } sci->sc_state |= NILFS_SEGCTOR_COMMIT; } spin_unlock(&sci->sc_state_lock); @@ -2028,34 +2211,18 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) { spin_lock(&sci->sc_state_lock); - if (!(sci->sc_flush_request & (1 << bn))) { + if (!(sci->sc_flush_request & BIT(bn))) { unsigned long prev_req = sci->sc_flush_request; - sci->sc_flush_request |= (1 << bn); + sci->sc_flush_request |= BIT(bn); if (!prev_req) wake_up(&sci->sc_wait_daemon); } spin_unlock(&sci->sc_state_lock); } -/** - * nilfs_flush_segment - trigger a segment construction for resource control - * @sb: super block - * @ino: inode number of the file to be flushed out. - */ -void nilfs_flush_segment(struct super_block *sb, ino_t ino) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - struct nilfs_sc_info *sci = nilfs->ns_writer; - - if (!sci || nilfs_doing_construction()) - return; - nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0); - /* assign bit 0 to data files */ -} - struct nilfs_segctor_wait_request { - wait_queue_t wq; + wait_queue_entry_t wq; __u32 seq; int err; atomic_t done; @@ -2066,19 +2233,36 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) struct nilfs_segctor_wait_request wait_req; int err = 0; - spin_lock(&sci->sc_state_lock); init_wait(&wait_req.wq); wait_req.err = 0; atomic_set(&wait_req.done, 0); + init_waitqueue_entry(&wait_req.wq, current); + + /* + * To prevent a race issue where completion notifications from the + * log writer thread are missed, increment the request sequence count + * "sc_seq_request" and insert a wait queue entry using the current + * sequence number into the "sc_wait_request" queue at the same time + * within the lock section of "sc_state_lock". + */ + spin_lock(&sci->sc_state_lock); wait_req.seq = ++sci->sc_seq_request; + add_wait_queue(&sci->sc_wait_request, &wait_req.wq); spin_unlock(&sci->sc_state_lock); - init_waitqueue_entry(&wait_req.wq, current); - add_wait_queue(&sci->sc_wait_request, &wait_req.wq); - set_current_state(TASK_INTERRUPTIBLE); wake_up(&sci->sc_wait_daemon); for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + /* + * Synchronize only while the log writer thread is alive. + * Leave flushing out after the log writer thread exits to + * the cleanup work in nilfs_segctor_destroy(). + */ + if (!sci->sc_task) + break; + if (atomic_read(&wait_req.done)) { err = wait_req.err; break; @@ -2094,16 +2278,15 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) return err; } -static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) +static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force) { struct nilfs_segctor_wait_request *wrq, *n; unsigned long flags; spin_lock_irqsave(&sci->sc_wait_request.lock, flags); - list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list, - wq.task_list) { + list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) { if (!atomic_read(&wrq->done) && - nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { + (force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) { wrq->err = err; atomic_set(&wrq->done, 1); } @@ -2120,34 +2303,27 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) * nilfs_construct_segment - construct a logical segment * @sb: super block * - * Return Value: On success, 0 is retured. On errors, one of the following - * negative error code is returned. - * - * %-EROFS - Read only filesystem. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. + * * %-EROFS - Read only filesystem. */ int nilfs_construct_segment(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_sc_info *sci = nilfs->ns_writer; struct nilfs_transaction_info *ti; - int err; - if (!sci) + if (sb_rdonly(sb) || unlikely(!sci)) return -EROFS; /* A call inside transactions causes a deadlock. */ BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC); - err = nilfs_segctor_sync(sci); - return err; + return nilfs_segctor_sync(sci); } /** @@ -2157,18 +2333,13 @@ int nilfs_construct_segment(struct super_block *sb) * @start: start byte offset * @end: end byte offset (inclusive) * - * Return Value: On success, 0 is retured. On errors, one of the following - * negative error code is returned. - * - * %-EROFS - Read only filesystem. - * - * %-EIO - I/O error - * - * %-ENOSPC - No space left on device (only in a panic state). - * - * %-ERESTARTSYS - Interrupted. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No space left on device (only in a panic state). + * * %-ERESTARTSYS - Interrupted. + * * %-EROFS - Read only filesystem. */ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, loff_t start, loff_t end) @@ -2179,13 +2350,13 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, struct nilfs_transaction_info ti; int err = 0; - if (!sci) + if (sb_rdonly(sb) || unlikely(!sci)) return -EROFS; nilfs_transaction_lock(sb, &ti, 0); ii = NILFS_I(inode); - if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || + if (test_bit(NILFS_I_INODE_SYNC, &ii->i_state) || nilfs_test_opt(nilfs, STRICT_ORDER) || test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || nilfs_discontinued(nilfs)) { @@ -2207,13 +2378,15 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, sci->sc_dsync_end = end; err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); + if (!err) + nilfs->ns_flushed_device = 0; nilfs_transaction_unlock(sb); return err; } #define FLUSH_FILE_BIT (0x1) /* data file only */ -#define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */ +#define FLUSH_DAT_BIT BIT(NILFS_DAT_INO) /* DAT only */ /** * nilfs_segctor_accept - record accepted sequence count of log-write requests @@ -2221,10 +2394,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, */ static void nilfs_segctor_accept(struct nilfs_sc_info *sci) { + bool thread_is_alive; + spin_lock(&sci->sc_state_lock); sci->sc_seq_accepted = sci->sc_seq_request; + thread_is_alive = (bool)sci->sc_task; spin_unlock(&sci->sc_state_lock); - del_timer_sync(&sci->sc_timer); + + /* + * This function does not race with the log writer thread's + * termination. Therefore, deleting sc_timer, which should not be + * done after the log writer thread exits, can be done safely outside + * the area protected by sc_state_lock. + */ + if (thread_is_alive) + timer_delete_sync(&sci->sc_timer); } /** @@ -2241,7 +2425,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) if (mode == SC_LSEG_SR) { sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; sci->sc_seq_done = sci->sc_seq_accepted; - nilfs_segctor_wakeup(sci, err); + nilfs_segctor_wakeup(sci, err, false); sci->sc_flush_request = 0; } else { if (mode == SC_FLUSH_FILE) @@ -2250,7 +2434,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) sci->sc_flush_request &= ~FLUSH_DAT_BIT; /* re-enable timer if checkpoint creation was not done */ - if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task && time_before(jiffies, sci->sc_timer.expires)) add_timer(&sci->sc_timer); } @@ -2261,6 +2445,8 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) * nilfs_segctor_construct - form logs and write them to disk * @sci: segment constructor object * @mode: mode of log forming + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) { @@ -2297,10 +2483,11 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) return err; } -static void nilfs_construction_timeout(unsigned long data) +static void nilfs_construction_timeout(struct timer_list *t) { - struct task_struct *p = (struct task_struct *)data; - wake_up_process(p); + struct nilfs_sc_info *sci = timer_container_of(sci, t, sc_timer); + + wake_up_process(sci->sc_task); } static void @@ -2313,7 +2500,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) continue; list_del_init(&ii->i_dirty); truncate_inode_pages(&ii->vfs_inode.i_data, 0); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); iput(&ii->vfs_inode); } } @@ -2352,8 +2539,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, if (likely(!err)) break; - nilfs_warning(sb, __func__, - "segment construction failed. (err=%d)", err); + nilfs_warn(sb, "error %d cleaning segments", err); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(sci->sc_interval); } @@ -2361,9 +2547,9 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, sci->sc_nfreesegs); if (ret) { - printk(KERN_WARNING - "NILFS warning: error %d on discard request, " - "turning discards off for the device\n", ret); + nilfs_warn(sb, + "error %d on discard request, turning discards off for the device", + ret); nilfs_clear_opt(nilfs, DISCARD); } } @@ -2397,7 +2583,6 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) { int mode = 0; - int err; spin_lock(&sci->sc_state_lock); mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? @@ -2405,7 +2590,7 @@ static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) spin_unlock(&sci->sc_state_lock); if (mode) { - err = nilfs_segctor_do_construct(sci, mode); + nilfs_segctor_do_construct(sci, mode); spin_lock(&sci->sc_state_lock); sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? @@ -2428,123 +2613,85 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) } /** - * nilfs_segctor_thread - main loop of the segment constructor thread. + * nilfs_log_write_required - determine whether log writing is required + * @sci: nilfs_sc_info struct + * @modep: location for storing log writing mode + * + * Return: true if log writing is required, false otherwise. If log writing + * is required, the mode is stored in the location pointed to by @modep. + */ +static bool nilfs_log_write_required(struct nilfs_sc_info *sci, int *modep) +{ + bool timedout, ret = true; + + spin_lock(&sci->sc_state_lock); + timedout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + time_after_eq(jiffies, sci->sc_timer.expires)); + if (timedout || sci->sc_seq_request != sci->sc_seq_done) + *modep = SC_LSEG_SR; + else if (sci->sc_flush_request) + *modep = nilfs_segctor_flush_mode(sci); + else + ret = false; + + spin_unlock(&sci->sc_state_lock); + return ret; +} + +/** + * nilfs_segctor_thread - main loop of the log writer thread * @arg: pointer to a struct nilfs_sc_info. * - * nilfs_segctor_thread() initializes a timer and serves as a daemon - * to execute segment constructions. + * nilfs_segctor_thread() is the main loop function of the log writer kernel + * thread, which determines whether log writing is necessary, and if so, + * performs the log write in the background, or waits if not. It is also + * used to decide the background writeback of the superblock. + * + * Return: Always 0. */ static int nilfs_segctor_thread(void *arg) { struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - int timeout = 0; - sci->sc_timer.data = (unsigned long)current; - sci->sc_timer.function = nilfs_construction_timeout; + nilfs_info(sci->sc_super, + "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds", + sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); - /* start sync. */ - sci->sc_task = current; - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ - printk(KERN_INFO - "segctord starting. Construction interval = %lu seconds, " - "CP frequency < %lu seconds\n", - sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); + set_freezable(); - spin_lock(&sci->sc_state_lock); - loop: - for (;;) { + while (!kthread_should_stop()) { + DEFINE_WAIT(wait); + bool should_write; int mode; - if (sci->sc_state & NILFS_SEGCTOR_QUIT) - goto end_thread; - - if (timeout || sci->sc_seq_request != sci->sc_seq_done) - mode = SC_LSEG_SR; - else if (!sci->sc_flush_request) - break; - else - mode = nilfs_segctor_flush_mode(sci); - - spin_unlock(&sci->sc_state_lock); - nilfs_segctor_thread_construct(sci, mode); - spin_lock(&sci->sc_state_lock); - timeout = 0; - } - - - if (freezing(current)) { - spin_unlock(&sci->sc_state_lock); - try_to_freeze(); - spin_lock(&sci->sc_state_lock); - } else { - DEFINE_WAIT(wait); - int should_sleep = 1; + if (freezing(current)) { + try_to_freeze(); + continue; + } prepare_to_wait(&sci->sc_wait_daemon, &wait, TASK_INTERRUPTIBLE); - - if (sci->sc_seq_request != sci->sc_seq_done) - should_sleep = 0; - else if (sci->sc_flush_request) - should_sleep = 0; - else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) - should_sleep = time_before(jiffies, - sci->sc_timer.expires); - - if (should_sleep) { - spin_unlock(&sci->sc_state_lock); + should_write = nilfs_log_write_required(sci, &mode); + if (!should_write) schedule(); - spin_lock(&sci->sc_state_lock); - } finish_wait(&sci->sc_wait_daemon, &wait); - timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && - time_after_eq(jiffies, sci->sc_timer.expires)); if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) set_nilfs_discontinued(nilfs); - } - goto loop; - end_thread: - spin_unlock(&sci->sc_state_lock); + if (should_write) + nilfs_segctor_thread_construct(sci, mode); + } /* end sync. */ + spin_lock(&sci->sc_state_lock); sci->sc_task = NULL; - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ - return 0; -} - -static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) -{ - struct task_struct *t; - - t = kthread_run(nilfs_segctor_thread, sci, "segctord"); - if (IS_ERR(t)) { - int err = PTR_ERR(t); - - printk(KERN_ERR "NILFS: error %d creating segctord thread\n", - err); - return err; - } - wait_event(sci->sc_wait_task, sci->sc_task != NULL); + timer_shutdown_sync(&sci->sc_timer); + spin_unlock(&sci->sc_state_lock); return 0; } -static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) - __acquires(&sci->sc_state_lock) - __releases(&sci->sc_state_lock) -{ - sci->sc_state |= NILFS_SEGCTOR_QUIT; - - while (sci->sc_task) { - wake_up(&sci->sc_wait_daemon); - spin_unlock(&sci->sc_state_lock); - wait_event(sci->sc_wait_task, sci->sc_task == NULL); - spin_lock(&sci->sc_state_lock); - } -} - /* * Setup & clean-up functions */ @@ -2565,13 +2712,13 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, init_waitqueue_head(&sci->sc_wait_request); init_waitqueue_head(&sci->sc_wait_daemon); - init_waitqueue_head(&sci->sc_wait_task); spin_lock_init(&sci->sc_state_lock); INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); INIT_LIST_HEAD(&sci->sc_write_logs); INIT_LIST_HEAD(&sci->sc_gc_inodes); - init_timer(&sci->sc_timer); + INIT_LIST_HEAD(&sci->sc_iput_queue); + INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; @@ -2588,8 +2735,10 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) { int ret, retrycount = NILFS_SC_CLEANUP_RETRY; - /* The segctord thread was stopped and its timer was removed. - But some tasks remain. */ + /* + * The segctord thread was stopped and its timer was removed. + * But some tasks remain. + */ do { struct nilfs_transaction_info ti; @@ -2597,7 +2746,9 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) ret = nilfs_segctor_construct(sci, SC_LSEG_SR); nilfs_transaction_unlock(sci->sc_super); - } while (ret && retrycount-- > 0); + flush_work(&sci->sc_iput_work); + + } while (ret && ret != -EROFS && retrycount-- > 0); } /** @@ -2615,21 +2766,46 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) up_write(&nilfs->ns_segctor_sem); + if (sci->sc_task) { + wake_up(&sci->sc_wait_daemon); + if (kthread_stop(sci->sc_task)) { + spin_lock(&sci->sc_state_lock); + sci->sc_task = NULL; + timer_shutdown_sync(&sci->sc_timer); + spin_unlock(&sci->sc_state_lock); + } + } + spin_lock(&sci->sc_state_lock); - nilfs_segctor_kill_thread(sci); flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); + /* + * Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can + * be called from delayed iput() via nilfs_evict_inode() and can race + * with the above log writer thread termination. + */ + nilfs_segctor_wakeup(sci, 0, true); + + if (flush_work(&sci->sc_iput_work)) + flag = true; + if (flag || !nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); if (!list_empty(&sci->sc_dirty_files)) { - nilfs_warning(sci->sc_super, __func__, - "dirty file(s) after the final construction\n"); + nilfs_warn(sci->sc_super, + "disposed unprocessed dirty file(s) when stopping log writer"); nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); } + if (!list_empty(&sci->sc_iput_queue)) { + nilfs_warn(sci->sc_super, + "disposed unprocessed inode(s) in iput queue when stopping log writer"); + nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1); + } + WARN_ON(!list_empty(&sci->sc_segbufs)); WARN_ON(!list_empty(&sci->sc_write_logs)); @@ -2637,7 +2813,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) down_write(&nilfs->ns_segctor_sem); - del_timer_sync(&sci->sc_timer); kfree(sci); } @@ -2649,35 +2824,45 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * This allocates a log writer object, initializes it, and starts the * log writer. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINTR - Log writer thread creation failed due to interruption. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) { struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci; + struct task_struct *t; int err; if (nilfs->ns_writer) { /* - * This happens if the filesystem was remounted - * read/write after nilfs_error degenerated it into a - * read-only mount. + * This happens if the filesystem is made read-only by + * __nilfs_error or nilfs_remount and then remounted + * read/write. In these cases, reuse the existing + * writer. */ - nilfs_detach_log_writer(sb); + return 0; } - nilfs->ns_writer = nilfs_segctor_new(sb, root); - if (!nilfs->ns_writer) + sci = nilfs_segctor_new(sb, root); + if (unlikely(!sci)) return -ENOMEM; - err = nilfs_segctor_start_thread(nilfs->ns_writer); - if (err) { - kfree(nilfs->ns_writer); - nilfs->ns_writer = NULL; + nilfs->ns_writer = sci; + t = kthread_create(nilfs_segctor_thread, sci, "segctord"); + if (IS_ERR(t)) { + err = PTR_ERR(t); + nilfs_err(sb, "error %d creating segctord thread", err); + nilfs_detach_log_writer(sb); + return err; } - return err; + sci->sc_task = t; + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); + + wake_up_process(sci->sc_task); + return 0; } /** @@ -2697,16 +2882,18 @@ void nilfs_detach_log_writer(struct super_block *sb) nilfs_segctor_destroy(nilfs->ns_writer); nilfs->ns_writer = NULL; } + set_nilfs_purging(nilfs); /* Force to free the list of dirty files */ spin_lock(&nilfs->ns_inode_lock); if (!list_empty(&nilfs->ns_dirty_files)) { list_splice_init(&nilfs->ns_dirty_files, &garbage_list); - nilfs_warning(sb, __func__, - "Hit dirty file after stopped log writer\n"); + nilfs_warn(sb, + "disposed unprocessed dirty file(s) when detaching log writer"); } spin_unlock(&nilfs->ns_inode_lock); up_write(&nilfs->ns_segctor_sem); nilfs_dispose_list(nilfs, &garbage_list, 1); + clear_nilfs_purging(nilfs); } diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 38a1d0013314..4b39ed43ae72 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * segment.h - NILFS Segment constructor prototypes and definitions + * NILFS Segment constructor prototypes and definitions * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. * */ #ifndef _NILFS_SEGMENT_H @@ -26,7 +13,7 @@ #include <linux/types.h> #include <linux/fs.h> #include <linux/buffer_head.h> -#include <linux/nilfs2_fs.h> +#include <linux/workqueue.h> #include "nilfs.h" struct nilfs_root; @@ -35,10 +22,10 @@ struct nilfs_root; * struct nilfs_recovery_info - Recovery information * @ri_need_recovery: Recovery status * @ri_super_root: Block number of the last super root - * @ri_ri_cno: Number of the last checkpoint + * @ri_cno: Number of the last checkpoint * @ri_lsegs_start: Region for roll-forwarding (start block number) * @ri_lsegs_end: Region for roll-forwarding (end block number) - * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start + * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start * @ri_used_segments: List of segments to be mark active * @ri_pseg_start: Block number of the last partial segment * @ri_seq: Sequence number on the last partial segment @@ -66,14 +53,15 @@ struct nilfs_recovery_info { /** * struct nilfs_cstage - Context of collection stage - * @scnt: Stage count + * @scnt: Stage count, must be accessed via wrappers: + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() * @flags: State flags * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file * @gc_inode_ptr: Pointer on the list of gc-inodes */ struct nilfs_cstage { int scnt; - unsigned flags; + unsigned int flags; struct nilfs_inode_info *dirty_file_ptr; struct nilfs_inode_info *gc_inode_ptr; }; @@ -82,7 +70,7 @@ struct nilfs_segment_buffer; struct nilfs_segsum_pointer { struct buffer_head *bh; - unsigned offset; /* offset in bytes */ + unsigned int offset; /* offset in bytes */ }; /** @@ -92,6 +80,8 @@ struct nilfs_segsum_pointer { * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written + * @sc_iput_queue: list of inodes for which iput should be done + * @sc_iput_work: work struct to defer iput call * @sc_freesegs: array of segment numbers to be freed * @sc_nfreesegs: number of segments on @sc_freesegs * @sc_dsync_inode: inode whose data pages are written for a sync operation @@ -115,9 +105,8 @@ struct nilfs_segsum_pointer { * @sc_flush_request: inode bitmap of metadata files to be flushed * @sc_wait_request: Client request queue * @sc_wait_daemon: Daemon wait queue - * @sc_wait_task: Start/end wait queue to control segctord task * @sc_seq_request: Request counter - * @sc_seq_accept: Accepted request count + * @sc_seq_accepted: Accepted request count * @sc_seq_done: Completion counter * @sc_sync: Request of explicit sync operation * @sc_interval: Timeout value of background construction @@ -135,6 +124,8 @@ struct nilfs_sc_info { struct list_head sc_dirty_files; struct list_head sc_gc_inodes; + struct list_head sc_iput_queue; + struct work_struct sc_iput_work; __u64 *sc_freesegs; size_t sc_nfreesegs; @@ -156,7 +147,7 @@ struct nilfs_sc_info { unsigned long sc_blk_cnt; unsigned long sc_datablk_cnt; unsigned long sc_nblk_this_inc; - time_t sc_seg_ctime; + time64_t sc_seg_ctime; __u64 sc_cno; unsigned long sc_flags; @@ -166,7 +157,6 @@ struct nilfs_sc_info { wait_queue_head_t sc_wait_request; wait_queue_head_t sc_wait_daemon; - wait_queue_head_t sc_wait_task; __u32 sc_seq_request; __u32 sc_seq_accepted; @@ -187,31 +177,40 @@ enum { NILFS_SC_DIRTY, /* One or more dirty meta-data blocks exist */ NILFS_SC_UNCLOSED, /* Logical segment is not closed */ NILFS_SC_SUPER_ROOT, /* The latest segment has a super root */ - NILFS_SC_PRIOR_FLUSH, /* Requesting immediate flush without making a - checkpoint */ - NILFS_SC_HAVE_DELTA, /* Next checkpoint will have update of files - other than DAT, cpfile, sufile, or files - moved by GC */ + NILFS_SC_PRIOR_FLUSH, /* + * Requesting immediate flush without making a + * checkpoint + */ + NILFS_SC_HAVE_DELTA, /* + * Next checkpoint will have update of files + * other than DAT, cpfile, sufile, or files + * moved by GC. + */ }; /* sc_state */ -#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */ #define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */ /* * Constant parameters */ -#define NILFS_SC_CLEANUP_RETRY 3 /* Retry count of construction when - destroying segctord */ +#define NILFS_SC_CLEANUP_RETRY 3 /* + * Retry count of construction when + * destroying segctord + */ /* * Default values of timeout, in seconds. */ -#define NILFS_SC_DEFAULT_TIMEOUT 5 /* Timeout value of dirty blocks. - It triggers construction of a - logical segment with a super root */ -#define NILFS_SC_DEFAULT_SR_FREQ 30 /* Maximum frequency of super root - creation */ +#define NILFS_SC_DEFAULT_TIMEOUT 5 /* + * Timeout value of dirty blocks. + * It triggers construction of a + * logical segment with a super root. + */ +#define NILFS_SC_DEFAULT_SR_FREQ 30 /* + * Maximum frequency of super root + * creation + */ /* * The default threshold amount of data, in block counts. @@ -227,7 +226,6 @@ extern void nilfs_relax_pressure_in_lock(struct super_block *); extern int nilfs_construct_segment(struct super_block *); extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, loff_t, loff_t); -extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 3127e9f438a7..83f93337c01b 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -1,24 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * sufile.c - NILFS segment usage file. + * NILFS segment usage file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. - * Revised by Ryusuke Konishi <ryusuke@osrg.net>. + * Written by Koji Sato. + * Revised by Ryusuke Konishi. */ #include <linux/kernel.h> @@ -26,10 +13,11 @@ #include <linux/string.h> #include <linux/buffer_head.h> #include <linux/errno.h> -#include <linux/nilfs2_fs.h> #include "mdt.h" #include "sufile.h" +#include <trace/events/nilfs2.h> + /** * struct nilfs_sufile_info - on-memory private data of sufile * @mi: on-memory private data of metadata file @@ -59,7 +47,8 @@ static unsigned long nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum) { __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; - do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); + + t = div64_ul(t, nilfs_sufile_segment_usages_per_block(sufile)); return (unsigned long)t; } @@ -67,6 +56,7 @@ static unsigned long nilfs_sufile_get_offset(const struct inode *sufile, __u64 segnum) { __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; + return do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); } @@ -80,19 +70,35 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, max - curr + 1); } -static struct nilfs_segment_usage * -nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, - struct buffer_head *bh, void *kaddr) +/** + * nilfs_sufile_segment_usage_offset - calculate the byte offset of a segment + * usage entry in the folio containing it + * @sufile: segment usage file inode + * @segnum: number of segment usage + * @bh: buffer head of block containing segment usage indexed by @segnum + * + * Return: Byte offset in the folio of the segment usage entry. + */ +static size_t nilfs_sufile_segment_usage_offset(const struct inode *sufile, + __u64 segnum, + struct buffer_head *bh) { - return kaddr + bh_offset(bh) + + return offset_in_folio(bh->b_folio, bh->b_data) + nilfs_sufile_get_offset(sufile, segnum) * NILFS_MDT(sufile)->mi_entry_size; } -static inline int nilfs_sufile_get_header_block(struct inode *sufile, - struct buffer_head **bhp) +static int nilfs_sufile_get_header_block(struct inode *sufile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(sufile->i_sb, + "missing header block in segment usage metadata"); + err = -EIO; + } + return err; } static inline int @@ -115,13 +121,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, u64 ncleanadd, u64 ndirtyadd) { struct nilfs_sufile_header *header; - void *kaddr; - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->sh_ncleansegs, ncleanadd); le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); - kunmap_atomic(kaddr); + kunmap_local(header); mark_buffer_dirty(header_bh); } @@ -129,6 +133,8 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, /** * nilfs_sufile_get_ncleansegs - return the number of clean segments * @sufile: inode of segment usage file + * + * Return: Number of clean segments. */ unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) { @@ -151,17 +157,13 @@ unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) * of successfully modified segments from the head is stored in the * place @ndone points to. * - * Return Value: On success, zero is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - Given segment usage is in hole block (may be returned if - * @create is zero) - * - * %-EINVAL - Invalid segment usage number + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid segment usage number + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Given segment usage is in hole block (may be returned if + * @create is zero) + * * %-ENOMEM - Insufficient memory available. */ int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, int create, size_t *ndone, @@ -181,9 +183,9 @@ int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, down_write(&NILFS_MDT(sufile)->mi_sem); for (seg = segnumv; seg < segnumv + nsegs; seg++) { if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) { - printk(KERN_WARNING - "%s: invalid segment number: %llu\n", __func__, - (unsigned long long)*seg); + nilfs_warn(sufile->i_sb, + "%s: invalid segment number: %llu", + __func__, (unsigned long long)*seg); nerr++; } } @@ -240,8 +242,8 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, int ret; if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { - printk(KERN_WARNING "%s: invalid segment number: %llu\n", - __func__, (unsigned long long)segnum); + nilfs_warn(sufile->i_sb, "%s: invalid segment number: %llu", + __func__, (unsigned long long)segnum); return -EINVAL; } down_write(&NILFS_MDT(sufile)->mi_sem); @@ -268,10 +270,7 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, * @start: minimum segment number of allocatable region (inclusive) * @end: maximum segment number of allocatable region (inclusive) * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-ERANGE - invalid segment region + * Return: 0 on success, or %-ERANGE if segment range is invalid. */ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) { @@ -296,17 +295,14 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) * @sufile: inode of segment usage file * @segnump: pointer to segment number * - * Description: nilfs_sufile_alloc() allocates a clean segment. - * - * Return Value: On success, 0 is returned and the segment number of the - * allocated segment is stored in the place pointed by @segnump. On error, one - * of the following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Description: nilfs_sufile_alloc() allocates a clean segment, and stores + * its segment number in the place pointed to by @segnump. * - * %-ENOSPC - No clean segment left. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - No clean segment left. */ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) { @@ -316,8 +312,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) struct nilfs_sufile_info *sui = NILFS_SUI(sufile); size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; + size_t offset; void *kaddr; - unsigned long nsegments, ncleansegs, nsus, cnt; + unsigned long nsegments, nsus, cnt; int ret, j; down_write(&NILFS_MDT(sufile)->mi_sem); @@ -325,11 +322,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); - ncleansegs = le64_to_cpu(header->sh_ncleansegs); + header = kmap_local_folio(header_bh->b_folio, 0); last_alloc = le64_to_cpu(header->sh_last_alloc); - kunmap_atomic(kaddr); + kunmap_local(header); nsegments = nilfs_sufile_get_nsegments(sufile); maxsegnum = sui->allocmax; @@ -358,13 +353,15 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) break; /* never happens */ } } + trace_nilfs2_segment_usage_check(sufile, segnum, cnt); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &su_bh); if (ret < 0) goto out_header; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, offset); nsus = nilfs_sufile_segment_usages_in_block( sufile, segnum, maxsegnum); @@ -373,14 +370,13 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) continue; /* found a clean segment */ nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr); + kunmap_local(kaddr); - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); - kunmap_atomic(kaddr); + kunmap_local(header); sui->ncleansegs--; mark_buffer_dirty(header_bh); @@ -388,10 +384,13 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) nilfs_mdt_mark_dirty(sufile); brelse(su_bh); *segnump = segnum; + + trace_nilfs2_segment_usage_allocated(sufile, segnum); + goto out_header; } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(su_bh); } @@ -411,18 +410,18 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (unlikely(!nilfs_segment_usage_clean(su))) { - printk(KERN_WARNING "%s: segment %llu must be clean\n", - __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr); + nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean", + __func__, (unsigned long long)segnum); + kunmap_local(su); return; } nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr); + kunmap_local(su); nilfs_sufile_mod_counter(header_bh, -1, 1); NILFS_SUI(sufile)->ncleansegs--; @@ -436,14 +435,14 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int clean, dirty; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); - if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); + if (su->su_flags == cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)) && su->su_nblocks == cpu_to_le32(0)) { - kunmap_atomic(kaddr); + kunmap_local(su); return; } clean = nilfs_segment_usage_clean(su); @@ -452,8 +451,8 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, /* make the segment garbage */ su->su_lastmod = cpu_to_le64(0); su->su_nblocks = cpu_to_le32(0); - su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); - kunmap_atomic(kaddr); + su->su_flags = cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)); + kunmap_local(su); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); NILFS_SUI(sufile)->ncleansegs -= clean; @@ -467,47 +466,94 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int sudirty; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (nilfs_segment_usage_clean(su)) { - printk(KERN_WARNING "%s: segment %llu is already clean\n", - __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr); + nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean", + __func__, (unsigned long long)segnum); + kunmap_local(su); return; } - WARN_ON(nilfs_segment_usage_error(su)); - WARN_ON(!nilfs_segment_usage_dirty(su)); + if (unlikely(nilfs_segment_usage_error(su))) + nilfs_warn(sufile->i_sb, "free segment %llu marked in error", + (unsigned long long)segnum); sudirty = nilfs_segment_usage_dirty(su); + if (unlikely(!sudirty)) + nilfs_warn(sufile->i_sb, "free unallocated segment %llu", + (unsigned long long)segnum); + nilfs_segment_usage_set_clean(su); - kunmap_atomic(kaddr); + kunmap_local(su); mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); NILFS_SUI(sufile)->ncleansegs++; nilfs_mdt_mark_dirty(sufile); + + trace_nilfs2_segment_usage_freed(sufile, segnum); } /** * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty * @sufile: inode of segment usage file * @segnum: segment number + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; + size_t offset; + struct nilfs_segment_usage *su; int ret; + down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); - if (!ret) { + if (unlikely(ret)) { + if (ret == -ENOENT) { + nilfs_error(sufile->i_sb, + "segment usage for segment %llu is unreadable due to a hole block", + (unsigned long long)segnum); + ret = -EIO; + } + goto out_sem; + } + + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh); + su = kmap_local_folio(bh->b_folio, offset); + if (unlikely(nilfs_segment_usage_error(su))) { + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + + kunmap_local(su); + brelse(bh); + if (nilfs_segment_is_active(nilfs, segnum)) { + nilfs_error(sufile->i_sb, + "active segment %llu is erroneous", + (unsigned long long)segnum); + } else { + /* + * Segments marked erroneous are never allocated by + * nilfs_sufile_alloc(); only active segments, ie, + * the segments indexed by ns_segnum or ns_nextnum, + * can be erroneous here. + */ + WARN_ON_ONCE(1); + } + ret = -EIO; + } else { + nilfs_segment_usage_set_dirty(su); + kunmap_local(su); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); } +out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } @@ -517,13 +563,15 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) * @segnum: segment number * @nblocks: number of live blocks in the segment * @modtime: modification time (option) + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, - unsigned long nblocks, time_t modtime) + unsigned long nblocks, time64_t modtime) { struct buffer_head *bh; struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int ret; down_write(&NILFS_MDT(sufile)->mi_sem); @@ -531,13 +579,18 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); - WARN_ON(nilfs_segment_usage_error(su)); - if (modtime) + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh); + su = kmap_local_folio(bh->b_folio, offset); + if (modtime) { + /* + * Check segusage error and set su_lastmod only when updating + * this entry with a valid timestamp, not for cancellation. + */ + WARN_ON_ONCE(nilfs_segment_usage_error(su)); su->su_lastmod = cpu_to_le64(modtime); + } su->su_nblocks = cpu_to_le32(nblocks); - kunmap_atomic(kaddr); + kunmap_local(su); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); @@ -551,25 +604,21 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, /** * nilfs_sufile_get_stat - get segment usage statistics * @sufile: inode of segment usage file - * @stat: pointer to a structure of segment usage statistics - * - * Description: nilfs_sufile_get_stat() returns information about segment - * usage. - * - * Return Value: On success, 0 is returned, and segment usage information is - * stored in the place pointed by @stat. On error, one of the following - * negative error codes is returned. + * @sustat: pointer to a structure of segment usage statistics * - * %-EIO - I/O error. + * Description: nilfs_sufile_get_stat() retrieves segment usage statistics + * and stores them in the location pointed to by @sustat. * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; - void *kaddr; int ret; down_read(&NILFS_MDT(sufile)->mi_sem); @@ -578,8 +627,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); @@ -588,7 +636,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) spin_lock(&nilfs->ns_last_segment_lock); sustat->ss_prot_seq = nilfs->ns_prot_seq; spin_unlock(&nilfs->ns_last_segment_lock); - kunmap_atomic(kaddr); + kunmap_local(header); brelse(header_bh); out_sem: @@ -601,18 +649,18 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; - void *kaddr; + size_t offset; int suclean; - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); if (nilfs_segment_usage_error(su)) { - kunmap_atomic(kaddr); + kunmap_local(su); return; } suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); - kunmap_atomic(kaddr); + kunmap_local(su); if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); @@ -623,22 +671,18 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, } /** - * nilfs_sufile_truncate_range - truncate range of segment array - * @sufile: inode of segment usage file - * @start: start segment number (inclusive) - * @end: end segment number (inclusive) - * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid number of segments specified - * - * %-EBUSY - Dirty or active segments are present in the range - */ + * nilfs_sufile_truncate_range - truncate range of segment array + * @sufile: inode of segment usage file + * @start: start segment number (inclusive) + * @end: end segment number (inclusive) + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Dirty or active segments are present in the range. + * * %-EINVAL - Invalid number of segments specified. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + */ static int nilfs_sufile_truncate_range(struct inode *sufile, __u64 start, __u64 end) { @@ -650,7 +694,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, unsigned long segusages_per_block; unsigned long nsegs, ncleaned; __u64 segnum; - void *kaddr; + size_t offset; ssize_t n, nc; int ret; int j; @@ -681,16 +725,16 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, /* hole */ continue; } - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kmap_local_folio(su_bh->b_folio, offset); su2 = su; for (j = 0; j < n; j++, su = (void *)su + susz) { if ((le32_to_cpu(su->su_flags) & - ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) || + ~BIT(NILFS_SEGMENT_USAGE_ERROR)) || nilfs_segment_is_active(nilfs, segnum + j)) { ret = -EBUSY; - kunmap_atomic(kaddr); + kunmap_local(su2); brelse(su_bh); goto out_header; } @@ -702,7 +746,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, nc++; } } - kunmap_atomic(kaddr); + kunmap_local(su2); if (nc > 0) { mark_buffer_dirty(su_bh); ncleaned += nc; @@ -732,16 +776,12 @@ out: * @sufile: inode of segment usage file * @newnsegs: new number of segments * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOSPC - Enough free space is not left for shrinking - * - * %-EBUSY - Dirty or active segments exist in the region to be truncated + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EBUSY - Dirty or active segments exist in the region to be truncated. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + * * %-ENOSPC - Enough free space is not left for shrinking. */ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) { @@ -749,7 +789,6 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) struct buffer_head *header_bh; struct nilfs_sufile_header *header; struct nilfs_sufile_info *sui = NILFS_SUI(sufile); - void *kaddr; unsigned long nsegs, nrsvsegs; int ret = 0; @@ -776,12 +815,20 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) goto out_header; sui->ncleansegs -= nsegs - newnsegs; + + /* + * If the sufile is successfully truncated, immediately adjust + * the segment allocation space while locking the semaphore + * "mi_sem" so that nilfs_sufile_alloc() never allocates + * segments in the truncated space. + */ + sui->allocmax = newnsegs - 1; + sui->allocmin = 0; } - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); - kunmap_atomic(kaddr); + kunmap_local(header); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(sufile); @@ -795,30 +842,27 @@ out: } /** - * nilfs_sufile_get_suinfo - + * nilfs_sufile_get_suinfo - get segment usage information * @sufile: inode of segment usage file * @segnum: segment number to start looking - * @buf: array of suinfo - * @sisz: byte size of suinfo - * @nsi: size of suinfo array - * - * Description: - * - * Return Value: On success, 0 is returned and .... On error, one of the - * following negative error codes is returned. + * @buf: array of suinfo + * @sisz: byte size of suinfo + * @nsi: size of suinfo array * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: Count of segment usage info items stored in the output buffer on + * success, or one of the following negative error codes on failure: + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, - unsigned sisz, size_t nsi) + unsigned int sisz, size_t nsi) { struct buffer_head *su_bh; struct nilfs_segment_usage *su; struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + size_t offset; void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; @@ -846,20 +890,20 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, continue; } - kaddr = kmap_atomic(su_bh->b_page); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, offset); for (j = 0; j < n; j++, su = (void *)su + susz, si = (void *)si + sisz) { si->sui_lastmod = le64_to_cpu(su->su_lastmod); si->sui_nblocks = le32_to_cpu(su->su_nblocks); si->sui_flags = le32_to_cpu(su->su_flags) & - ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); + ~BIT(NILFS_SEGMENT_USAGE_ACTIVE); if (nilfs_segment_is_active(nilfs, segnum + j)) si->sui_flags |= - (1UL << NILFS_SEGMENT_USAGE_ACTIVE); + BIT(NILFS_SEGMENT_USAGE_ACTIVE); } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(su_bh); } ret = nsegs; @@ -870,11 +914,295 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, } /** + * nilfs_sufile_set_suinfo - sets segment usage info + * @sufile: inode of segment usage file + * @buf: array of suinfo_update + * @supsz: byte size of suinfo_update + * @nsup: size of suinfo_update array + * + * Description: Takes an array of nilfs_suinfo_update structs and updates + * segment usage accordingly. Only the fields indicated by the sup_flags + * are updated. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid values in input (segment number, flags or nblocks). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + */ +ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, + unsigned int supsz, size_t nsup) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *header_bh, *bh; + struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup; + struct nilfs_segment_usage *su; + size_t offset; + unsigned long blkoff, prev_blkoff; + int cleansi, cleansu, dirtysi, dirtysu; + long ncleaned = 0, ndirtied = 0; + int ret = 0; + + if (unlikely(nsup == 0)) + return ret; + + for (sup = buf; sup < supend; sup = (void *)sup + supsz) { + if (sup->sup_segnum >= nilfs->ns_nsegments + || (sup->sup_flags & + (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS)) + || (nilfs_suinfo_update_nblocks(sup) && + sup->sup_sui.sui_nblocks > + nilfs->ns_blocks_per_segment)) + return -EINVAL; + } + + down_write(&NILFS_MDT(sufile)->mi_sem); + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + sup = buf; + blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); + ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); + if (ret < 0) + goto out_header; + + for (;;) { + offset = nilfs_sufile_segment_usage_offset( + sufile, sup->sup_segnum, bh); + su = kmap_local_folio(bh->b_folio, offset); + + if (nilfs_suinfo_update_lastmod(sup)) + su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod); + + if (nilfs_suinfo_update_nblocks(sup)) + su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks); + + if (nilfs_suinfo_update_flags(sup)) { + /* + * Active flag is a virtual flag projected by running + * nilfs kernel code - drop it not to write it to + * disk. + */ + sup->sup_sui.sui_flags &= + ~BIT(NILFS_SEGMENT_USAGE_ACTIVE); + + cleansi = nilfs_suinfo_clean(&sup->sup_sui); + cleansu = nilfs_segment_usage_clean(su); + dirtysi = nilfs_suinfo_dirty(&sup->sup_sui); + dirtysu = nilfs_segment_usage_dirty(su); + + if (cleansi && !cleansu) + ++ncleaned; + else if (!cleansi && cleansu) + --ncleaned; + + if (dirtysi && !dirtysu) + ++ndirtied; + else if (!dirtysi && dirtysu) + --ndirtied; + + su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags); + } + + kunmap_local(su); + + sup = (void *)sup + supsz; + if (sup >= supend) + break; + + prev_blkoff = blkoff; + blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); + if (blkoff == prev_blkoff) + continue; + + /* get different block */ + mark_buffer_dirty(bh); + put_bh(bh); + ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); + if (unlikely(ret < 0)) + goto out_mark; + } + mark_buffer_dirty(bh); + put_bh(bh); + + out_mark: + if (ncleaned || ndirtied) { + nilfs_sufile_mod_counter(header_bh, (u64)ncleaned, + (u64)ndirtied); + NILFS_SUI(sufile)->ncleansegs += ncleaned; + } + nilfs_mdt_mark_dirty(sufile); + out_header: + put_bh(header_bh); + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** + * nilfs_sufile_trim_fs() - trim ioctl handle function + * @sufile: inode of segment usage file + * @range: fstrim_range structure + * + * start: First Byte to trim + * len: number of Bytes to trim from start + * minlen: minimum extent length in Bytes + * + * Decription: nilfs_sufile_trim_fs goes through all segments containing bytes + * from start to start+len. start is rounded up to the next block boundary + * and start+len is rounded down. For each clean segment blkdev_issue_discard + * function is invoked. + * + * Return: 0 on success, or a negative error code on failure. + */ +int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *su_bh; + struct nilfs_segment_usage *su; + size_t offset; + void *kaddr; + size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size; + sector_t seg_start, seg_end, start_block, end_block; + sector_t start = 0, nblocks = 0; + u64 segnum, segnum_end, minlen, len, max_blocks, ndiscarded = 0; + int ret = 0; + unsigned int sects_per_block; + + sects_per_block = (1 << nilfs->ns_blocksize_bits) / + bdev_logical_block_size(nilfs->ns_bdev); + len = range->len >> nilfs->ns_blocksize_bits; + minlen = range->minlen >> nilfs->ns_blocksize_bits; + max_blocks = ((u64)nilfs->ns_nsegments * nilfs->ns_blocks_per_segment); + + if (!len || range->start >= max_blocks << nilfs->ns_blocksize_bits) + return -EINVAL; + + start_block = (range->start + nilfs->ns_blocksize - 1) >> + nilfs->ns_blocksize_bits; + + /* + * range->len can be very large (actually, it is set to + * ULLONG_MAX by default) - truncate upper end of the range + * carefully so as not to overflow. + */ + if (max_blocks - start_block < len) + end_block = max_blocks - 1; + else + end_block = start_block + len - 1; + + segnum = nilfs_get_segnum_of_block(nilfs, start_block); + segnum_end = nilfs_get_segnum_of_block(nilfs, end_block); + + down_read(&NILFS_MDT(sufile)->mi_sem); + + while (segnum <= segnum_end) { + n = nilfs_sufile_segment_usages_in_block(sufile, segnum, + segnum_end); + + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, + &su_bh); + if (ret < 0) { + if (ret != -ENOENT) + goto out_sem; + /* hole */ + segnum += n; + continue; + } + + offset = nilfs_sufile_segment_usage_offset(sufile, segnum, + su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, offset); + for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) { + if (!nilfs_segment_usage_clean(su)) + continue; + + nilfs_get_segment_range(nilfs, segnum, &seg_start, + &seg_end); + + if (!nblocks) { + /* start new extent */ + start = seg_start; + nblocks = seg_end - seg_start + 1; + continue; + } + + if (start + nblocks == seg_start) { + /* add to previous extent */ + nblocks += seg_end - seg_start + 1; + continue; + } + + /* discard previous extent */ + if (start < start_block) { + nblocks -= start_block - start; + start = start_block; + } + + if (nblocks >= minlen) { + kunmap_local(kaddr); + + ret = blkdev_issue_discard(nilfs->ns_bdev, + start * sects_per_block, + nblocks * sects_per_block, + GFP_NOFS); + if (ret < 0) { + put_bh(su_bh); + goto out_sem; + } + + ndiscarded += nblocks; + offset = nilfs_sufile_segment_usage_offset( + sufile, segnum, su_bh); + su = kaddr = kmap_local_folio(su_bh->b_folio, + offset); + } + + /* start new extent */ + start = seg_start; + nblocks = seg_end - seg_start + 1; + } + kunmap_local(kaddr); + put_bh(su_bh); + } + + + if (nblocks) { + /* discard last extent */ + if (start < start_block) { + nblocks -= start_block - start; + start = start_block; + } + if (start + nblocks > end_block + 1) + nblocks = end_block - start + 1; + + if (nblocks >= minlen) { + ret = blkdev_issue_discard(nilfs->ns_bdev, + start * sects_per_block, + nblocks * sects_per_block, + GFP_NOFS); + if (!ret) + ndiscarded += nblocks; + } + } + +out_sem: + up_read(&NILFS_MDT(sufile)->mi_sem); + + range->len = ndiscarded << nilfs->ns_blocksize_bits; + return ret; +} + +/** * nilfs_sufile_read - read or get sufile inode * @sb: super block instance * @susize: size of a segment usage entry * @raw_inode: on-disk sufile inode * @inodep: buffer to store the inode + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_inode *raw_inode, struct inode **inodep) @@ -883,13 +1211,22 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_sufile_info *sui; struct buffer_head *header_bh; struct nilfs_sufile_header *header; - void *kaddr; int err; + if (susize > sb->s_blocksize) { + nilfs_err(sb, "too large segment usage size: %zu bytes", + susize); + return -EINVAL; + } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) { + nilfs_err(sb, "too small segment usage size: %zu bytes", + susize); + return -EINVAL; + } + sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); if (unlikely(!sufile)) return -ENOMEM; - if (!(sufile->i_state & I_NEW)) + if (!(inode_state_read_once(sufile) & I_NEW)) goto out; err = nilfs_mdt_init(sufile, NILFS_MDT_GFP, sizeof(*sui)); @@ -903,15 +1240,20 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, if (err) goto failed; - err = nilfs_sufile_get_header_block(sufile, &header_bh); - if (err) + err = nilfs_mdt_get_block(sufile, 0, 0, NULL, &header_bh); + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "missing header block in segment usage metadata"); + err = -EINVAL; + } goto failed; + } sui = NILFS_SUI(sufile); - kaddr = kmap_atomic(header_bh->b_page); - header = kaddr + bh_offset(header_bh); + header = kmap_local_folio(header_bh->b_folio, 0); sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); - kunmap_atomic(kaddr); + kunmap_local(header); brelse(header_bh); sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index e84bc5b51fc1..cd6f28ab3521 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * sufile.h - NILFS segment usage file. + * NILFS segment usage file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. + * Written by Koji Sato. */ #ifndef _NILFS_SUFILE_H @@ -25,7 +12,6 @@ #include <linux/fs.h> #include <linux/buffer_head.h> -#include <linux/nilfs2_fs.h> #include "mdt.h" @@ -40,10 +26,11 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end); int nilfs_sufile_alloc(struct inode *, __u64 *); int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, - unsigned long nblocks, time_t modtime); + unsigned long nblocks, time64_t modtime); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); -ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, +ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned int, size_t); +ssize_t nilfs_sufile_set_suinfo(struct inode *, void *, unsigned int, size_t); int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *, void (*dofunc)(struct inode *, __u64, @@ -65,11 +52,14 @@ void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs); int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_inode *raw_inode, struct inode **inodep); +int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range); /** * nilfs_sufile_scrap - make a segment garbage * @sufile: inode of segment usage file * @segnum: segment number to be freed + * + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum) { @@ -80,6 +70,8 @@ static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum) * nilfs_sufile_free - free segment * @sufile: inode of segment usage file * @segnum: segment number to be freed + * + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) { @@ -92,6 +84,8 @@ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) * @segnumv: array of segment numbers * @nsegs: size of @segnumv array * @ndone: place to store the number of freed segments + * + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv, size_t nsegs, size_t *ndone) @@ -107,8 +101,7 @@ static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv, * @nsegs: size of @segnumv array * @ndone: place to store the number of cancelled segments * - * Return Value: On success, 0 is returned. On error, a negative error codes - * is returned. + * Return: 0 on success, or a negative error code on failure. */ static inline int nilfs_sufile_cancel_freev(struct inode *sufile, __u64 *segnumv, size_t nsegs, @@ -126,14 +119,11 @@ static inline int nilfs_sufile_cancel_freev(struct inode *sufile, * Description: nilfs_sufile_set_error() marks the segment specified by * @segnum as erroneous. The error segment will never be used again. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid segment usage number. + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - Invalid segment usage number. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) { diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index af3ba0478cdf..badc2cbc895e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * super.c - NILFS module and super block management. + * NILFS module and super block management. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. */ /* * linux/fs/ext2/super.c @@ -42,12 +29,13 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> -#include <linux/parser.h> #include <linux/crc32.h> #include <linux/vfs.h> #include <linux/writeback.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include "nilfs.h" #include "export.h" #include "mdt.h" @@ -73,7 +61,28 @@ struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; static int nilfs_setup_super(struct super_block *sb, int is_mount); -static int nilfs_remount(struct super_block *sb, int *flags, char *data); + +void __nilfs_msg(struct super_block *sb, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + int level; + + va_start(args, fmt); + + level = printk_get_level(fmt); + vaf.fmt = printk_skip_level(fmt); + vaf.va = &args; + + if (sb) + printk("%c%cNILFS (%s): %pV\n", + KERN_SOH_ASCII, level, sb->s_id, &vaf); + else + printk("%c%cNILFS: %pV\n", + KERN_SOH_ASCII, level, &vaf); + + va_end(args); +} static void nilfs_set_error(struct super_block *sb) { @@ -95,19 +104,24 @@ static void nilfs_set_error(struct super_block *sb) } /** - * nilfs_error() - report failure condition on a filesystem + * __nilfs_error() - report failure condition on a filesystem + * @sb: super block instance + * @function: name of calling function + * @fmt: format string for message to be output + * @...: optional arguments to @fmt + * + * __nilfs_error() sets an ERROR_FS flag on the superblock as well as + * reporting an error message. This function should be called when + * NILFS detects incoherences or defects of meta data on disk. * - * nilfs_error() sets an ERROR_FS flag on the superblock as well as - * reporting an error message. It should be called when NILFS detects - * incoherences or defects of meta data on disk. As for sustainable - * errors such as a single-shot I/O error, nilfs_warning() or the printk() - * function should be used instead. + * This implements the body of nilfs_error() macro. Normally, + * nilfs_error() should be used. As for sustainable errors such as a + * single-shot I/O error, nilfs_err() should be used instead. * - * The segment constructor must not call this function because it can - * kill itself. + * Callers should not add a trailing newline since this will do it. */ -void nilfs_error(struct super_block *sb, const char *function, - const char *fmt, ...) +void __nilfs_error(struct super_block *sb, const char *function, + const char *fmt, ...) { struct the_nilfs *nilfs = sb->s_fs_info; struct va_format vaf; @@ -123,12 +137,12 @@ void nilfs_error(struct super_block *sb, const char *function, va_end(args); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { nilfs_set_error(sb); if (nilfs_test_opt(nilfs, ERRORS_RO)) { printk(KERN_CRIT "Remounting filesystem read-only\n"); - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; } } @@ -137,56 +151,30 @@ void nilfs_error(struct super_block *sb, const char *function, sb->s_id); } -void nilfs_warning(struct super_block *sb, const char *function, - const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n", - sb->s_id, function, &vaf); - - va_end(args); -} - - struct inode *nilfs_alloc_inode(struct super_block *sb) { struct nilfs_inode_info *ii; - ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS); + ii = alloc_inode_sb(sb, nilfs_inode_cachep, GFP_NOFS); if (!ii) return NULL; ii->i_bh = NULL; ii->i_state = 0; + ii->i_type = 0; ii->i_cno = 0; - ii->vfs_inode.i_version = 1; - nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi); + ii->i_assoc_inode = NULL; + ii->i_bmap = &ii->i_bmap_data; return &ii->vfs_inode; } -static void nilfs_i_callback(struct rcu_head *head) +static void nilfs_free_inode(struct inode *inode) { - struct inode *inode = container_of(head, struct inode, i_rcu); - struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + if (nilfs_is_metadata_file_inode(inode)) + nilfs_mdt_destroy(inode); - if (mdi) { - kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ - kfree(mdi); - } kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } -void nilfs_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, nilfs_i_callback); -} - static int nilfs_sync_super(struct super_block *sb, int flag) { struct the_nilfs *nilfs = sb->s_fs_info; @@ -196,14 +184,13 @@ static int nilfs_sync_super(struct super_block *sb, int flag) set_buffer_dirty(nilfs->ns_sbh[0]); if (nilfs_test_opt(nilfs, BARRIER)) { err = __sync_dirty_buffer(nilfs->ns_sbh[0], - WRITE_SYNC | WRITE_FLUSH_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } else { err = sync_dirty_buffer(nilfs->ns_sbh[0]); } if (unlikely(err)) { - printk(KERN_ERR - "NILFS: unable to write superblock (err=%d)\n", err); + nilfs_err(sb, "unable to write superblock: err=%d", err); if (err == -EIO && nilfs->ns_sbh[1]) { /* * sbp[0] points to newer log than sbp[1], @@ -273,13 +260,12 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) { memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); } else { - printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", - sb->s_id); + nilfs_crit(sb, "superblock broke"); return NULL; } } else if (sbp[1] && sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { - memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); } if (flip && sbp[1]) @@ -292,10 +278,10 @@ int nilfs_commit_super(struct super_block *sb, int flag) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; - time_t t; + time64_t t; /* nilfs->ns_sem must be locked by the caller. */ - t = get_seconds(); + t = ktime_get_real_seconds(); nilfs->ns_sbwtime = t; sbp[0]->s_wtime = cpu_to_le64(t); sbp[0]->s_sum = 0; @@ -310,6 +296,9 @@ int nilfs_commit_super(struct super_block *sb, int flag) nilfs->ns_sbsize)); } clear_nilfs_sb_dirty(nilfs); + nilfs->ns_flushed_device = 1; + /* make sure store to ns_flushed_device cannot be reordered */ + smp_wmb(); return nilfs_sync_super(sb, flag); } @@ -320,6 +309,8 @@ int nilfs_commit_super(struct super_block *sb, int flag) * This function restores state flags in the on-disk super block. * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the * filesystem was not clean previously. + * + * Return: 0 on success, %-EIO if I/O error or superblock is corrupted. */ int nilfs_cleanup_super(struct super_block *sb) { @@ -350,6 +341,8 @@ int nilfs_cleanup_super(struct super_block *sb) * nilfs_move_2nd_super - relocate secondary super block * @sb: super block instance * @sb2off: new offset of the secondary super block (in bytes) + * + * Return: 0 on success, or a negative error code on failure. */ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) { @@ -358,7 +351,7 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) struct nilfs_super_block *nsbp; sector_t blocknr, newblocknr; unsigned long offset; - int sb2i = -1; /* array index of the secondary superblock */ + int sb2i; /* array index of the secondary superblock */ int ret = 0; /* nilfs->ns_sem must be locked by the caller. */ @@ -369,6 +362,9 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { sb2i = 0; blocknr = nilfs->ns_sbh[0]->b_blocknr; + } else { + sb2i = -1; + blocknr = 0; } if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) goto out; /* super block location is unchanged */ @@ -378,17 +374,38 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) offset = sb2off & (nilfs->ns_blocksize - 1); nsbh = sb_getblk(sb, newblocknr); if (!nsbh) { - printk(KERN_WARNING - "NILFS warning: unable to move secondary superblock " - "to block %llu\n", (unsigned long long)newblocknr); + nilfs_warn(sb, + "unable to move secondary superblock to block %llu", + (unsigned long long)newblocknr); ret = -EIO; goto out; } nsbp = (void *)nsbh->b_data + offset; - memset(nsbp, 0, nilfs->ns_blocksize); + lock_buffer(nsbh); if (sb2i >= 0) { + /* + * The position of the second superblock only changes by 4KiB, + * which is larger than the maximum superblock data size + * (= 1KiB), so there is no need to use memmove() to allow + * overlap between source and destination. + */ memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize); + + /* + * Zero fill after copy to avoid overwriting in case of move + * within the same block. + */ + memset(nsbh->b_data, 0, offset); + memset((void *)nsbp + nilfs->ns_sbsize, 0, + nsbh->b_size - offset - nilfs->ns_sbsize); + } else { + memset(nsbh->b_data, 0, nsbh->b_size); + } + set_buffer_uptodate(nsbh); + unlock_buffer(nsbh); + + if (sb2i >= 0) { brelse(nilfs->ns_sbh[sb2i]); nilfs->ns_sbh[sb2i] = nsbh; nilfs->ns_sbp[sb2i] = nsbp; @@ -407,6 +424,8 @@ out: * nilfs_resize_fs - resize the filesystem * @sb: super block instance * @newsize: new size of the filesystem (in bytes) + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) { @@ -417,11 +436,20 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) int ret; ret = -ERANGE; - devsize = i_size_read(sb->s_bdev->bd_inode); + devsize = bdev_nr_bytes(sb->s_bdev); if (newsize > devsize) goto out; /* + * Prevent underflow in second superblock position calculation. + * The exact minimum size check is done in nilfs_sufile_resize(). + */ + if (newsize < 4096) { + ret = -ENOSPC; + goto out; + } + + /* * Write lock is required to protect some functions depending * on the number of segments, the number of reserved segments, * and so forth. @@ -430,7 +458,7 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) sb2off = NILFS_SB2_OFFSET_BYTES(newsize); newnsegs = sb2off >> nilfs->ns_blocksize_bits; - do_div(newnsegs, nilfs->ns_blocks_per_segment); + newnsegs = div64_ul(newnsegs, nilfs->ns_blocks_per_segment); ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs); up_write(&nilfs->ns_segctor_sem); @@ -480,12 +508,13 @@ static void nilfs_put_super(struct super_block *sb) nilfs_detach_log_writer(sb); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { down_write(&nilfs->ns_sem); nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); } + nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); @@ -514,6 +543,9 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) } up_write(&nilfs->ns_sem); + if (!err) + err = nilfs_flush_device(nilfs); + return err; } @@ -522,8 +554,6 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; - struct nilfs_checkpoint *raw_cp; - struct buffer_head *bh_cp; int err = -ENOMEM; root = nilfs_find_or_create_root( @@ -535,39 +565,19 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, goto reuse; /* already attached checkpoint */ down_read(&nilfs->ns_segctor_sem); - err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, - &bh_cp); + err = nilfs_ifile_read(sb, root, cno, nilfs->ns_inode_size); up_read(&nilfs->ns_segctor_sem); - if (unlikely(err)) { - if (err == -ENOENT || err == -EINVAL) { - printk(KERN_ERR - "NILFS: Invalid checkpoint " - "(checkpoint number=%llu)\n", - (unsigned long long)cno); - err = -EINVAL; - } + if (unlikely(err)) goto failed; - } - - err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size, - &raw_cp->cp_ifile_inode, &root->ifile); - if (err) - goto failed_bh; - - atomic64_set(&root->inodes_count, - le64_to_cpu(raw_cp->cp_inodes_count)); - atomic64_set(&root->blocks_count, - le64_to_cpu(raw_cp->cp_blocks_count)); - - nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); reuse: *rootp = root; return 0; - failed_bh: - nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); failed: + if (err == -EINVAL) + nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)", + (unsigned long long)cno); nilfs_put_root(root); return err; @@ -578,7 +588,7 @@ static int nilfs_freeze(struct super_block *sb) struct the_nilfs *nilfs = sb->s_fs_info; int err; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 0; /* Mark super block clean */ @@ -592,7 +602,7 @@ static int nilfs_unfreeze(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 0; down_write(&nilfs->ns_sem); @@ -604,7 +614,7 @@ static int nilfs_unfreeze(struct super_block *sb) static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root; + struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root; struct the_nilfs *nilfs = root->nilfs; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); unsigned long long blocks; @@ -639,9 +649,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) err = nilfs_ifile_count_free_inodes(root->ifile, &nmaxinodes, &nfreeinodes); if (unlikely(err)) { - printk(KERN_WARNING - "NILFS warning: fail to count free inodes: err %d.\n", - err); + nilfs_warn(sb, "failed to count free inodes: err=%d", err); if (err == -ERANGE) { /* * If nilfs_palloc_count_max_entries() returns @@ -665,8 +673,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = nmaxinodes; buf->f_ffree = nfreeinodes; buf->f_namelen = NILFS_NAME_LEN; - buf->f_fsid.val[0] = (u32)id; - buf->f_fsid.val[1] = (u32)(id >> 32); + buf->f_fsid = u64_to_fsid(id); return 0; } @@ -675,7 +682,7 @@ static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct super_block *sb = dentry->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; - struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root; + struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root; if (!nilfs_test_opt(nilfs, BARRIER)) seq_puts(seq, ",nobarrier"); @@ -697,7 +704,7 @@ static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry) static const struct super_operations nilfs_sops = { .alloc_inode = nilfs_alloc_inode, - .destroy_inode = nilfs_destroy_inode, + .free_inode = nilfs_free_inode, .dirty_inode = nilfs_dirty_inode, .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, @@ -705,105 +712,98 @@ static const struct super_operations nilfs_sops = { .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, .statfs = nilfs_statfs, - .remount_fs = nilfs_remount, .show_options = nilfs_show_options }; enum { - Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, - Opt_discard, Opt_nodiscard, Opt_err, + Opt_err, Opt_barrier, Opt_snapshot, Opt_order, Opt_norecovery, + Opt_discard, }; -static match_table_t tokens = { - {Opt_err_cont, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_snapshot, "cp=%u"}, - {Opt_order, "order=%s"}, - {Opt_norecovery, "norecovery"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_err, NULL} +static const struct constant_table nilfs_param_err[] = { + {"continue", NILFS_MOUNT_ERRORS_CONT}, + {"panic", NILFS_MOUNT_ERRORS_PANIC}, + {"remount-ro", NILFS_MOUNT_ERRORS_RO}, + {} }; -static int parse_options(char *options, struct super_block *sb, int is_remount) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - char *p; - substring_t args[MAX_OPT_ARGS]; - - if (!options) - return 1; +static const struct fs_parameter_spec nilfs_param_spec[] = { + fsparam_enum ("errors", Opt_err, nilfs_param_err), + fsparam_flag_no ("barrier", Opt_barrier), + fsparam_u64 ("cp", Opt_snapshot), + fsparam_string ("order", Opt_order), + fsparam_flag ("norecovery", Opt_norecovery), + fsparam_flag_no ("discard", Opt_discard), + {} +}; - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; +struct nilfs_fs_context { + unsigned long ns_mount_opt; + __u64 cno; +}; - token = match_token(p, tokens, args); - switch (token) { - case Opt_barrier: - nilfs_set_opt(nilfs, BARRIER); - break; - case Opt_nobarrier: +static int nilfs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct nilfs_fs_context *nilfs = fc->fs_private; + int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, nilfs_param_spec, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_barrier: + if (result.negated) nilfs_clear_opt(nilfs, BARRIER); - break; - case Opt_order: - if (strcmp(args[0].from, "relaxed") == 0) - /* Ordered data semantics */ - nilfs_clear_opt(nilfs, STRICT_ORDER); - else if (strcmp(args[0].from, "strict") == 0) - /* Strict in-order semantics */ - nilfs_set_opt(nilfs, STRICT_ORDER); - else - return 0; - break; - case Opt_err_panic: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC); - break; - case Opt_err_ro: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO); - break; - case Opt_err_cont: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT); - break; - case Opt_snapshot: - if (is_remount) { - printk(KERN_ERR - "NILFS: \"%s\" option is invalid " - "for remount.\n", p); - return 0; - } - break; - case Opt_norecovery: - nilfs_set_opt(nilfs, NORECOVERY); - break; - case Opt_discard: - nilfs_set_opt(nilfs, DISCARD); - break; - case Opt_nodiscard: - nilfs_clear_opt(nilfs, DISCARD); - break; - default: - printk(KERN_ERR - "NILFS: Unrecognized mount option \"%s\"\n", p); - return 0; + else + nilfs_set_opt(nilfs, BARRIER); + break; + case Opt_order: + if (strcmp(param->string, "relaxed") == 0) + /* Ordered data semantics */ + nilfs_clear_opt(nilfs, STRICT_ORDER); + else if (strcmp(param->string, "strict") == 0) + /* Strict in-order semantics */ + nilfs_set_opt(nilfs, STRICT_ORDER); + else + return -EINVAL; + break; + case Opt_err: + nilfs->ns_mount_opt &= ~NILFS_MOUNT_ERROR_MODE; + nilfs->ns_mount_opt |= result.uint_32; + break; + case Opt_snapshot: + if (is_remount) { + struct super_block *sb = fc->root->d_sb; + + nilfs_err(sb, + "\"%s\" option is invalid for remount", + param->key); + return -EINVAL; + } + if (result.uint_64 == 0) { + nilfs_err(NULL, + "invalid option \"cp=0\": invalid checkpoint number 0"); + return -EINVAL; } + nilfs->cno = result.uint_64; + break; + case Opt_norecovery: + nilfs_set_opt(nilfs, NORECOVERY); + break; + case Opt_discard: + if (result.negated) + nilfs_clear_opt(nilfs, DISCARD); + else + nilfs_set_opt(nilfs, DISCARD); + break; + default: + return -EINVAL; } - return 1; -} -static inline void -nilfs_set_default_options(struct super_block *sb, - struct nilfs_super_block *sbp) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - - nilfs->ns_mount_opt = - NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; + return 0; } static int nilfs_setup_super(struct super_block *sb, int is_mount) @@ -825,19 +825,17 @@ static int nilfs_setup_super(struct super_block *sb, int is_mount) mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); if (nilfs->ns_mount_state & NILFS_ERROR_FS) { - printk(KERN_WARNING - "NILFS warning: mounting fs with errors\n"); + nilfs_warn(sb, "mounting fs with errors"); #if 0 } else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) { - printk(KERN_WARNING - "NILFS warning: maximal mount count reached\n"); + nilfs_warn(sb, "maximal mount count reached"); #endif } if (!max_mnt_count) sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1); - sbp[0]->s_mtime = cpu_to_le64(get_seconds()); + sbp[0]->s_mtime = cpu_to_le64(ktime_get_real_seconds()); skip_mount_setup: sbp[0]->s_state = @@ -862,9 +860,8 @@ struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); } -int nilfs_store_magic_and_option(struct super_block *sb, - struct nilfs_super_block *sbp, - char *data) +int nilfs_store_magic(struct super_block *sb, + struct nilfs_super_block *sbp) { struct the_nilfs *nilfs = sb->s_fs_info; @@ -872,17 +869,15 @@ int nilfs_store_magic_and_option(struct super_block *sb, /* FS independent flags */ #ifdef NILFS_ATIME_DISABLE - sb->s_flags |= MS_NOATIME; + sb->s_flags |= SB_NOATIME; #endif - nilfs_set_default_options(sb, sbp); - nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid); nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid); nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval); nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max); - return !parse_options(data, sb, 0) ? -EINVAL : 0 ; + return 0; } int nilfs_check_feature_compatibility(struct super_block *sb, @@ -893,17 +888,17 @@ int nilfs_check_feature_compatibility(struct super_block *sb, features = le64_to_cpu(sbp->s_feature_incompat) & ~NILFS_FEATURE_INCOMPAT_SUPP; if (features) { - printk(KERN_ERR "NILFS: couldn't mount because of unsupported " - "optional features (%llx)\n", - (unsigned long long)features); + nilfs_err(sb, + "couldn't mount because of unsupported optional features (%llx)", + (unsigned long long)features); return -EINVAL; } features = le64_to_cpu(sbp->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; - if (!(sb->s_flags & MS_RDONLY) && features) { - printk(KERN_ERR "NILFS: couldn't mount RDWR because of " - "unsupported optional features (%llx)\n", - (unsigned long long)features); + if (!sb_rdonly(sb) && features) { + nilfs_err(sb, + "couldn't mount RDWR because of unsupported optional features (%llx)", + (unsigned long long)features); return -EINVAL; } return 0; @@ -919,13 +914,13 @@ static int nilfs_get_root_dentry(struct super_block *sb, inode = nilfs_iget(sb, root, NILFS_ROOT_INO); if (IS_ERR(inode)) { - printk(KERN_ERR "NILFS: get root inode failed\n"); ret = PTR_ERR(inode); + nilfs_err(sb, "error %d getting root inode", ret); goto out; } if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) { iput(inode); - printk(KERN_ERR "NILFS: corrupt root inode.\n"); + nilfs_err(sb, "corrupt root inode"); ret = -EINVAL; goto out; } @@ -942,7 +937,7 @@ static int nilfs_get_root_dentry(struct super_block *sb, iput(inode); } } else { - dentry = d_obtain_alias(inode); + dentry = d_obtain_root(inode); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto failed_dentry; @@ -953,7 +948,7 @@ static int nilfs_get_root_dentry(struct super_block *sb, return ret; failed_dentry: - printk(KERN_ERR "NILFS: get root dentry failed\n"); + nilfs_err(sb, "error %d getting root dentry", ret); goto out; } @@ -973,18 +968,18 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, ret = (ret == -ENOENT) ? -EINVAL : ret; goto out; } else if (!ret) { - printk(KERN_ERR "NILFS: The specified checkpoint is " - "not a snapshot (checkpoint number=%llu).\n", - (unsigned long long)cno); + nilfs_err(s, + "The specified checkpoint is not a snapshot (checkpoint number=%llu)", + (unsigned long long)cno); ret = -EINVAL; goto out; } ret = nilfs_attach_checkpoint(s, cno, false, &root); if (ret) { - printk(KERN_ERR "NILFS: error loading snapshot " - "(checkpoint number=%llu).\n", - (unsigned long long)cno); + nilfs_err(s, + "error %d while loading snapshot (checkpoint number=%llu)", + ret, (unsigned long long)cno); goto out; } ret = nilfs_get_root_dentry(s, root, root_dentry); @@ -994,23 +989,16 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, return ret; } -static int nilfs_tree_was_touched(struct dentry *root_dentry) -{ - return d_count(root_dentry) > 1; -} - /** - * nilfs_try_to_shrink_tree() - try to shrink dentries of a checkpoint + * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint * @root_dentry: root dentry of the tree to be shrunk * - * This function returns true if the tree was in-use. + * Return: true if the tree was in-use, false otherwise. */ -static int nilfs_try_to_shrink_tree(struct dentry *root_dentry) +static bool nilfs_tree_is_busy(struct dentry *root_dentry) { - if (have_submounts(root_dentry)) - return true; shrink_dcache_parent(root_dentry); - return nilfs_tree_was_touched(root_dentry); + return d_count(root_dentry) > 1; } int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) @@ -1021,7 +1009,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) struct dentry *dentry; int ret; - if (cno < 0 || cno > nilfs->ns_cno) + if (cno > nilfs->ns_cno) return false; if (cno >= nilfs_last_cno(nilfs)) @@ -1034,8 +1022,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) if (inode) { dentry = d_find_alias(inode); if (dentry) { - if (nilfs_tree_was_touched(dentry)) - ret = nilfs_try_to_shrink_tree(dentry); + ret = nilfs_tree_is_busy(dentry); dput(dentry); } iput(inode); @@ -1048,53 +1035,61 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) /** * nilfs_fill_super() - initialize a super block instance * @sb: super_block - * @data: mount options - * @silent: silent mode flag + * @fc: filesystem context * * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. + * + * Return: 0 on success, or a negative error code on failure. */ static int -nilfs_fill_super(struct super_block *sb, void *data, int silent) +nilfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct the_nilfs *nilfs; struct nilfs_root *fsroot; - struct backing_dev_info *bdi; + struct nilfs_fs_context *ctx = fc->fs_private; __u64 cno; int err; - nilfs = alloc_nilfs(sb->s_bdev); + nilfs = alloc_nilfs(sb); if (!nilfs) return -ENOMEM; sb->s_fs_info = nilfs; - err = init_nilfs(nilfs, sb, (char *)data); + err = init_nilfs(nilfs, sb); if (err) goto failed_nilfs; + /* Copy in parsed mount options */ + nilfs->ns_mount_opt = ctx->ns_mount_opt; + sb->s_op = &nilfs_sops; sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; - bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; - sb->s_bdi = bdi ? : &default_backing_dev_info; + sb->s_bdi = bdi_get(sb->s_bdev->bd_disk->bdi); err = load_nilfs(nilfs, sb); if (err) goto failed_nilfs; + super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, + sizeof(nilfs->ns_sbp[0]->s_uuid)); + super_set_sysfs_name_bdev(sb); + cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { - printk(KERN_ERR "NILFS: error loading last checkpoint " - "(checkpoint number=%llu).\n", (unsigned long long)cno); + nilfs_err(sb, + "error %d while loading last checkpoint (checkpoint number=%llu)", + err, (unsigned long long)cno); goto failed_unload; } - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { err = nilfs_attach_log_writer(sb, fsroot); if (err) goto failed_checkpoint; @@ -1106,7 +1101,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) nilfs_put_root(fsroot); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); @@ -1121,6 +1116,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) nilfs_put_root(fsroot); failed_unload: + nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); @@ -1130,37 +1126,26 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) return err; } -static int nilfs_remount(struct super_block *sb, int *flags, char *data) +static int nilfs_reconfigure(struct fs_context *fc) { + struct nilfs_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; - unsigned long old_sb_flags; - unsigned long old_mount_opt; int err; - old_sb_flags = sb->s_flags; - old_mount_opt = nilfs->ns_mount_opt; - - if (!parse_options(data, sb, 1)) { - err = -EINVAL; - goto restore_opts; - } - sb->s_flags = (sb->s_flags & ~MS_POSIXACL); + sync_filesystem(sb); err = -EINVAL; if (!nilfs_valid_fs(nilfs)) { - printk(KERN_WARNING "NILFS (device %s): couldn't " - "remount because the filesystem is in an " - "incomplete recovery state.\n", sb->s_id); - goto restore_opts; + nilfs_warn(sb, + "couldn't remount because the filesystem is in an incomplete recovery state"); + goto ignore_opts; } - - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb)) goto out; - if (*flags & MS_RDONLY) { - /* Shutting down log writer */ - nilfs_detach_log_writer(sb); - sb->s_flags |= MS_RDONLY; + if (fc->sb_flags & SB_RDONLY) { + sb->s_flags |= SB_RDONLY; /* * Remounting a valid RW partition RDONLY, so set @@ -1183,206 +1168,148 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) ~NILFS_FEATURE_COMPAT_RO_SUPP; up_read(&nilfs->ns_sem); if (features) { - printk(KERN_WARNING "NILFS (device %s): couldn't " - "remount RDWR because of unsupported optional " - "features (%llx)\n", - sb->s_id, (unsigned long long)features); + nilfs_warn(sb, + "couldn't remount RDWR because of unsupported optional features (%llx)", + (unsigned long long)features); err = -EROFS; - goto restore_opts; + goto ignore_opts; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; - root = NILFS_I(sb->s_root->d_inode)->i_root; + root = NILFS_I(d_inode(sb->s_root))->i_root; err = nilfs_attach_log_writer(sb, root); - if (err) - goto restore_opts; + if (err) { + sb->s_flags |= SB_RDONLY; + goto ignore_opts; + } down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } out: - return 0; - - restore_opts: - sb->s_flags = old_sb_flags; - nilfs->ns_mount_opt = old_mount_opt; - return err; -} + sb->s_flags = (sb->s_flags & ~SB_POSIXACL); + /* Copy over parsed remount options */ + nilfs->ns_mount_opt = ctx->ns_mount_opt; -struct nilfs_super_data { - struct block_device *bdev; - __u64 cno; - int flags; -}; - -/** - * nilfs_identify - pre-read mount options needed to identify mount instance - * @data: mount options - * @sd: nilfs_super_data - */ -static int nilfs_identify(char *data, struct nilfs_super_data *sd) -{ - char *p, *options = data; - substring_t args[MAX_OPT_ARGS]; - int token; - int ret = 0; - - do { - p = strsep(&options, ","); - if (p != NULL && *p) { - token = match_token(p, tokens, args); - if (token == Opt_snapshot) { - if (!(sd->flags & MS_RDONLY)) { - ret++; - } else { - sd->cno = simple_strtoull(args[0].from, - NULL, 0); - /* - * No need to see the end pointer; - * match_token() has done syntax - * checking. - */ - if (sd->cno == 0) - ret++; - } - } - if (ret) - printk(KERN_ERR - "NILFS: invalid mount option: %s\n", p); - } - if (!options) - break; - BUG_ON(options == data); - *(options - 1) = ','; - } while (!ret); - return ret; -} - -static int nilfs_set_bdev_super(struct super_block *s, void *data) -{ - s->s_bdev = data; - s->s_dev = s->s_bdev->bd_dev; return 0; -} -static int nilfs_test_bdev_super(struct super_block *s, void *data) -{ - return (void *)s->s_bdev == data; + ignore_opts: + return err; } -static struct dentry * -nilfs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int +nilfs_get_tree(struct fs_context *fc) { - struct nilfs_super_data sd; + struct nilfs_fs_context *ctx = fc->fs_private; struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; - struct dentry *root_dentry; - int err, s_new = false; - - if (!(flags & MS_RDONLY)) - mode |= FMODE_WRITE; - - sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); - if (IS_ERR(sd.bdev)) - return ERR_CAST(sd.bdev); - - sd.cno = 0; - sd.flags = flags; - if (nilfs_identify((char *)data, &sd)) { - err = -EINVAL; - goto failed; - } + dev_t dev; + int err; - /* - * once the super is inserted into the list by sget, s_umount - * will protect the lockfs code from trying to start a snapshot - * while we are mounting - */ - mutex_lock(&sd.bdev->bd_fsfreeze_mutex); - if (sd.bdev->bd_fsfreeze_count > 0) { - mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); - err = -EBUSY; - goto failed; - } - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags, - sd.bdev); - mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); - if (IS_ERR(s)) { - err = PTR_ERR(s); - goto failed; + if (ctx->cno && !(fc->sb_flags & SB_RDONLY)) { + nilfs_err(NULL, + "invalid option \"cp=%llu\": read-only option is not specified", + ctx->cno); + return -EINVAL; } - if (!s->s_root) { - char b[BDEVNAME_SIZE]; - - s_new = true; + err = lookup_bdev(fc->source, &dev); + if (err) + return err; - /* New superblock instance created */ - s->s_mode = mode; - strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); - sb_set_blocksize(s, block_size(sd.bdev)); + s = sget_dev(fc, dev); + if (IS_ERR(s)) + return PTR_ERR(s); - err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); + if (!s->s_root) { + err = setup_bdev_super(s, fc->sb_flags, fc); + if (!err) + err = nilfs_fill_super(s, fc); if (err) goto failed_super; - s->s_flags |= MS_ACTIVE; - } else if (!sd.cno) { - int busy = false; - - if (nilfs_tree_was_touched(s->s_root)) { - busy = nilfs_try_to_shrink_tree(s->s_root); - if (busy && (flags ^ s->s_flags) & MS_RDONLY) { - printk(KERN_ERR "NILFS: the device already " - "has a %s mount.\n", - (s->s_flags & MS_RDONLY) ? - "read-only" : "read/write"); + s->s_flags |= SB_ACTIVE; + } else if (!ctx->cno) { + if (nilfs_tree_is_busy(s->s_root)) { + if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { + nilfs_err(s, + "the device already has a %s mount.", + sb_rdonly(s) ? "read-only" : "read/write"); err = -EBUSY; goto failed_super; } - } - if (!busy) { + } else { /* - * Try remount to setup mount states if the current + * Try reconfigure to setup mount states if the current * tree is not mounted and only snapshots use this sb. + * + * Since nilfs_reconfigure() requires fc->root to be + * set, set it first and release it on failure. */ - err = nilfs_remount(s, &flags, data); - if (err) + fc->root = dget(s->s_root); + err = nilfs_reconfigure(fc); + if (err) { + dput(fc->root); + fc->root = NULL; /* prevent double release */ goto failed_super; + } + return 0; } } - if (sd.cno) { - err = nilfs_attach_snapshot(s, sd.cno, &root_dentry); + if (ctx->cno) { + struct dentry *root_dentry; + + err = nilfs_attach_snapshot(s, ctx->cno, &root_dentry); if (err) goto failed_super; - } else { - root_dentry = dget(s->s_root); + fc->root = root_dentry; + return 0; } - if (!s_new) - blkdev_put(sd.bdev, mode); - - return root_dentry; + fc->root = dget(s->s_root); + return 0; failed_super: deactivate_locked_super(s); + return err; +} - failed: - if (!s_new) - blkdev_put(sd.bdev, mode); - return ERR_PTR(err); +static void nilfs_free_fc(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +static const struct fs_context_operations nilfs_context_ops = { + .parse_param = nilfs_parse_param, + .get_tree = nilfs_get_tree, + .reconfigure = nilfs_reconfigure, + .free = nilfs_free_fc, +}; + +static int nilfs_init_fs_context(struct fs_context *fc) +{ + struct nilfs_fs_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; + fc->fs_private = ctx; + fc->ops = &nilfs_context_ops; + + return 0; } struct file_system_type nilfs_fs_type = { .owner = THIS_MODULE, .name = "nilfs2", - .mount = nilfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, + .init_fs_context = nilfs_init_fs_context, + .parameters = nilfs_param_spec, }; MODULE_ALIAS_FS("nilfs2"); @@ -1394,8 +1321,6 @@ static void nilfs_inode_init_once(void *obj) #ifdef CONFIG_NILFS_XATTR init_rwsem(&ii->xattr_sem); #endif - address_space_init_once(&ii->i_btnode_cache); - ii->i_bmap = &ii->i_bmap_data; inode_init_once(&ii->vfs_inode); } @@ -1412,21 +1337,18 @@ static void nilfs_destroy_cachep(void) */ rcu_barrier(); - if (nilfs_inode_cachep) - kmem_cache_destroy(nilfs_inode_cachep); - if (nilfs_transaction_cachep) - kmem_cache_destroy(nilfs_transaction_cachep); - if (nilfs_segbuf_cachep) - kmem_cache_destroy(nilfs_segbuf_cachep); - if (nilfs_btree_path_cache) - kmem_cache_destroy(nilfs_btree_path_cache); + kmem_cache_destroy(nilfs_inode_cachep); + kmem_cache_destroy(nilfs_transaction_cachep); + kmem_cache_destroy(nilfs_segbuf_cachep); + kmem_cache_destroy(nilfs_btree_path_cache); } static int __init nilfs_init_cachep(void) { nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", sizeof(struct nilfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT, nilfs_inode_init_once); + SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, + nilfs_inode_init_once); if (!nilfs_inode_cachep) goto fail; @@ -1463,13 +1385,19 @@ static int __init init_nilfs_fs(void) if (err) goto fail; - err = register_filesystem(&nilfs_fs_type); + err = nilfs_sysfs_init(); if (err) goto free_cachep; + err = register_filesystem(&nilfs_fs_type); + if (err) + goto deinit_sysfs_entry; + printk(KERN_INFO "NILFS version 2 loaded\n"); return 0; +deinit_sysfs_entry: + nilfs_sysfs_exit(); free_cachep: nilfs_destroy_cachep(); fail: @@ -1479,6 +1407,7 @@ fail: static void __exit exit_nilfs_fs(void) { nilfs_destroy_cachep(); + nilfs_sysfs_exit(); unregister_filesystem(&nilfs_fs_type); } diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c new file mode 100644 index 000000000000..bc52afbfc5c7 --- /dev/null +++ b/fs/nilfs2/sysfs.c @@ -0,0 +1,1140 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Sysfs support implementation. + * + * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation. + * Copyright (C) 2014 HGST, Inc., a Western Digital Company. + * + * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com> + */ + +#include <linux/kobject.h> + +#include "nilfs.h" +#include "mdt.h" +#include "sufile.h" +#include "cpfile.h" +#include "sysfs.h" + +/* /sys/fs/<nilfs>/ */ +static struct kset *nilfs_kset; + +#define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \ +static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \ + struct attribute *attr, char *buf) \ +{ \ + struct the_nilfs *nilfs = container_of(kobj->parent, \ + struct the_nilfs, \ + ns_##parent_name##_kobj); \ + struct nilfs_##name##_attr *a = container_of(attr, \ + struct nilfs_##name##_attr, \ + attr); \ + return a->show ? a->show(a, nilfs, buf) : 0; \ +} \ +static ssize_t nilfs_##name##_attr_store(struct kobject *kobj, \ + struct attribute *attr, \ + const char *buf, size_t len) \ +{ \ + struct the_nilfs *nilfs = container_of(kobj->parent, \ + struct the_nilfs, \ + ns_##parent_name##_kobj); \ + struct nilfs_##name##_attr *a = container_of(attr, \ + struct nilfs_##name##_attr, \ + attr); \ + return a->store ? a->store(a, nilfs, buf, len) : 0; \ +} \ +static const struct sysfs_ops nilfs_##name##_attr_ops = { \ + .show = nilfs_##name##_attr_show, \ + .store = nilfs_##name##_attr_store, \ +} + +#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \ +static void nilfs_##name##_attr_release(struct kobject *kobj) \ +{ \ + struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \ + struct nilfs_sysfs_##parent_name##_subgroups, \ + sg_##name##_kobj); \ + complete(&subgroups->sg_##name##_kobj_unregister); \ +} \ +static const struct kobj_type nilfs_##name##_ktype = { \ + .default_groups = nilfs_##name##_groups, \ + .sysfs_ops = &nilfs_##name##_attr_ops, \ + .release = nilfs_##name##_attr_release, \ +} + +#define NILFS_DEV_INT_GROUP_FNS(name, parent_name) \ +static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \ +{ \ + struct kobject *parent; \ + struct kobject *kobj; \ + struct completion *kobj_unregister; \ + struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \ + int err; \ + subgroups = nilfs->ns_##parent_name##_subgroups; \ + kobj = &subgroups->sg_##name##_kobj; \ + kobj_unregister = &subgroups->sg_##name##_kobj_unregister; \ + parent = &nilfs->ns_##parent_name##_kobj; \ + kobj->kset = nilfs_kset; \ + init_completion(kobj_unregister); \ + err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \ + #name); \ + if (err) \ + kobject_put(kobj); \ + return err; \ +} \ +static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \ +{ \ + kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ +} + +/************************************************************************ + * NILFS snapshot attrs * + ************************************************************************/ + +static ssize_t +nilfs_snapshot_inodes_count_show(struct nilfs_snapshot_attr *attr, + struct nilfs_root *root, char *buf) +{ + return sysfs_emit(buf, "%llu\n", + (unsigned long long)atomic64_read(&root->inodes_count)); +} + +static ssize_t +nilfs_snapshot_blocks_count_show(struct nilfs_snapshot_attr *attr, + struct nilfs_root *root, char *buf) +{ + return sysfs_emit(buf, "%llu\n", + (unsigned long long)atomic64_read(&root->blocks_count)); +} + +static const char snapshot_readme_str[] = + "The group contains details about mounted snapshot.\n\n" + "(1) inodes_count\n\tshow number of inodes for snapshot.\n\n" + "(2) blocks_count\n\tshow number of blocks for snapshot.\n\n"; + +static ssize_t +nilfs_snapshot_README_show(struct nilfs_snapshot_attr *attr, + struct nilfs_root *root, char *buf) +{ + return sysfs_emit(buf, snapshot_readme_str); +} + +NILFS_SNAPSHOT_RO_ATTR(inodes_count); +NILFS_SNAPSHOT_RO_ATTR(blocks_count); +NILFS_SNAPSHOT_RO_ATTR(README); + +static struct attribute *nilfs_snapshot_attrs[] = { + NILFS_SNAPSHOT_ATTR_LIST(inodes_count), + NILFS_SNAPSHOT_ATTR_LIST(blocks_count), + NILFS_SNAPSHOT_ATTR_LIST(README), + NULL, +}; +ATTRIBUTE_GROUPS(nilfs_snapshot); + +static ssize_t nilfs_snapshot_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct nilfs_root *root = + container_of(kobj, struct nilfs_root, snapshot_kobj); + struct nilfs_snapshot_attr *a = + container_of(attr, struct nilfs_snapshot_attr, attr); + + return a->show ? a->show(a, root, buf) : 0; +} + +static ssize_t nilfs_snapshot_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct nilfs_root *root = + container_of(kobj, struct nilfs_root, snapshot_kobj); + struct nilfs_snapshot_attr *a = + container_of(attr, struct nilfs_snapshot_attr, attr); + + return a->store ? a->store(a, root, buf, len) : 0; +} + +static void nilfs_snapshot_attr_release(struct kobject *kobj) +{ + struct nilfs_root *root = container_of(kobj, struct nilfs_root, + snapshot_kobj); + complete(&root->snapshot_kobj_unregister); +} + +static const struct sysfs_ops nilfs_snapshot_attr_ops = { + .show = nilfs_snapshot_attr_show, + .store = nilfs_snapshot_attr_store, +}; + +static const struct kobj_type nilfs_snapshot_ktype = { + .default_groups = nilfs_snapshot_groups, + .sysfs_ops = &nilfs_snapshot_attr_ops, + .release = nilfs_snapshot_attr_release, +}; + +int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root) +{ + struct the_nilfs *nilfs; + struct kobject *parent; + int err; + + nilfs = root->nilfs; + parent = &nilfs->ns_dev_subgroups->sg_mounted_snapshots_kobj; + root->snapshot_kobj.kset = nilfs_kset; + init_completion(&root->snapshot_kobj_unregister); + + if (root->cno == NILFS_CPTREE_CURRENT_CNO) { + err = kobject_init_and_add(&root->snapshot_kobj, + &nilfs_snapshot_ktype, + &nilfs->ns_dev_kobj, + "current_checkpoint"); + } else { + err = kobject_init_and_add(&root->snapshot_kobj, + &nilfs_snapshot_ktype, + parent, + "%llu", root->cno); + } + + if (err) + kobject_put(&root->snapshot_kobj); + + return err; +} + +void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root) +{ + kobject_put(&root->snapshot_kobj); +} + +/************************************************************************ + * NILFS mounted snapshots attrs * + ************************************************************************/ + +static const char mounted_snapshots_readme_str[] = + "The mounted_snapshots group contains group for\n" + "every mounted snapshot.\n"; + +static ssize_t +nilfs_mounted_snapshots_README_show(struct nilfs_mounted_snapshots_attr *attr, + struct the_nilfs *nilfs, char *buf) +{ + return sysfs_emit(buf, mounted_snapshots_readme_str); +} + +NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(README); + +static struct attribute *nilfs_mounted_snapshots_attrs[] = { + NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(README), + NULL, +}; +ATTRIBUTE_GROUPS(nilfs_mounted_snapshots); + +NILFS_DEV_INT_GROUP_OPS(mounted_snapshots, dev); +NILFS_DEV_INT_GROUP_TYPE(mounted_snapshots, dev); +NILFS_DEV_INT_GROUP_FNS(mounted_snapshots, dev); + +/************************************************************************ + * NILFS checkpoints attrs * + ************************************************************************/ + +static ssize_t +nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + __u64 ncheckpoints; + struct nilfs_cpstat cpstat; + int err; + + down_read(&nilfs->ns_segctor_sem); + err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat); + up_read(&nilfs->ns_segctor_sem); + if (err < 0) { + nilfs_err(nilfs->ns_sb, "unable to get checkpoint stat: err=%d", + err); + return err; + } + + ncheckpoints = cpstat.cs_ncps; + + return sysfs_emit(buf, "%llu\n", ncheckpoints); +} + +static ssize_t +nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + __u64 nsnapshots; + struct nilfs_cpstat cpstat; + int err; + + down_read(&nilfs->ns_segctor_sem); + err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat); + up_read(&nilfs->ns_segctor_sem); + if (err < 0) { + nilfs_err(nilfs->ns_sb, "unable to get checkpoint stat: err=%d", + err); + return err; + } + + nsnapshots = cpstat.cs_nsss; + + return sysfs_emit(buf, "%llu\n", nsnapshots); +} + +static ssize_t +nilfs_checkpoints_last_seg_checkpoint_show(struct nilfs_checkpoints_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + __u64 last_cno; + + spin_lock(&nilfs->ns_last_segment_lock); + last_cno = nilfs->ns_last_cno; + spin_unlock(&nilfs->ns_last_segment_lock); + + return sysfs_emit(buf, "%llu\n", last_cno); +} + +static ssize_t +nilfs_checkpoints_next_checkpoint_show(struct nilfs_checkpoints_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + __u64 cno; + + down_read(&nilfs->ns_segctor_sem); + cno = nilfs->ns_cno; + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%llu\n", cno); +} + +static const char checkpoints_readme_str[] = + "The checkpoints group contains attributes that describe\n" + "details about volume's checkpoints.\n\n" + "(1) checkpoints_number\n\tshow number of checkpoints on volume.\n\n" + "(2) snapshots_number\n\tshow number of snapshots on volume.\n\n" + "(3) last_seg_checkpoint\n" + "\tshow checkpoint number of the latest segment.\n\n" + "(4) next_checkpoint\n\tshow next checkpoint number.\n\n"; + +static ssize_t +nilfs_checkpoints_README_show(struct nilfs_checkpoints_attr *attr, + struct the_nilfs *nilfs, char *buf) +{ + return sysfs_emit(buf, checkpoints_readme_str); +} + +NILFS_CHECKPOINTS_RO_ATTR(checkpoints_number); +NILFS_CHECKPOINTS_RO_ATTR(snapshots_number); +NILFS_CHECKPOINTS_RO_ATTR(last_seg_checkpoint); +NILFS_CHECKPOINTS_RO_ATTR(next_checkpoint); +NILFS_CHECKPOINTS_RO_ATTR(README); + +static struct attribute *nilfs_checkpoints_attrs[] = { + NILFS_CHECKPOINTS_ATTR_LIST(checkpoints_number), + NILFS_CHECKPOINTS_ATTR_LIST(snapshots_number), + NILFS_CHECKPOINTS_ATTR_LIST(last_seg_checkpoint), + NILFS_CHECKPOINTS_ATTR_LIST(next_checkpoint), + NILFS_CHECKPOINTS_ATTR_LIST(README), + NULL, +}; +ATTRIBUTE_GROUPS(nilfs_checkpoints); + +NILFS_DEV_INT_GROUP_OPS(checkpoints, dev); +NILFS_DEV_INT_GROUP_TYPE(checkpoints, dev); +NILFS_DEV_INT_GROUP_FNS(checkpoints, dev); + +/************************************************************************ + * NILFS segments attrs * + ************************************************************************/ + +static ssize_t +nilfs_segments_segments_number_show(struct nilfs_segments_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + return sysfs_emit(buf, "%lu\n", nilfs->ns_nsegments); +} + +static ssize_t +nilfs_segments_blocks_per_segment_show(struct nilfs_segments_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + return sysfs_emit(buf, "%lu\n", nilfs->ns_blocks_per_segment); +} + +static ssize_t +nilfs_segments_clean_segments_show(struct nilfs_segments_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + unsigned long ncleansegs; + + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + + return sysfs_emit(buf, "%lu\n", ncleansegs); +} + +static ssize_t +nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + struct nilfs_sustat sustat; + int err; + + down_read(&nilfs->ns_segctor_sem); + err = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat); + up_read(&nilfs->ns_segctor_sem); + if (err < 0) { + nilfs_err(nilfs->ns_sb, "unable to get segment stat: err=%d", + err); + return err; + } + + return sysfs_emit(buf, "%llu\n", sustat.ss_ndirtysegs); +} + +static const char segments_readme_str[] = + "The segments group contains attributes that describe\n" + "details about volume's segments.\n\n" + "(1) segments_number\n\tshow number of segments on volume.\n\n" + "(2) blocks_per_segment\n\tshow number of blocks in segment.\n\n" + "(3) clean_segments\n\tshow count of clean segments.\n\n" + "(4) dirty_segments\n\tshow count of dirty segments.\n\n"; + +static ssize_t +nilfs_segments_README_show(struct nilfs_segments_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + return sysfs_emit(buf, segments_readme_str); +} + +NILFS_SEGMENTS_RO_ATTR(segments_number); +NILFS_SEGMENTS_RO_ATTR(blocks_per_segment); +NILFS_SEGMENTS_RO_ATTR(clean_segments); +NILFS_SEGMENTS_RO_ATTR(dirty_segments); +NILFS_SEGMENTS_RO_ATTR(README); + +static struct attribute *nilfs_segments_attrs[] = { + NILFS_SEGMENTS_ATTR_LIST(segments_number), + NILFS_SEGMENTS_ATTR_LIST(blocks_per_segment), + NILFS_SEGMENTS_ATTR_LIST(clean_segments), + NILFS_SEGMENTS_ATTR_LIST(dirty_segments), + NILFS_SEGMENTS_ATTR_LIST(README), + NULL, +}; +ATTRIBUTE_GROUPS(nilfs_segments); + +NILFS_DEV_INT_GROUP_OPS(segments, dev); +NILFS_DEV_INT_GROUP_TYPE(segments, dev); +NILFS_DEV_INT_GROUP_FNS(segments, dev); + +/************************************************************************ + * NILFS segctor attrs * + ************************************************************************/ + +static ssize_t +nilfs_segctor_last_pseg_block_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + sector_t last_pseg; + + spin_lock(&nilfs->ns_last_segment_lock); + last_pseg = nilfs->ns_last_pseg; + spin_unlock(&nilfs->ns_last_segment_lock); + + return sysfs_emit(buf, "%llu\n", + (unsigned long long)last_pseg); +} + +static ssize_t +nilfs_segctor_last_seg_sequence_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + u64 last_seq; + + spin_lock(&nilfs->ns_last_segment_lock); + last_seq = nilfs->ns_last_seq; + spin_unlock(&nilfs->ns_last_segment_lock); + + return sysfs_emit(buf, "%llu\n", last_seq); +} + +static ssize_t +nilfs_segctor_last_seg_checkpoint_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + __u64 last_cno; + + spin_lock(&nilfs->ns_last_segment_lock); + last_cno = nilfs->ns_last_cno; + spin_unlock(&nilfs->ns_last_segment_lock); + + return sysfs_emit(buf, "%llu\n", last_cno); +} + +static ssize_t +nilfs_segctor_current_seg_sequence_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + u64 seg_seq; + + down_read(&nilfs->ns_segctor_sem); + seg_seq = nilfs->ns_seg_seq; + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%llu\n", seg_seq); +} + +static ssize_t +nilfs_segctor_current_last_full_seg_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + __u64 segnum; + + down_read(&nilfs->ns_segctor_sem); + segnum = nilfs->ns_segnum; + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%llu\n", segnum); +} + +static ssize_t +nilfs_segctor_next_full_seg_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + __u64 nextnum; + + down_read(&nilfs->ns_segctor_sem); + nextnum = nilfs->ns_nextnum; + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%llu\n", nextnum); +} + +static ssize_t +nilfs_segctor_next_pseg_offset_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + unsigned long pseg_offset; + + down_read(&nilfs->ns_segctor_sem); + pseg_offset = nilfs->ns_pseg_offset; + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%lu\n", pseg_offset); +} + +static ssize_t +nilfs_segctor_next_checkpoint_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + __u64 cno; + + down_read(&nilfs->ns_segctor_sem); + cno = nilfs->ns_cno; + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%llu\n", cno); +} + +static ssize_t +nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + time64_t ctime; + + down_read(&nilfs->ns_segctor_sem); + ctime = nilfs->ns_ctime; + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%ptTs\n", &ctime); +} + +static ssize_t +nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + time64_t ctime; + + down_read(&nilfs->ns_segctor_sem); + ctime = nilfs->ns_ctime; + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%llu\n", ctime); +} + +static ssize_t +nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + time64_t nongc_ctime; + + down_read(&nilfs->ns_segctor_sem); + nongc_ctime = nilfs->ns_nongc_ctime; + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%ptTs\n", &nongc_ctime); +} + +static ssize_t +nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + time64_t nongc_ctime; + + down_read(&nilfs->ns_segctor_sem); + nongc_ctime = nilfs->ns_nongc_ctime; + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%llu\n", nongc_ctime); +} + +static ssize_t +nilfs_segctor_dirty_data_blocks_count_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + u32 ndirtyblks; + + down_read(&nilfs->ns_segctor_sem); + ndirtyblks = atomic_read(&nilfs->ns_ndirtyblks); + up_read(&nilfs->ns_segctor_sem); + + return sysfs_emit(buf, "%u\n", ndirtyblks); +} + +static const char segctor_readme_str[] = + "The segctor group contains attributes that describe\n" + "segctor thread activity details.\n\n" + "(1) last_pseg_block\n" + "\tshow start block number of the latest segment.\n\n" + "(2) last_seg_sequence\n" + "\tshow sequence value of the latest segment.\n\n" + "(3) last_seg_checkpoint\n" + "\tshow checkpoint number of the latest segment.\n\n" + "(4) current_seg_sequence\n\tshow segment sequence counter.\n\n" + "(5) current_last_full_seg\n" + "\tshow index number of the latest full segment.\n\n" + "(6) next_full_seg\n" + "\tshow index number of the full segment index to be used next.\n\n" + "(7) next_pseg_offset\n" + "\tshow offset of next partial segment in the current full segment.\n\n" + "(8) next_checkpoint\n\tshow next checkpoint number.\n\n" + "(9) last_seg_write_time\n" + "\tshow write time of the last segment in human-readable format.\n\n" + "(10) last_seg_write_time_secs\n" + "\tshow write time of the last segment in seconds.\n\n" + "(11) last_nongc_write_time\n" + "\tshow write time of the last segment not for cleaner operation " + "in human-readable format.\n\n" + "(12) last_nongc_write_time_secs\n" + "\tshow write time of the last segment not for cleaner operation " + "in seconds.\n\n" + "(13) dirty_data_blocks_count\n" + "\tshow number of dirty data blocks.\n\n"; + +static ssize_t +nilfs_segctor_README_show(struct nilfs_segctor_attr *attr, + struct the_nilfs *nilfs, char *buf) +{ + return sysfs_emit(buf, segctor_readme_str); +} + +NILFS_SEGCTOR_RO_ATTR(last_pseg_block); +NILFS_SEGCTOR_RO_ATTR(last_seg_sequence); +NILFS_SEGCTOR_RO_ATTR(last_seg_checkpoint); +NILFS_SEGCTOR_RO_ATTR(current_seg_sequence); +NILFS_SEGCTOR_RO_ATTR(current_last_full_seg); +NILFS_SEGCTOR_RO_ATTR(next_full_seg); +NILFS_SEGCTOR_RO_ATTR(next_pseg_offset); +NILFS_SEGCTOR_RO_ATTR(next_checkpoint); +NILFS_SEGCTOR_RO_ATTR(last_seg_write_time); +NILFS_SEGCTOR_RO_ATTR(last_seg_write_time_secs); +NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time); +NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time_secs); +NILFS_SEGCTOR_RO_ATTR(dirty_data_blocks_count); +NILFS_SEGCTOR_RO_ATTR(README); + +static struct attribute *nilfs_segctor_attrs[] = { + NILFS_SEGCTOR_ATTR_LIST(last_pseg_block), + NILFS_SEGCTOR_ATTR_LIST(last_seg_sequence), + NILFS_SEGCTOR_ATTR_LIST(last_seg_checkpoint), + NILFS_SEGCTOR_ATTR_LIST(current_seg_sequence), + NILFS_SEGCTOR_ATTR_LIST(current_last_full_seg), + NILFS_SEGCTOR_ATTR_LIST(next_full_seg), + NILFS_SEGCTOR_ATTR_LIST(next_pseg_offset), + NILFS_SEGCTOR_ATTR_LIST(next_checkpoint), + NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time), + NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time_secs), + NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time), + NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time_secs), + NILFS_SEGCTOR_ATTR_LIST(dirty_data_blocks_count), + NILFS_SEGCTOR_ATTR_LIST(README), + NULL, +}; +ATTRIBUTE_GROUPS(nilfs_segctor); + +NILFS_DEV_INT_GROUP_OPS(segctor, dev); +NILFS_DEV_INT_GROUP_TYPE(segctor, dev); +NILFS_DEV_INT_GROUP_FNS(segctor, dev); + +/************************************************************************ + * NILFS superblock attrs * + ************************************************************************/ + +static ssize_t +nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + time64_t sbwtime; + + down_read(&nilfs->ns_sem); + sbwtime = nilfs->ns_sbwtime; + up_read(&nilfs->ns_sem); + + return sysfs_emit(buf, "%ptTs\n", &sbwtime); +} + +static ssize_t +nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + time64_t sbwtime; + + down_read(&nilfs->ns_sem); + sbwtime = nilfs->ns_sbwtime; + up_read(&nilfs->ns_sem); + + return sysfs_emit(buf, "%llu\n", sbwtime); +} + +static ssize_t +nilfs_superblock_sb_write_count_show(struct nilfs_superblock_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + unsigned int sbwcount; + + down_read(&nilfs->ns_sem); + sbwcount = nilfs->ns_sbwcount; + up_read(&nilfs->ns_sem); + + return sysfs_emit(buf, "%u\n", sbwcount); +} + +static ssize_t +nilfs_superblock_sb_update_frequency_show(struct nilfs_superblock_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + unsigned int sb_update_freq; + + down_read(&nilfs->ns_sem); + sb_update_freq = nilfs->ns_sb_update_freq; + up_read(&nilfs->ns_sem); + + return sysfs_emit(buf, "%u\n", sb_update_freq); +} + +static ssize_t +nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr, + struct the_nilfs *nilfs, + const char *buf, size_t count) +{ + unsigned int val; + int err; + + err = kstrtouint(skip_spaces(buf), 0, &val); + if (err) { + nilfs_err(nilfs->ns_sb, "unable to convert string: err=%d", + err); + return err; + } + + if (val < NILFS_SB_FREQ) { + val = NILFS_SB_FREQ; + nilfs_warn(nilfs->ns_sb, + "superblock update frequency cannot be lesser than 10 seconds"); + } + + down_write(&nilfs->ns_sem); + nilfs->ns_sb_update_freq = val; + up_write(&nilfs->ns_sem); + + return count; +} + +static const char sb_readme_str[] = + "The superblock group contains attributes that describe\n" + "superblock's details.\n\n" + "(1) sb_write_time\n\tshow previous write time of super block " + "in human-readable format.\n\n" + "(2) sb_write_time_secs\n\tshow previous write time of super block " + "in seconds.\n\n" + "(3) sb_write_count\n\tshow write count of super block.\n\n" + "(4) sb_update_frequency\n" + "\tshow/set interval of periodical update of superblock (in seconds).\n\n" + "\tYou can set preferable frequency of superblock update by command:\n\n" + "\t'echo <val> > /sys/fs/<nilfs>/<dev>/superblock/sb_update_frequency'\n"; + +static ssize_t +nilfs_superblock_README_show(struct nilfs_superblock_attr *attr, + struct the_nilfs *nilfs, char *buf) +{ + return sysfs_emit(buf, sb_readme_str); +} + +NILFS_SUPERBLOCK_RO_ATTR(sb_write_time); +NILFS_SUPERBLOCK_RO_ATTR(sb_write_time_secs); +NILFS_SUPERBLOCK_RO_ATTR(sb_write_count); +NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency); +NILFS_SUPERBLOCK_RO_ATTR(README); + +static struct attribute *nilfs_superblock_attrs[] = { + NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time), + NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time_secs), + NILFS_SUPERBLOCK_ATTR_LIST(sb_write_count), + NILFS_SUPERBLOCK_ATTR_LIST(sb_update_frequency), + NILFS_SUPERBLOCK_ATTR_LIST(README), + NULL, +}; +ATTRIBUTE_GROUPS(nilfs_superblock); + +NILFS_DEV_INT_GROUP_OPS(superblock, dev); +NILFS_DEV_INT_GROUP_TYPE(superblock, dev); +NILFS_DEV_INT_GROUP_FNS(superblock, dev); + +/************************************************************************ + * NILFS device attrs * + ************************************************************************/ + +static +ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + struct nilfs_super_block *raw_sb; + u32 major; + u16 minor; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + major = le32_to_cpu(raw_sb->s_rev_level); + minor = le16_to_cpu(raw_sb->s_minor_rev_level); + up_read(&nilfs->ns_sem); + + return sysfs_emit(buf, "%d.%d\n", major, minor); +} + +static +ssize_t nilfs_dev_blocksize_show(struct nilfs_dev_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + return sysfs_emit(buf, "%u\n", nilfs->ns_blocksize); +} + +static +ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + struct nilfs_super_block *raw_sb; + u64 dev_size; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + dev_size = le64_to_cpu(raw_sb->s_dev_size); + up_read(&nilfs->ns_sem); + + return sysfs_emit(buf, "%llu\n", dev_size); +} + +static +ssize_t nilfs_dev_free_blocks_show(struct nilfs_dev_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + sector_t free_blocks = 0; + + nilfs_count_free_blocks(nilfs, &free_blocks); + return sysfs_emit(buf, "%llu\n", + (unsigned long long)free_blocks); +} + +static +ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + struct nilfs_super_block *raw_sb; + ssize_t len; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid); + up_read(&nilfs->ns_sem); + + return len; +} + +static +ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + struct nilfs_super_block *raw_sb; + ssize_t len; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n", + raw_sb->s_volume_name); + up_read(&nilfs->ns_sem); + + return len; +} + +static const char dev_readme_str[] = + "The <device> group contains attributes that describe file system\n" + "partition's details.\n\n" + "(1) revision\n\tshow NILFS file system revision.\n\n" + "(2) blocksize\n\tshow volume block size in bytes.\n\n" + "(3) device_size\n\tshow volume size in bytes.\n\n" + "(4) free_blocks\n\tshow count of free blocks on volume.\n\n" + "(5) uuid\n\tshow volume's UUID.\n\n" + "(6) volume_name\n\tshow volume's name.\n\n"; + +static ssize_t nilfs_dev_README_show(struct nilfs_dev_attr *attr, + struct the_nilfs *nilfs, + char *buf) +{ + return sysfs_emit(buf, dev_readme_str); +} + +NILFS_DEV_RO_ATTR(revision); +NILFS_DEV_RO_ATTR(blocksize); +NILFS_DEV_RO_ATTR(device_size); +NILFS_DEV_RO_ATTR(free_blocks); +NILFS_DEV_RO_ATTR(uuid); +NILFS_DEV_RO_ATTR(volume_name); +NILFS_DEV_RO_ATTR(README); + +static struct attribute *nilfs_dev_attrs[] = { + NILFS_DEV_ATTR_LIST(revision), + NILFS_DEV_ATTR_LIST(blocksize), + NILFS_DEV_ATTR_LIST(device_size), + NILFS_DEV_ATTR_LIST(free_blocks), + NILFS_DEV_ATTR_LIST(uuid), + NILFS_DEV_ATTR_LIST(volume_name), + NILFS_DEV_ATTR_LIST(README), + NULL, +}; +ATTRIBUTE_GROUPS(nilfs_dev); + +static ssize_t nilfs_dev_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs, + ns_dev_kobj); + struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr, + attr); + + return a->show ? a->show(a, nilfs, buf) : 0; +} + +static ssize_t nilfs_dev_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs, + ns_dev_kobj); + struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr, + attr); + + return a->store ? a->store(a, nilfs, buf, len) : 0; +} + +static void nilfs_dev_attr_release(struct kobject *kobj) +{ + struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs, + ns_dev_kobj); + complete(&nilfs->ns_dev_kobj_unregister); +} + +static const struct sysfs_ops nilfs_dev_attr_ops = { + .show = nilfs_dev_attr_show, + .store = nilfs_dev_attr_store, +}; + +static const struct kobj_type nilfs_dev_ktype = { + .default_groups = nilfs_dev_groups, + .sysfs_ops = &nilfs_dev_attr_ops, + .release = nilfs_dev_attr_release, +}; + +int nilfs_sysfs_create_device_group(struct super_block *sb) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + size_t devgrp_size = sizeof(struct nilfs_sysfs_dev_subgroups); + int err; + + nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL); + if (unlikely(!nilfs->ns_dev_subgroups)) { + err = -ENOMEM; + nilfs_err(sb, "unable to allocate memory for device group"); + goto failed_create_device_group; + } + + nilfs->ns_dev_kobj.kset = nilfs_kset; + init_completion(&nilfs->ns_dev_kobj_unregister); + err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL, + "%s", sb->s_id); + if (err) + goto cleanup_dev_kobject; + + err = nilfs_sysfs_create_mounted_snapshots_group(nilfs); + if (err) + goto cleanup_dev_kobject; + + err = nilfs_sysfs_create_checkpoints_group(nilfs); + if (err) + goto delete_mounted_snapshots_group; + + err = nilfs_sysfs_create_segments_group(nilfs); + if (err) + goto delete_checkpoints_group; + + err = nilfs_sysfs_create_superblock_group(nilfs); + if (err) + goto delete_segments_group; + + err = nilfs_sysfs_create_segctor_group(nilfs); + if (err) + goto delete_superblock_group; + + return 0; + +delete_superblock_group: + nilfs_sysfs_delete_superblock_group(nilfs); + +delete_segments_group: + nilfs_sysfs_delete_segments_group(nilfs); + +delete_checkpoints_group: + nilfs_sysfs_delete_checkpoints_group(nilfs); + +delete_mounted_snapshots_group: + nilfs_sysfs_delete_mounted_snapshots_group(nilfs); + +cleanup_dev_kobject: + kobject_put(&nilfs->ns_dev_kobj); + kfree(nilfs->ns_dev_subgroups); + +failed_create_device_group: + return err; +} + +void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs) +{ + nilfs_sysfs_delete_mounted_snapshots_group(nilfs); + nilfs_sysfs_delete_checkpoints_group(nilfs); + nilfs_sysfs_delete_segments_group(nilfs); + nilfs_sysfs_delete_superblock_group(nilfs); + nilfs_sysfs_delete_segctor_group(nilfs); + kobject_del(&nilfs->ns_dev_kobj); + kobject_put(&nilfs->ns_dev_kobj); + kfree(nilfs->ns_dev_subgroups); +} + +/************************************************************************ + * NILFS feature attrs * + ************************************************************************/ + +static ssize_t nilfs_feature_revision_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d.%d\n", + NILFS_CURRENT_REV, NILFS_MINOR_REV); +} + +static const char features_readme_str[] = + "The features group contains attributes that describe NILFS file\n" + "system driver features.\n\n" + "(1) revision\n\tshow current revision of NILFS file system driver.\n"; + +static ssize_t nilfs_feature_README_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, features_readme_str); +} + +NILFS_FEATURE_RO_ATTR(revision); +NILFS_FEATURE_RO_ATTR(README); + +static struct attribute *nilfs_feature_attrs[] = { + NILFS_FEATURE_ATTR_LIST(revision), + NILFS_FEATURE_ATTR_LIST(README), + NULL, +}; + +static const struct attribute_group nilfs_feature_attr_group = { + .name = "features", + .attrs = nilfs_feature_attrs, +}; + +int __init nilfs_sysfs_init(void) +{ + int err; + + nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj); + if (!nilfs_kset) { + err = -ENOMEM; + nilfs_err(NULL, "unable to create sysfs entry: err=%d", err); + goto failed_sysfs_init; + } + + err = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group); + if (unlikely(err)) { + nilfs_err(NULL, "unable to create feature group: err=%d", err); + goto cleanup_sysfs_init; + } + + return 0; + +cleanup_sysfs_init: + kset_unregister(nilfs_kset); + +failed_sysfs_init: + return err; +} + +void nilfs_sysfs_exit(void) +{ + sysfs_remove_group(&nilfs_kset->kobj, &nilfs_feature_attr_group); + kset_unregister(nilfs_kset); +} diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h new file mode 100644 index 000000000000..d370cd5cce3f --- /dev/null +++ b/fs/nilfs2/sysfs.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Sysfs support declarations. + * + * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation. + * Copyright (C) 2014 HGST, Inc., a Western Digital Company. + * + * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com> + */ + +#ifndef _NILFS_SYSFS_H +#define _NILFS_SYSFS_H + +#include <linux/sysfs.h> + +#define NILFS_ROOT_GROUP_NAME "nilfs2" + +/* + * struct nilfs_sysfs_dev_subgroups - device subgroup kernel objects + * @sg_superblock_kobj: /sys/fs/<nilfs>/<device>/superblock + * @sg_superblock_kobj_unregister: completion state + * @sg_segctor_kobj: /sys/fs/<nilfs>/<device>/segctor + * @sg_segctor_kobj_unregister: completion state + * @sg_mounted_snapshots_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots + * @sg_mounted_snapshots_kobj_unregister: completion state + * @sg_checkpoints_kobj: /sys/fs/<nilfs>/<device>/checkpoints + * @sg_checkpoints_kobj_unregister: completion state + * @sg_segments_kobj: /sys/fs/<nilfs>/<device>/segments + * @sg_segments_kobj_unregister: completion state + */ +struct nilfs_sysfs_dev_subgroups { + /* /sys/fs/<nilfs>/<device>/superblock */ + struct kobject sg_superblock_kobj; + struct completion sg_superblock_kobj_unregister; + + /* /sys/fs/<nilfs>/<device>/segctor */ + struct kobject sg_segctor_kobj; + struct completion sg_segctor_kobj_unregister; + + /* /sys/fs/<nilfs>/<device>/mounted_snapshots */ + struct kobject sg_mounted_snapshots_kobj; + struct completion sg_mounted_snapshots_kobj_unregister; + + /* /sys/fs/<nilfs>/<device>/checkpoints */ + struct kobject sg_checkpoints_kobj; + struct completion sg_checkpoints_kobj_unregister; + + /* /sys/fs/<nilfs>/<device>/segments */ + struct kobject sg_segments_kobj; + struct completion sg_segments_kobj_unregister; +}; + +#define NILFS_KOBJ_ATTR_STRUCT(name) \ +struct nilfs_##name##_attr { \ + struct attribute attr; \ + ssize_t (*show)(struct kobject *, struct kobj_attribute *, \ + char *); \ + ssize_t (*store)(struct kobject *, struct kobj_attribute *, \ + const char *, size_t); \ +} + +NILFS_KOBJ_ATTR_STRUCT(feature); + +#define NILFS_DEV_ATTR_STRUCT(name) \ +struct nilfs_##name##_attr { \ + struct attribute attr; \ + ssize_t (*show)(struct nilfs_##name##_attr *, struct the_nilfs *, \ + char *); \ + ssize_t (*store)(struct nilfs_##name##_attr *, struct the_nilfs *, \ + const char *, size_t); \ +} + +NILFS_DEV_ATTR_STRUCT(dev); +NILFS_DEV_ATTR_STRUCT(segments); +NILFS_DEV_ATTR_STRUCT(mounted_snapshots); +NILFS_DEV_ATTR_STRUCT(checkpoints); +NILFS_DEV_ATTR_STRUCT(superblock); +NILFS_DEV_ATTR_STRUCT(segctor); + +#define NILFS_CP_ATTR_STRUCT(name) \ +struct nilfs_##name##_attr { \ + struct attribute attr; \ + ssize_t (*show)(struct nilfs_##name##_attr *, struct nilfs_root *, \ + char *); \ + ssize_t (*store)(struct nilfs_##name##_attr *, struct nilfs_root *, \ + const char *, size_t); \ +} + +NILFS_CP_ATTR_STRUCT(snapshot); + +#define NILFS_ATTR(type, name, mode, show, store) \ + static struct nilfs_##type##_attr nilfs_##type##_attr_##name = \ + __ATTR(name, mode, show, store) + +#define NILFS_INFO_ATTR(type, name) \ + NILFS_ATTR(type, name, 0444, NULL, NULL) +#define NILFS_RO_ATTR(type, name) \ + NILFS_ATTR(type, name, 0444, nilfs_##type##_##name##_show, NULL) +#define NILFS_RW_ATTR(type, name) \ + NILFS_ATTR(type, name, 0644, \ + nilfs_##type##_##name##_show, \ + nilfs_##type##_##name##_store) + +#define NILFS_FEATURE_INFO_ATTR(name) \ + NILFS_INFO_ATTR(feature, name) +#define NILFS_FEATURE_RO_ATTR(name) \ + NILFS_RO_ATTR(feature, name) +#define NILFS_FEATURE_RW_ATTR(name) \ + NILFS_RW_ATTR(feature, name) + +#define NILFS_DEV_INFO_ATTR(name) \ + NILFS_INFO_ATTR(dev, name) +#define NILFS_DEV_RO_ATTR(name) \ + NILFS_RO_ATTR(dev, name) +#define NILFS_DEV_RW_ATTR(name) \ + NILFS_RW_ATTR(dev, name) + +#define NILFS_SEGMENTS_RO_ATTR(name) \ + NILFS_RO_ATTR(segments, name) +#define NILFS_SEGMENTS_RW_ATTR(name) \ + NILFS_RW_ATTR(segs_info, name) + +#define NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(name) \ + NILFS_RO_ATTR(mounted_snapshots, name) + +#define NILFS_CHECKPOINTS_RO_ATTR(name) \ + NILFS_RO_ATTR(checkpoints, name) +#define NILFS_CHECKPOINTS_RW_ATTR(name) \ + NILFS_RW_ATTR(checkpoints, name) + +#define NILFS_SNAPSHOT_INFO_ATTR(name) \ + NILFS_INFO_ATTR(snapshot, name) +#define NILFS_SNAPSHOT_RO_ATTR(name) \ + NILFS_RO_ATTR(snapshot, name) +#define NILFS_SNAPSHOT_RW_ATTR(name) \ + NILFS_RW_ATTR(snapshot, name) + +#define NILFS_SUPERBLOCK_RO_ATTR(name) \ + NILFS_RO_ATTR(superblock, name) +#define NILFS_SUPERBLOCK_RW_ATTR(name) \ + NILFS_RW_ATTR(superblock, name) + +#define NILFS_SEGCTOR_INFO_ATTR(name) \ + NILFS_INFO_ATTR(segctor, name) +#define NILFS_SEGCTOR_RO_ATTR(name) \ + NILFS_RO_ATTR(segctor, name) +#define NILFS_SEGCTOR_RW_ATTR(name) \ + NILFS_RW_ATTR(segctor, name) + +#define NILFS_FEATURE_ATTR_LIST(name) \ + (&nilfs_feature_attr_##name.attr) +#define NILFS_DEV_ATTR_LIST(name) \ + (&nilfs_dev_attr_##name.attr) +#define NILFS_SEGMENTS_ATTR_LIST(name) \ + (&nilfs_segments_attr_##name.attr) +#define NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(name) \ + (&nilfs_mounted_snapshots_attr_##name.attr) +#define NILFS_CHECKPOINTS_ATTR_LIST(name) \ + (&nilfs_checkpoints_attr_##name.attr) +#define NILFS_SNAPSHOT_ATTR_LIST(name) \ + (&nilfs_snapshot_attr_##name.attr) +#define NILFS_SUPERBLOCK_ATTR_LIST(name) \ + (&nilfs_superblock_attr_##name.attr) +#define NILFS_SEGCTOR_ATTR_LIST(name) \ + (&nilfs_segctor_attr_##name.attr) + +#endif /* _NILFS_SYSFS_H */ diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 94c451ce6d24..d0bcf744c553 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * the_nilfs.c - the_nilfs shared structure. + * the_nilfs shared structure. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. * */ @@ -25,7 +12,7 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> -#include <linux/random.h> +#include <linux/log2.h> #include <linux/crc32.h> #include "nilfs.h" #include "segment.h" @@ -60,12 +47,12 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, /** * alloc_nilfs - allocate a nilfs object - * @bdev: block device to which the_nilfs is related + * @sb: super block instance * - * Return Value: On success, pointer to the_nilfs is returned. - * On error, NULL is returned. + * Return: a pointer to the allocated nilfs object on success, or NULL on + * failure. */ -struct the_nilfs *alloc_nilfs(struct block_device *bdev) +struct the_nilfs *alloc_nilfs(struct super_block *sb) { struct the_nilfs *nilfs; @@ -73,18 +60,19 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) if (!nilfs) return NULL; - nilfs->ns_bdev = bdev; + nilfs->ns_sb = sb; + nilfs->ns_bdev = sb->s_bdev; atomic_set(&nilfs->ns_ndirtyblks, 0); init_rwsem(&nilfs->ns_sem); mutex_init(&nilfs->ns_snapshot_mount_mutex); INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_inode_lock); - spin_lock_init(&nilfs->ns_next_gen_lock); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_cptree = RB_ROOT; spin_lock_init(&nilfs->ns_cptree_lock); init_rwsem(&nilfs->ns_segctor_sem); + nilfs->ns_sb_update_freq = NILFS_SB_FREQ; return nilfs; } @@ -110,8 +98,8 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, struct nilfs_super_root *raw_sr; struct nilfs_super_block **sbp = nilfs->ns_sbp; struct nilfs_inode *rawi; - unsigned dat_entry_size, segment_usage_size, checkpoint_size; - unsigned inode_size; + unsigned int dat_entry_size, segment_usage_size, checkpoint_size; + unsigned int inode_size; int err; err = nilfs_read_super_root_block(nilfs, sr_block, &bh_sr, 1); @@ -177,6 +165,9 @@ static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri) * containing a super root from a given super block, and initializes * relevant information on the nilfs object preparatory for log * scanning and recovery. + * + * Return: 0 on success, or %-EINVAL if current segment number is out + * of range. */ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) @@ -193,20 +184,57 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); nilfs->ns_cno = nilfs->ns_last_cno + 1; if (nilfs->ns_segnum >= nilfs->ns_nsegments) { - printk(KERN_ERR "NILFS invalid last segment number.\n"); + nilfs_err(nilfs->ns_sb, + "pointed segment number is out of range: segnum=%llu, nsegments=%lu", + (unsigned long long)nilfs->ns_segnum, + nilfs->ns_nsegments); ret = -EINVAL; } return ret; } /** + * nilfs_get_blocksize - get block size from raw superblock data + * @sb: super block instance + * @sbp: superblock raw data buffer + * @blocksize: place to store block size + * + * nilfs_get_blocksize() calculates the block size from the block size + * exponent information written in @sbp and stores it in @blocksize, + * or aborts with an error message if it's too large. + * + * Return: 0 on success, or %-EINVAL if the block size is too large. + */ +static int nilfs_get_blocksize(struct super_block *sb, + struct nilfs_super_block *sbp, int *blocksize) +{ + unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size); + + if (unlikely(shift_bits > + ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)) { + nilfs_err(sb, "too large filesystem blocksize: 2 ^ %u KiB", + shift_bits); + return -EINVAL; + } + *blocksize = BLOCK_SIZE << shift_bits; + return 0; +} + +/** * load_nilfs - load and recover the nilfs * @nilfs: the_nilfs structure to be released - * @sb: super block isntance used to recover past segment + * @sb: super block instance used to recover past segment * * load_nilfs() searches and load the latest super root, * attaches the last segment, and does recovery if needed. * The caller must call this exclusively for simultaneous mounts. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EINVAL - No valid segment found. + * * %-EIO - I/O error. + * * %-ENOMEM - Insufficient memory available. + * * %-EROFS - Read only device or RO compat mode (if recovery is required) */ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) { @@ -217,12 +245,12 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) int err; if (!valid_fs) { - printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); - if (s_flags & MS_RDONLY) { - printk(KERN_INFO "NILFS: INFO: recovery " - "required for readonly filesystem.\n"); - printk(KERN_INFO "NILFS: write access will " - "be enabled during recovery.\n"); + nilfs_warn(sb, "mounting unchecked fs"); + if (s_flags & SB_RDONLY) { + nilfs_info(sb, + "recovery required for readonly filesystem"); + nilfs_info(sb, + "write access will be enabled during recovery"); } } @@ -237,13 +265,11 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto scan_error; if (!nilfs_valid_sb(sbp[1])) { - printk(KERN_WARNING - "NILFS warning: unable to fall back to spare" - "super block\n"); + nilfs_warn(sb, + "unable to fall back to spare super block"); goto scan_error; } - printk(KERN_INFO - "NILFS: try rollback from an earlier position\n"); + nilfs_info(sb, "trying rollback from an earlier position"); /* * restore super block with its spare and reconfigure @@ -254,12 +280,15 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); /* verify consistency between two super blocks */ - blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size); + err = nilfs_get_blocksize(sb, sbp[0], &blocksize); + if (err) + goto scan_error; + if (blocksize != nilfs->ns_blocksize) { - printk(KERN_WARNING - "NILFS warning: blocksize differs between " - "two super blocks (%d != %d)\n", - blocksize, nilfs->ns_blocksize); + nilfs_warn(sb, + "blocksize differs between two super blocks (%d != %d)", + blocksize, nilfs->ns_blocksize); + err = -EINVAL; goto scan_error; } @@ -278,41 +307,44 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) err = nilfs_load_super_root(nilfs, sb, ri.ri_super_root); if (unlikely(err)) { - printk(KERN_ERR "NILFS: error loading super root.\n"); + nilfs_err(sb, "error %d while loading super root", err); goto failed; } + err = nilfs_sysfs_create_device_group(sb); + if (unlikely(err)) + goto sysfs_error; + if (valid_fs) goto skip_recovery; - if (s_flags & MS_RDONLY) { + if (s_flags & SB_RDONLY) { __u64 features; if (nilfs_test_opt(nilfs, NORECOVERY)) { - printk(KERN_INFO "NILFS: norecovery option specified. " - "skipping roll-forward recovery\n"); + nilfs_info(sb, + "norecovery option specified, skipping roll-forward recovery"); goto skip_recovery; } features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; if (features) { - printk(KERN_ERR "NILFS: couldn't proceed with " - "recovery because of unsupported optional " - "features (%llx)\n", - (unsigned long long)features); + nilfs_err(sb, + "couldn't proceed with recovery because of unsupported optional features (%llx)", + (unsigned long long)features); err = -EROFS; goto failed_unload; } if (really_read_only) { - printk(KERN_ERR "NILFS: write access " - "unavailable, cannot proceed.\n"); + nilfs_err(sb, + "write access unavailable, cannot proceed"); err = -EROFS; goto failed_unload; } - sb->s_flags &= ~MS_RDONLY; + sb->s_flags &= ~SB_RDONLY; } else if (nilfs_test_opt(nilfs, NORECOVERY)) { - printk(KERN_ERR "NILFS: recovery cancelled because norecovery " - "option was specified for a read/write mount\n"); + nilfs_err(sb, + "recovery cancelled because norecovery option was specified for a read/write mount"); err = -EINVAL; goto failed_unload; } @@ -327,11 +359,12 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) up_write(&nilfs->ns_sem); if (err) { - printk(KERN_ERR "NILFS: failed to update super block. " - "recovery unfinished.\n"); + nilfs_err(sb, + "error %d updating super block. recovery unfinished.", + err); goto failed_unload; } - printk(KERN_INFO "NILFS: recovery complete.\n"); + nilfs_info(sb, "recovery complete"); skip_recovery: nilfs_clear_recovery_info(&ri); @@ -339,10 +372,13 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) return 0; scan_error: - printk(KERN_ERR "NILFS: error searching super root.\n"); + nilfs_err(sb, "error %d while searching super root", err); goto failed; failed_unload: + nilfs_sysfs_delete_device_group(nilfs); + + sysfs_error: iput(nilfs->ns_cpfile); iput(nilfs->ns_sufile); iput(nilfs->ns_dat); @@ -368,6 +404,8 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) * nilfs_nrsvsegs - calculate the number of reserved segments * @nilfs: nilfs object * @nsegs: total number of segments + * + * Return: Number of reserved segments. */ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) { @@ -376,6 +414,20 @@ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) 100)); } +/** + * nilfs_max_segment_count - calculate the maximum number of segments + * @nilfs: nilfs object + * + * Return: Maximum number of segments + */ +static u64 nilfs_max_segment_count(struct the_nilfs *nilfs) +{ + u64 max_count = U64_MAX; + + max_count = div64_ul(max_count, nilfs->ns_blocks_per_segment); + return min_t(u64, max_count, ULONG_MAX); +} + void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) { nilfs->ns_nsegments = nsegs; @@ -385,13 +437,14 @@ void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { + u64 nsegments, nblocks; + if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) { - printk(KERN_ERR "NILFS: unsupported revision " - "(superblock rev.=%d.%d, current rev.=%d.%d). " - "Please check the version of mkfs.nilfs.\n", - le32_to_cpu(sbp->s_rev_level), - le16_to_cpu(sbp->s_minor_rev_level), - NILFS_CURRENT_REV, NILFS_MINOR_REV); + nilfs_err(nilfs->ns_sb, + "unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). Please check the version of mkfs.nilfs(2).", + le32_to_cpu(sbp->s_rev_level), + le16_to_cpu(sbp->s_minor_rev_level), + NILFS_CURRENT_REV, NILFS_MINOR_REV); return -EINVAL; } nilfs->ns_sbsize = le16_to_cpu(sbp->s_bytes); @@ -399,11 +452,28 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, return -EINVAL; nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); + if (nilfs->ns_inode_size > nilfs->ns_blocksize) { + nilfs_err(nilfs->ns_sb, "too large inode size: %d bytes", + nilfs->ns_inode_size); + return -EINVAL; + } else if (nilfs->ns_inode_size < NILFS_MIN_INODE_SIZE) { + nilfs_err(nilfs->ns_sb, "too small inode size: %d bytes", + nilfs->ns_inode_size); + return -EINVAL; + } + nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); + if (nilfs->ns_first_ino < NILFS_USER_INO) { + nilfs_err(nilfs->ns_sb, + "too small lower limit for non-reserved inode numbers: %u", + nilfs->ns_first_ino); + return -EINVAL; + } nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { - printk(KERN_ERR "NILFS: too short segment.\n"); + nilfs_err(nilfs->ns_sb, "too short segment: %lu blocks", + nilfs->ns_blocks_per_segment); return -EINVAL; } @@ -412,11 +482,40 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, le32_to_cpu(sbp->s_r_segments_percentage); if (nilfs->ns_r_segments_percentage < 1 || nilfs->ns_r_segments_percentage > 99) { - printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n"); + nilfs_err(nilfs->ns_sb, + "invalid reserved segments percentage: %lu", + nilfs->ns_r_segments_percentage); + return -EINVAL; + } + + nsegments = le64_to_cpu(sbp->s_nsegments); + if (nsegments > nilfs_max_segment_count(nilfs)) { + nilfs_err(nilfs->ns_sb, + "segment count %llu exceeds upper limit (%llu segments)", + (unsigned long long)nsegments, + (unsigned long long)nilfs_max_segment_count(nilfs)); return -EINVAL; } - nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments)); + nblocks = sb_bdev_nr_blocks(nilfs->ns_sb); + if (nblocks) { + u64 min_block_count = nsegments * nilfs->ns_blocks_per_segment; + /* + * To avoid failing to mount early device images without a + * second superblock, exclude that block count from the + * "min_block_count" calculation. + */ + + if (nblocks < min_block_count) { + nilfs_err(nilfs->ns_sb, + "total number of segment blocks %llu exceeds device size (%llu blocks)", + (unsigned long long)min_block_count, + (unsigned long long)nblocks); + return -EINVAL; + } + } + + nilfs_set_nsegments(nilfs, nsegments); nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); return 0; } @@ -431,7 +530,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC) return 0; bytes = le16_to_cpu(sbp->s_bytes); - if (bytes > BLOCK_SIZE) + if (bytes < sumoff + 4 || bytes > BLOCK_SIZE) return 0; crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp, sumoff); @@ -441,11 +540,33 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) return crc == le32_to_cpu(sbp->s_sum); } -static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) +/** + * nilfs_sb2_bad_offset - check the location of the second superblock + * @sbp: superblock raw data buffer + * @offset: byte offset of second superblock calculated from device size + * + * nilfs_sb2_bad_offset() checks if the position on the second + * superblock is valid or not based on the filesystem parameters + * stored in @sbp. If @offset points to a location within the segment + * area, or if the parameters themselves are not normal, it is + * determined to be invalid. + * + * Return: true if invalid, false if valid. + */ +static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) { - return offset < ((le64_to_cpu(sbp->s_nsegments) * - le32_to_cpu(sbp->s_blocks_per_segment)) << - (le32_to_cpu(sbp->s_log_block_size) + 10)); + unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size); + u32 blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); + u64 nsegments = le64_to_cpu(sbp->s_nsegments); + u64 index; + + if (blocks_per_segment < NILFS_SEG_MIN_BLOCKS || + shift_bits > ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS) + return true; + + index = offset >> (shift_bits + BLOCK_SIZE_BITS); + do_div(index, blocks_per_segment); + return index < nsegments; } static void nilfs_release_super_block(struct the_nilfs *nilfs) @@ -487,8 +608,14 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, { struct nilfs_super_block **sbp = nilfs->ns_sbp; struct buffer_head **sbh = nilfs->ns_sbh; - u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size); - int valid[2], swp = 0; + u64 sb2off, devsize = bdev_nr_bytes(nilfs->ns_bdev); + int valid[2], swp = 0, older; + + if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) { + nilfs_err(sb, "device size too small"); + return -EINVAL; + } + sb2off = NILFS_SB2_OFFSET_BYTES(devsize); sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize, &sbh[0]); @@ -496,16 +623,16 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, if (!sbp[0]) { if (!sbp[1]) { - printk(KERN_ERR "NILFS: unable to read superblock\n"); + nilfs_err(sb, "unable to read superblock"); return -EIO; } - printk(KERN_WARNING - "NILFS warning: unable to read primary superblock " - "(blocksize = %d)\n", blocksize); + nilfs_warn(sb, + "unable to read primary superblock (blocksize = %d)", + blocksize); } else if (!sbp[1]) { - printk(KERN_WARNING - "NILFS warning: unable to read secondary superblock " - "(blocksize = %d)\n", blocksize); + nilfs_warn(sb, + "unable to read secondary superblock (blocksize = %d)", + blocksize); } /* @@ -527,20 +654,36 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, } if (!valid[swp]) { nilfs_release_super_block(nilfs); - printk(KERN_ERR "NILFS: Can't find nilfs on dev %s.\n", - sb->s_id); + nilfs_err(sb, "couldn't find nilfs on the device"); return -EINVAL; } if (!valid[!swp]) - printk(KERN_WARNING "NILFS warning: broken superblock. " - "using spare superblock (blocksize = %d).\n", blocksize); + nilfs_warn(sb, + "broken superblock, retrying with spare superblock (blocksize = %d)", + blocksize); if (swp) nilfs_swap_super_block(nilfs); + /* + * Calculate the array index of the older superblock data. + * If one has been dropped, set index 0 pointing to the remaining one, + * otherwise set index 1 pointing to the old one (including if both + * are the same). + * + * Divided case valid[0] valid[1] swp -> older + * ------------------------------------------------------------- + * Both SBs are invalid 0 0 N/A (Error) + * SB1 is invalid 0 1 1 0 + * SB2 is invalid 1 0 0 0 + * SB2 is newer 1 1 1 0 + * SB2 is older or the same 1 1 0 1 + */ + older = valid[1] ^ swp; + nilfs->ns_sbwcount = 0; nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); - nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); + nilfs->ns_prot_seq = le64_to_cpu(sbp[older]->s_last_seq); *sbpp = sbp[0]; return 0; } @@ -549,26 +692,22 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, * init_nilfs - initialize a NILFS instance. * @nilfs: the_nilfs structure * @sb: super block - * @data: mount options * * init_nilfs() performs common initialization per block device (e.g. * reading the super block, getting disk layout information, initializing * shared fields in the_nilfs). * - * Return Value: On success, 0 is returned. On error, a negative error - * code is returned. + * Return: 0 on success, or a negative error code on failure. */ -int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) { struct nilfs_super_block *sbp; int blocksize; int err; - down_write(&nilfs->ns_sem); - blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { - printk(KERN_ERR "NILFS: unable to set blocksize\n"); + nilfs_err(sb, "unable to set blocksize"); err = -EINVAL; goto out; } @@ -576,7 +715,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) if (err) goto out; - err = nilfs_store_magic_and_option(sb, sbp, data); + err = nilfs_store_magic(sb, sbp); if (err) goto failed_sbh; @@ -584,11 +723,14 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) if (err) goto failed_sbh; - blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); - if (blocksize < NILFS_MIN_BLOCK_SIZE || - blocksize > NILFS_MAX_BLOCK_SIZE) { - printk(KERN_ERR "NILFS: couldn't mount because of unsupported " - "filesystem blocksize %d\n", blocksize); + err = nilfs_get_blocksize(sb, sbp, &blocksize); + if (err) + goto failed_sbh; + + if (blocksize < NILFS_MIN_BLOCK_SIZE) { + nilfs_err(sb, + "couldn't mount because of unsupported filesystem blocksize %d", + blocksize); err = -EINVAL; goto failed_sbh; } @@ -596,28 +738,30 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) int hw_blocksize = bdev_logical_block_size(sb->s_bdev); if (blocksize < hw_blocksize) { - printk(KERN_ERR - "NILFS: blocksize %d too small for device " - "(sector-size = %d).\n", - blocksize, hw_blocksize); + nilfs_err(sb, + "blocksize %d too small for device (sector-size = %d)", + blocksize, hw_blocksize); err = -EINVAL; goto failed_sbh; } nilfs_release_super_block(nilfs); - sb_set_blocksize(sb, blocksize); + if (!sb_set_blocksize(sb, blocksize)) { + nilfs_err(sb, "bad blocksize %d", blocksize); + err = -EINVAL; + goto out; + } err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); if (err) goto out; - /* not failed_sbh; sbh is released automatically - when reloading fails. */ + /* + * Not to failed_sbh; sbh is released automatically + * when reloading fails. + */ } nilfs->ns_blocksize_bits = sb->s_blocksize_bits; nilfs->ns_blocksize = blocksize; - get_random_bytes(&nilfs->ns_next_generation, - sizeof(nilfs->ns_next_generation)); - err = nilfs_store_disk_layout(nilfs, sbp); if (err) goto failed_sbh; @@ -633,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) set_nilfs_init(nilfs); err = 0; out: - up_write(&nilfs->ns_sem); return err; failed_sbh: @@ -664,7 +807,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, 0); + GFP_NOFS); if (ret < 0) return ret; nblocks = 0; @@ -674,7 +817,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, 0); + GFP_NOFS); return ret; } @@ -682,9 +825,7 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) { unsigned long ncleansegs; - down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); - up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; return 0; } @@ -715,7 +856,7 @@ struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno) } else if (cno > root->cno) { n = n->rb_right; } else { - atomic_inc(&root->count); + refcount_inc(&root->count); spin_unlock(&nilfs->ns_cptree_lock); return root; } @@ -730,12 +871,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) { struct rb_node **p, *parent; struct nilfs_root *root, *new; + int err; root = nilfs_lookup_root(nilfs, cno); if (root) return root; - new = kmalloc(sizeof(*root), GFP_KERNEL); + new = kzalloc(sizeof(*root), GFP_KERNEL); if (!new) return NULL; @@ -753,7 +895,7 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) } else if (cno > root->cno) { p = &(*p)->rb_right; } else { - atomic_inc(&root->count); + refcount_inc(&root->count); spin_unlock(&nilfs->ns_cptree_lock); kfree(new); return root; @@ -763,7 +905,7 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) new->cno = cno; new->ifile = NULL; new->nilfs = nilfs; - atomic_set(&new->count, 1); + refcount_set(&new->count, 1); atomic64_set(&new->inodes_count, 0); atomic64_set(&new->blocks_count, 0); @@ -772,19 +914,25 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) spin_unlock(&nilfs->ns_cptree_lock); + err = nilfs_sysfs_create_snapshot_group(new); + if (err) { + kfree(new); + new = NULL; + } + return new; } void nilfs_put_root(struct nilfs_root *root) { - if (atomic_dec_and_test(&root->count)) { - struct the_nilfs *nilfs = root->nilfs; + struct the_nilfs *nilfs = root->nilfs; - spin_lock(&nilfs->ns_cptree_lock); + if (refcount_dec_and_lock(&root->count, &nilfs->ns_cptree_lock)) { rb_erase(&root->rb_node, &nilfs->ns_cptree); spin_unlock(&nilfs->ns_cptree_lock); - if (root->ifile) - iput(root->ifile); + + nilfs_sysfs_delete_snapshot_group(root); + iput(root->ifile); kfree(root); } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index de8cc53b4a5c..4776a70f01ae 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* - * the_nilfs.h - the_nilfs shared structure. + * the_nilfs shared structure. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> + * Written by Ryusuke Konishi. * */ @@ -31,8 +18,10 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/slab.h> +#include <linux/refcount.h> struct nilfs_sc_info; +struct nilfs_sysfs_dev_subgroups; /* the_nilfs struct */ enum { @@ -40,11 +29,14 @@ enum { THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ THE_NILFS_GC_RUNNING, /* gc process is running */ THE_NILFS_SB_DIRTY, /* super block is dirty */ + THE_NILFS_PURGING, /* disposing dirty files for cleanup */ }; /** * struct the_nilfs - struct to supervise multiple nilfs mount points * @ns_flags: flags + * @ns_flushed_device: flag indicating if all volatile data was flushed + * @ns_sb: back pointer to super block instance * @ns_bdev: block device * @ns_sem: semaphore for shared states * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts @@ -54,6 +46,7 @@ enum { * @ns_sbwcount: write count of super block * @ns_sbsize: size of valid data in super block * @ns_mount_state: file system state + * @ns_sb_update_freq: interval of periodical update of superblocks (in seconds) * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. * @ns_nextnum: index number of the full segment index to be used next @@ -78,8 +71,6 @@ enum { * @ns_dirty_files: list of dirty files * @ns_inode_lock: lock protecting @ns_dirty_files * @ns_gc_inodes: dummy inodes to keep live blocks - * @ns_next_generation: next generation number for inodes - * @ns_next_gen_lock: lock protecting @ns_next_generation * @ns_mount_opt: mount options * @ns_resuid: uid for reserved blocks * @ns_resgid: gid for reserved blocks @@ -95,10 +86,15 @@ enum { * @ns_inode_size: size of on-disk inode * @ns_first_ino: first not-special inode number * @ns_crc_seed: seed value of CRC32 calculation + * @ns_dev_kobj: /sys/fs/<nilfs>/<device> + * @ns_dev_kobj_unregister: completion state + * @ns_dev_subgroups: <device> subgroups pointer */ struct the_nilfs { unsigned long ns_flags; + int ns_flushed_device; + struct super_block *ns_sb; struct block_device *ns_bdev; struct rw_semaphore ns_sem; struct mutex ns_snapshot_mount_mutex; @@ -110,25 +106,23 @@ struct the_nilfs { */ struct buffer_head *ns_sbh[2]; struct nilfs_super_block *ns_sbp[2]; - time_t ns_sbwtime; - unsigned ns_sbwcount; - unsigned ns_sbsize; - unsigned ns_mount_state; + time64_t ns_sbwtime; + unsigned int ns_sbwcount; + unsigned int ns_sbsize; + unsigned int ns_mount_state; + unsigned int ns_sb_update_freq; /* - * Following fields are dedicated to a writable FS-instance. - * Except for the period seeking checkpoint, code outside the segment - * constructor must lock a segment semaphore while accessing these - * fields. - * The writable FS-instance is sole during a lifetime of the_nilfs. + * The following fields are updated by a writable FS-instance. + * These fields are protected by ns_segctor_sem outside load_nilfs(). */ u64 ns_seg_seq; __u64 ns_segnum; __u64 ns_nextnum; unsigned long ns_pseg_offset; __u64 ns_cno; - time_t ns_ctime; - time_t ns_nongc_ctime; + time64_t ns_ctime; + time64_t ns_nongc_ctime; atomic_t ns_ndirtyblks; /* @@ -165,10 +159,6 @@ struct the_nilfs { /* GC inode list */ struct list_head ns_gc_inodes; - /* Inode allocator */ - u32 ns_next_generation; - spinlock_t ns_next_gen_lock; - /* Mount options */ unsigned long ns_mount_opt; @@ -186,8 +176,13 @@ struct the_nilfs { unsigned long ns_nrsvsegs; unsigned long ns_first_data_block; int ns_inode_size; - int ns_first_ino; + unsigned int ns_first_ino; u32 ns_crc_seed; + + /* /sys/fs/<nilfs>/<device> */ + struct kobject ns_dev_kobj; + struct completion ns_dev_kobj_unregister; + struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups; }; #define THE_NILFS_FNS(bit, name) \ @@ -208,20 +203,16 @@ THE_NILFS_FNS(INIT, init) THE_NILFS_FNS(DISCONTINUED, discontinued) THE_NILFS_FNS(GC_RUNNING, gc_running) THE_NILFS_FNS(SB_DIRTY, sb_dirty) +THE_NILFS_FNS(PURGING, purging) /* * Mount option operations */ #define nilfs_clear_opt(nilfs, opt) \ - do { (nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt; } while (0) + ((nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt) #define nilfs_set_opt(nilfs, opt) \ - do { (nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt; } while (0) + ((nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt) #define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt) -#define nilfs_write_opt(nilfs, mask, opt) \ - do { (nilfs)->ns_mount_opt = \ - (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \ - NILFS_MOUNT_##opt); \ - } while (0) /** * struct nilfs_root - nilfs root object @@ -232,17 +223,23 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty) * @ifile: inode file * @inodes_count: number of inodes * @blocks_count: number of blocks + * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> + * @snapshot_kobj_unregister: completion state for kernel object */ struct nilfs_root { __u64 cno; struct rb_node rb_node; - atomic_t count; + refcount_t count; struct the_nilfs *nilfs; struct inode *ifile; atomic64_t inodes_count; atomic64_t blocks_count; + + /* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */ + struct kobject snapshot_kobj; + struct completion snapshot_kobj_unregister; }; /* Special checkpoint number */ @@ -253,20 +250,23 @@ struct nilfs_root { static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) { - u64 t = get_seconds(); - return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ; + u64 t = ktime_get_real_seconds(); + + return t < nilfs->ns_sbwtime || + t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq; } static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) { int flip_bits = nilfs->ns_sbwcount & 0x0FL; + return (flip_bits != 0x08 && flip_bits != 0x0F); } void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); -struct the_nilfs *alloc_nilfs(struct block_device *bdev); +struct the_nilfs *alloc_nilfs(struct super_block *sb); void destroy_nilfs(struct the_nilfs *nilfs); -int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb); int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs); void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs); @@ -283,12 +283,12 @@ void nilfs_swap_super_block(struct the_nilfs *); static inline void nilfs_get_root(struct nilfs_root *root) { - atomic_inc(&root->count); + refcount_inc(&root->count); } static inline int nilfs_valid_fs(struct the_nilfs *nilfs) { - unsigned valid_fs; + unsigned int valid_fs; down_read(&nilfs->ns_sem); valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); @@ -353,4 +353,24 @@ static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n) return n == nilfs->ns_segnum || n == nilfs->ns_nextnum; } +static inline int nilfs_flush_device(struct the_nilfs *nilfs) +{ + int err; + + if (!nilfs_test_opt(nilfs, BARRIER) || nilfs->ns_flushed_device) + return 0; + + nilfs->ns_flushed_device = 1; + /* + * the store to ns_flushed_device must not be reordered after + * blkdev_issue_flush(). + */ + smp_wmb(); + + err = blkdev_issue_flush(nilfs->ns_bdev); + if (err != -EIO) + err = 0; + return err; +} + #endif /* _THE_NILFS_H */ |
