summaryrefslogtreecommitdiff
path: root/fs/nilfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nilfs2')
-rw-r--r--fs/nilfs2/Kconfig2
-rw-r--r--fs/nilfs2/alloc.c276
-rw-r--r--fs/nilfs2/alloc.h12
-rw-r--r--fs/nilfs2/bmap.c155
-rw-r--r--fs/nilfs2/bmap.h20
-rw-r--r--fs/nilfs2/btnode.c168
-rw-r--r--fs/nilfs2/btree.c76
-rw-r--r--fs/nilfs2/btree.h1
-rw-r--r--fs/nilfs2/cpfile.c715
-rw-r--r--fs/nilfs2/cpfile.h10
-rw-r--r--fs/nilfs2/dat.c202
-rw-r--r--fs/nilfs2/dir.c382
-rw-r--r--fs/nilfs2/direct.c13
-rw-r--r--fs/nilfs2/file.c46
-rw-r--r--fs/nilfs2/gcinode.c43
-rw-r--r--fs/nilfs2/ifile.c71
-rw-r--r--fs/nilfs2/ifile.h12
-rw-r--r--fs/nilfs2/inode.c296
-rw-r--r--fs/nilfs2/ioctl.c417
-rw-r--r--fs/nilfs2/mdt.c195
-rw-r--r--fs/nilfs2/namei.c143
-rw-r--r--fs/nilfs2/nilfs.h76
-rw-r--r--fs/nilfs2/page.c307
-rw-r--r--fs/nilfs2/page.h29
-rw-r--r--fs/nilfs2/recovery.c148
-rw-r--r--fs/nilfs2/segbuf.c35
-rw-r--r--fs/nilfs2/segment.c820
-rw-r--r--fs/nilfs2/segment.h11
-rw-r--r--fs/nilfs2/sufile.c386
-rw-r--r--fs/nilfs2/sufile.h22
-rw-r--r--fs/nilfs2/super.c519
-rw-r--r--fs/nilfs2/sysfs.c53
-rw-r--r--fs/nilfs2/sysfs.h8
-rw-r--r--fs/nilfs2/the_nilfs.c134
-rw-r--r--fs/nilfs2/the_nilfs.h16
35 files changed, 3248 insertions, 2571 deletions
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 254d102e79c9..7dae168e346e 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -1,7 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
config NILFS2_FS
tristate "NILFS2 file system support"
+ select BUFFER_HEAD
select CRC32
+ select LEGACY_DIRECT_IO
help
NILFS2 is a log-structured file system (LFS) supporting continuous
snapshotting. In addition to versioning capability of the entire
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 6ce8617b562d..6b506995818d 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -21,6 +21,8 @@
* nilfs_palloc_groups_per_desc_block - get the number of groups that a group
* descriptor block can maintain
* @inode: inode of metadata file using this allocator
+ *
+ * Return: Number of groups that a group descriptor block can maintain.
*/
static inline unsigned long
nilfs_palloc_groups_per_desc_block(const struct inode *inode)
@@ -32,6 +34,8 @@ nilfs_palloc_groups_per_desc_block(const struct inode *inode)
/**
* nilfs_palloc_groups_count - get maximum number of groups
* @inode: inode of metadata file using this allocator
+ *
+ * Return: Maximum number of groups.
*/
static inline unsigned long
nilfs_palloc_groups_count(const struct inode *inode)
@@ -43,6 +47,8 @@ nilfs_palloc_groups_count(const struct inode *inode)
* nilfs_palloc_init_blockgroup - initialize private variables for allocator
* @inode: inode of metadata file using this allocator
* @entry_size: size of the persistent object
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned int entry_size)
{
@@ -78,6 +84,9 @@ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned int entry_size)
* @inode: inode of metadata file using this allocator
* @nr: serial number of the entry (e.g. inode number)
* @offset: pointer to store offset number in the group
+ *
+ * Return: Number of the group that contains the entry with the index
+ * specified by @nr.
*/
static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
unsigned long *offset)
@@ -93,8 +102,8 @@ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
* @inode: inode of metadata file using this allocator
* @group: group number
*
- * nilfs_palloc_desc_blkoff() returns block offset of the descriptor
- * block which contains a descriptor of the specified group.
+ * Return: Index number in the metadata file of the descriptor block of
+ * the group specified by @group.
*/
static unsigned long
nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
@@ -111,6 +120,9 @@ nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
*
* nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap
* block used to allocate/deallocate entries in the specified group.
+ *
+ * Return: Index number in the metadata file of the bitmap block of
+ * the group specified by @group.
*/
static unsigned long
nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
@@ -125,6 +137,8 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
* nilfs_palloc_group_desc_nfrees - get the number of free entries in a group
* @desc: pointer to descriptor structure for the group
* @lock: spin lock protecting @desc
+ *
+ * Return: Number of free entries written in the group descriptor @desc.
*/
static unsigned long
nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc,
@@ -143,6 +157,9 @@ nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc,
* @desc: pointer to descriptor structure for the group
* @lock: spin lock protecting @desc
* @n: delta to be added
+ *
+ * Return: Number of free entries after adjusting the group descriptor
+ * @desc.
*/
static u32
nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc,
@@ -161,6 +178,9 @@ nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc,
* nilfs_palloc_entry_blkoff - get block offset of an entry block
* @inode: inode of metadata file using this allocator
* @nr: serial number of the entry (e.g. inode number)
+ *
+ * Return: Index number in the metadata file of the block containing
+ * the entry specified by @nr.
*/
static unsigned long
nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
@@ -177,12 +197,14 @@ nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
* nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block
* @inode: inode of metadata file
* @bh: buffer head of the buffer to be initialized
- * @kaddr: kernel address mapped for the page including the buffer
+ * @from: kernel address mapped for a chunk of the block
+ *
+ * This function does not yet support the case where block size > PAGE_SIZE.
*/
static void nilfs_palloc_desc_block_init(struct inode *inode,
- struct buffer_head *bh, void *kaddr)
+ struct buffer_head *bh, void *from)
{
- struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh);
+ struct nilfs_palloc_group_desc *desc = from;
unsigned long n = nilfs_palloc_groups_per_desc_block(inode);
__le32 nfrees;
@@ -205,7 +227,8 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
int ret;
spin_lock(lock);
- if (prev->bh && blkoff == prev->blkoff) {
+ if (prev->bh && blkoff == prev->blkoff &&
+ likely(buffer_uptodate(prev->bh))) {
get_bh(prev->bh);
*bhp = prev->bh;
spin_unlock(lock);
@@ -235,6 +258,12 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
* @blkoff: block offset
* @prev: nilfs_bh_assoc struct of the last used buffer
* @lock: spin lock protecting @prev
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Non-existent block.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff,
struct nilfs_bh_assoc *prev,
@@ -255,6 +284,8 @@ static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff,
* @group: group number
* @create: create flag
* @bhp: pointer to store the resultant buffer head
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_palloc_get_desc_block(struct inode *inode,
unsigned long group,
@@ -274,6 +305,8 @@ static int nilfs_palloc_get_desc_block(struct inode *inode,
* @group: group number
* @create: create flag
* @bhp: pointer to store the resultant buffer head
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_palloc_get_bitmap_block(struct inode *inode,
unsigned long group,
@@ -291,6 +324,8 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode,
* nilfs_palloc_delete_bitmap_block - delete a bitmap block
* @inode: inode of metadata file using this allocator
* @group: group number
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_palloc_delete_bitmap_block(struct inode *inode,
unsigned long group)
@@ -309,6 +344,8 @@ static int nilfs_palloc_delete_bitmap_block(struct inode *inode,
* @nr: serial number of the entry (e.g. inode number)
* @create: create flag
* @bhp: pointer to store the resultant buffer head
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
int create, struct buffer_head **bhp)
@@ -325,6 +362,8 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
* nilfs_palloc_delete_entry_block - delete an entry block
* @inode: inode of metadata file using this allocator
* @nr: serial number of the entry
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr)
{
@@ -336,38 +375,55 @@ static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr)
}
/**
- * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor
+ * nilfs_palloc_group_desc_offset - calculate the byte offset of a group
+ * descriptor in the folio containing it
* @inode: inode of metadata file using this allocator
* @group: group number
- * @bh: buffer head of the buffer storing the group descriptor block
- * @kaddr: kernel address mapped for the page including the buffer
+ * @bh: buffer head of the group descriptor block
+ *
+ * Return: Byte offset in the folio of the group descriptor for @group.
*/
-static struct nilfs_palloc_group_desc *
-nilfs_palloc_block_get_group_desc(const struct inode *inode,
- unsigned long group,
- const struct buffer_head *bh, void *kaddr)
+static size_t nilfs_palloc_group_desc_offset(const struct inode *inode,
+ unsigned long group,
+ const struct buffer_head *bh)
{
- return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) +
- group % nilfs_palloc_groups_per_desc_block(inode);
+ return offset_in_folio(bh->b_folio, bh->b_data) +
+ sizeof(struct nilfs_palloc_group_desc) *
+ (group % nilfs_palloc_groups_per_desc_block(inode));
+}
+
+/**
+ * nilfs_palloc_bitmap_offset - calculate the byte offset of a bitmap block
+ * in the folio containing it
+ * @bh: buffer head of the bitmap block
+ *
+ * Return: Byte offset in the folio of the bitmap block for @bh.
+ */
+static size_t nilfs_palloc_bitmap_offset(const struct buffer_head *bh)
+{
+ return offset_in_folio(bh->b_folio, bh->b_data);
}
/**
- * nilfs_palloc_block_get_entry - get kernel address of an entry
+ * nilfs_palloc_entry_offset - calculate the byte offset of an entry in the
+ * folio containing it
* @inode: inode of metadata file using this allocator
- * @nr: serial number of the entry (e.g. inode number)
- * @bh: buffer head of the buffer storing the entry block
- * @kaddr: kernel address mapped for the page including the buffer
+ * @nr: serial number of the entry (e.g. inode number)
+ * @bh: buffer head of the entry block
+ *
+ * Return: Byte offset in the folio of the entry @nr.
*/
-void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
- const struct buffer_head *bh, void *kaddr)
+size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr,
+ const struct buffer_head *bh)
{
- unsigned long entry_offset, group_offset;
+ unsigned long entry_index_in_group, entry_index_in_block;
- nilfs_palloc_group(inode, nr, &group_offset);
- entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block;
+ nilfs_palloc_group(inode, nr, &entry_index_in_group);
+ entry_index_in_block = entry_index_in_group %
+ NILFS_MDT(inode)->mi_entries_per_block;
- return kaddr + bh_offset(bh) +
- entry_offset * NILFS_MDT(inode)->mi_entry_size;
+ return offset_in_folio(bh->b_folio, bh->b_data) +
+ entry_index_in_block * NILFS_MDT(inode)->mi_entry_size;
}
/**
@@ -376,11 +432,15 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
* @target: offset number of an entry in the group (start point)
* @bsize: size in bits
* @lock: spin lock protecting @bitmap
+ * @wrap: whether to wrap around
+ *
+ * Return: Offset number within the group of the found free entry, or
+ * %-ENOSPC if not found.
*/
static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
unsigned long target,
unsigned int bsize,
- spinlock_t *lock)
+ spinlock_t *lock, bool wrap)
{
int pos, end = bsize;
@@ -396,6 +456,8 @@ static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
end = target;
}
+ if (!wrap)
+ return -ENOSPC;
/* wrap around */
for (pos = 0; pos < end; pos++) {
@@ -415,6 +477,9 @@ static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
* @inode: inode of metadata file using this allocator
* @curr: current group number
* @max: maximum number of groups
+ *
+ * Return: Number of remaining descriptors (= groups) managed by the descriptor
+ * block.
*/
static unsigned long
nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
@@ -430,6 +495,8 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
* nilfs_palloc_count_desc_blocks - count descriptor blocks number
* @inode: inode of metadata file using this allocator
* @desc_blocks: descriptor blocks number [out]
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_palloc_count_desc_blocks(struct inode *inode,
unsigned long *desc_blocks)
@@ -450,6 +517,8 @@ static int nilfs_palloc_count_desc_blocks(struct inode *inode,
* MDT file growing
* @inode: inode of metadata file using this allocator
* @desc_blocks: known current descriptor blocks count
+ *
+ * Return: true if a group can be added in the metadata file, false if not.
*/
static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode,
unsigned long desc_blocks)
@@ -464,6 +533,12 @@ static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode,
* @inode: inode of metadata file using this allocator
* @nused: current number of used entries
* @nmaxp: max number of entries [out]
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ERANGE - Number of entries in use is out of range.
*/
int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp)
{
@@ -494,14 +569,22 @@ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp)
* nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
* @inode: inode of metadata file using this allocator
* @req: nilfs_palloc_req structure exchanged for the allocation
+ * @wrap: whether to wrap around
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - Entries exhausted (No entries available for allocation).
+ * * %-EROFS - Read only filesystem
*/
int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
- struct nilfs_palloc_req *req)
+ struct nilfs_palloc_req *req, bool wrap)
{
struct buffer_head *desc_bh, *bitmap_bh;
struct nilfs_palloc_group_desc *desc;
unsigned char *bitmap;
- void *desc_kaddr, *bitmap_kaddr;
+ size_t doff, boff;
unsigned long group, maxgroup, ngroups;
unsigned long group_offset, maxgroup_offset;
unsigned long n, entries_per_group;
@@ -515,7 +598,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
entries_per_group = nilfs_palloc_entries_per_group(inode);
for (i = 0; i < ngroups; i += n) {
- if (group >= ngroups) {
+ if (group >= ngroups && wrap) {
/* wrap around */
group = 0;
maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr,
@@ -524,54 +607,64 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
if (ret < 0)
return ret;
- desc_kaddr = kmap(desc_bh->b_page);
- desc = nilfs_palloc_block_get_group_desc(
- inode, group, desc_bh, desc_kaddr);
+
+ doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh);
+ desc = kmap_local_folio(desc_bh->b_folio, doff);
n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
maxgroup);
- for (j = 0; j < n; j++, desc++, group++) {
+ for (j = 0; j < n; j++, group++, group_offset = 0) {
lock = nilfs_mdt_bgl_lock(inode, group);
- if (nilfs_palloc_group_desc_nfrees(desc, lock) > 0) {
- ret = nilfs_palloc_get_bitmap_block(
- inode, group, 1, &bitmap_bh);
- if (ret < 0)
- goto out_desc;
- bitmap_kaddr = kmap(bitmap_bh->b_page);
- bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
- pos = nilfs_palloc_find_available_slot(
- bitmap, group_offset,
- entries_per_group, lock);
- if (pos >= 0) {
- /* found a free entry */
- nilfs_palloc_group_desc_add_entries(
- desc, lock, -1);
- req->pr_entry_nr =
- entries_per_group * group + pos;
- kunmap(desc_bh->b_page);
- kunmap(bitmap_bh->b_page);
-
- req->pr_desc_bh = desc_bh;
- req->pr_bitmap_bh = bitmap_bh;
- return 0;
- }
- kunmap(bitmap_bh->b_page);
- brelse(bitmap_bh);
+ if (nilfs_palloc_group_desc_nfrees(&desc[j], lock) == 0)
+ continue;
+
+ kunmap_local(desc);
+ ret = nilfs_palloc_get_bitmap_block(inode, group, 1,
+ &bitmap_bh);
+ if (unlikely(ret < 0)) {
+ brelse(desc_bh);
+ return ret;
}
- group_offset = 0;
+ /*
+ * Re-kmap the folio containing the first (and
+ * subsequent) group descriptors.
+ */
+ desc = kmap_local_folio(desc_bh->b_folio, doff);
+
+ boff = nilfs_palloc_bitmap_offset(bitmap_bh);
+ bitmap = kmap_local_folio(bitmap_bh->b_folio, boff);
+ pos = nilfs_palloc_find_available_slot(
+ bitmap, group_offset, entries_per_group, lock,
+ wrap);
+ /*
+ * Since the search for a free slot in the second and
+ * subsequent bitmap blocks always starts from the
+ * beginning, the wrap flag only has an effect on the
+ * first search.
+ */
+ kunmap_local(bitmap);
+ if (pos >= 0)
+ goto found;
+
+ brelse(bitmap_bh);
}
- kunmap(desc_bh->b_page);
+ kunmap_local(desc);
brelse(desc_bh);
}
/* no entries left */
return -ENOSPC;
- out_desc:
- kunmap(desc_bh->b_page);
- brelse(desc_bh);
- return ret;
+found:
+ /* found a free entry */
+ nilfs_palloc_group_desc_add_entries(&desc[j], lock, -1);
+ req->pr_entry_nr = entries_per_group * group + pos;
+ kunmap_local(desc);
+
+ req->pr_desc_bh = desc_bh;
+ req->pr_bitmap_bh = bitmap_bh;
+ return 0;
}
/**
@@ -598,18 +691,18 @@ void nilfs_palloc_commit_alloc_entry(struct inode *inode,
void nilfs_palloc_commit_free_entry(struct inode *inode,
struct nilfs_palloc_req *req)
{
- struct nilfs_palloc_group_desc *desc;
unsigned long group, group_offset;
+ size_t doff, boff;
+ struct nilfs_palloc_group_desc *desc;
unsigned char *bitmap;
- void *desc_kaddr, *bitmap_kaddr;
spinlock_t *lock;
group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
- desc_kaddr = kmap(req->pr_desc_bh->b_page);
- desc = nilfs_palloc_block_get_group_desc(inode, group,
- req->pr_desc_bh, desc_kaddr);
- bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
- bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
+ doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh);
+ desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff);
+
+ boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh);
+ bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff);
lock = nilfs_mdt_bgl_lock(inode, group);
if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
@@ -620,8 +713,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
else
nilfs_palloc_group_desc_add_entries(desc, lock, 1);
- kunmap(req->pr_bitmap_bh->b_page);
- kunmap(req->pr_desc_bh->b_page);
+ kunmap_local(bitmap);
+ kunmap_local(desc);
mark_buffer_dirty(req->pr_desc_bh);
mark_buffer_dirty(req->pr_bitmap_bh);
@@ -640,17 +733,17 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
struct nilfs_palloc_req *req)
{
struct nilfs_palloc_group_desc *desc;
- void *desc_kaddr, *bitmap_kaddr;
+ size_t doff, boff;
unsigned char *bitmap;
unsigned long group, group_offset;
spinlock_t *lock;
group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
- desc_kaddr = kmap(req->pr_desc_bh->b_page);
- desc = nilfs_palloc_block_get_group_desc(inode, group,
- req->pr_desc_bh, desc_kaddr);
- bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
- bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
+ doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh);
+ desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff);
+
+ boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh);
+ bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff);
lock = nilfs_mdt_bgl_lock(inode, group);
if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
@@ -661,8 +754,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
else
nilfs_palloc_group_desc_add_entries(desc, lock, 1);
- kunmap(req->pr_bitmap_bh->b_page);
- kunmap(req->pr_desc_bh->b_page);
+ kunmap_local(bitmap);
+ kunmap_local(desc);
brelse(req->pr_bitmap_bh);
brelse(req->pr_desc_bh);
@@ -676,6 +769,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
* nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object
* @inode: inode of metadata file using this allocator
* @req: nilfs_palloc_req structure exchanged for the removal
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_palloc_prepare_free_entry(struct inode *inode,
struct nilfs_palloc_req *req)
@@ -720,13 +815,15 @@ void nilfs_palloc_abort_free_entry(struct inode *inode,
* @inode: inode of metadata file using this allocator
* @entry_nrs: array of entry numbers to be deallocated
* @nitems: number of entries stored in @entry_nrs
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
{
struct buffer_head *desc_bh, *bitmap_bh;
struct nilfs_palloc_group_desc *desc;
unsigned char *bitmap;
- void *desc_kaddr, *bitmap_kaddr;
+ size_t doff, boff;
unsigned long group, group_offset;
__u64 group_min_nr, last_nrs[8];
const unsigned long epg = nilfs_palloc_entries_per_group(inode);
@@ -754,8 +851,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
/* Get the first entry number of the group */
group_min_nr = (__u64)group * epg;
- bitmap_kaddr = kmap(bitmap_bh->b_page);
- bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
+ boff = nilfs_palloc_bitmap_offset(bitmap_bh);
+ bitmap = kmap_local_folio(bitmap_bh->b_folio, boff);
lock = nilfs_mdt_bgl_lock(inode, group);
j = i;
@@ -800,7 +897,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
entry_start = rounddown(group_offset, epb);
} while (true);
- kunmap(bitmap_bh->b_page);
+ kunmap_local(bitmap);
mark_buffer_dirty(bitmap_bh);
brelse(bitmap_bh);
@@ -814,11 +911,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
inode->i_ino);
}
- desc_kaddr = kmap_atomic(desc_bh->b_page);
- desc = nilfs_palloc_block_get_group_desc(
- inode, group, desc_bh, desc_kaddr);
+ doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh);
+ desc = kmap_local_folio(desc_bh->b_folio, doff);
nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n);
- kunmap_atomic(desc_kaddr);
+ kunmap_local(desc);
mark_buffer_dirty(desc_bh);
nilfs_mdt_mark_dirty(inode);
brelse(desc_bh);
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index b667e869ac07..046d876ea3e0 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -21,6 +21,8 @@
*
* The number of entries per group is defined by the number of bits
* that a bitmap block can maintain.
+ *
+ * Return: Number of entries per group.
*/
static inline unsigned long
nilfs_palloc_entries_per_group(const struct inode *inode)
@@ -31,13 +33,13 @@ nilfs_palloc_entries_per_group(const struct inode *inode)
int nilfs_palloc_init_blockgroup(struct inode *, unsigned int);
int nilfs_palloc_get_entry_block(struct inode *, __u64, int,
struct buffer_head **);
-void *nilfs_palloc_block_get_entry(const struct inode *, __u64,
- const struct buffer_head *, void *);
+size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr,
+ const struct buffer_head *bh);
int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *);
/**
- * nilfs_palloc_req - persistent allocator request and reply
+ * struct nilfs_palloc_req - persistent allocator request and reply
* @pr_entry_nr: entry number (vblocknr or inode number)
* @pr_desc_bh: buffer head of the buffer containing block group descriptors
* @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap
@@ -50,8 +52,8 @@ struct nilfs_palloc_req {
struct buffer_head *pr_entry_bh;
};
-int nilfs_palloc_prepare_alloc_entry(struct inode *,
- struct nilfs_palloc_req *);
+int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
+ struct nilfs_palloc_req *req, bool wrap);
void nilfs_palloc_commit_alloc_entry(struct inode *,
struct nilfs_palloc_req *);
void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 798a2c1b38c6..ccc1a7aa52d2 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -47,17 +47,14 @@ static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
* @ptrp: place to store the value associated to @key
*
* Description: nilfs_bmap_lookup_at_level() finds a record whose key
- * matches @key in the block at @level of the bmap.
- *
- * Return Value: On success, 0 is returned and the record associated with @key
- * is stored in the place pointed by @ptrp. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - A record associated with @key does not exist.
+ * matches @key in the block at @level of the bmap. The record associated
+ * with @key is stored in the place pointed to by @ptrp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - A record associated with @key does not exist.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
__u64 *ptrp)
@@ -67,20 +64,28 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
down_read(&bmap->b_sem);
ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
- if (ret < 0) {
- ret = nilfs_bmap_convert_error(bmap, __func__, ret);
+ if (ret < 0)
goto out;
- }
+
if (NILFS_BMAP_USE_VBN(bmap)) {
ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp,
&blocknr);
if (!ret)
*ptrp = blocknr;
+ else if (ret == -ENOENT) {
+ /*
+ * If there was no valid entry in DAT for the block
+ * address obtained by b_ops->bop_lookup, then pass
+ * internal code -EINVAL to nilfs_bmap_convert_error
+ * to treat it as metadata corruption.
+ */
+ ret = -EINVAL;
+ }
}
out:
up_read(&bmap->b_sem);
- return ret;
+ return nilfs_bmap_convert_error(bmap, __func__, ret);
}
int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
@@ -130,14 +135,11 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
* Description: nilfs_bmap_insert() inserts the new key-record pair specified
* by @key and @rec into @bmap.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EEXIST - A record associated with @key already exist.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EEXIST - A record associated with @key already exists.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec)
{
@@ -185,14 +187,11 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
* Description: nilfs_bmap_seek_key() seeks a valid key on @bmap
* starting from @start, and stores it to @keyp if found.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - No valid entry was found
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No valid entry was found.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp)
{
@@ -228,14 +227,11 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp)
* Description: nilfs_bmap_delete() deletes the key-record pair specified by
* @key from @bmap.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - A record associated with @key does not exist.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - A record associated with @key does not exist.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key)
{
@@ -282,12 +278,10 @@ static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, __u64 key)
* Description: nilfs_bmap_truncate() removes key-record pairs whose keys are
* greater than or equal to @key from @bmap.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key)
{
@@ -322,12 +316,10 @@ void nilfs_bmap_clear(struct nilfs_bmap *bmap)
* Description: nilfs_bmap_propagate() marks the buffers that directly or
* indirectly refer to the block specified by @bh dirty.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh)
{
@@ -341,7 +333,7 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh)
}
/**
- * nilfs_bmap_lookup_dirty_buffers -
+ * nilfs_bmap_lookup_dirty_buffers - collect dirty block buffers
* @bmap: bmap
* @listp: pointer to buffer head list
*/
@@ -354,22 +346,22 @@ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap,
/**
* nilfs_bmap_assign - assign a new block number to a block
- * @bmap: bmap
- * @bh: pointer to buffer head
+ * @bmap: bmap
+ * @bh: place to store a pointer to the buffer head to which a block
+ * address is assigned (in/out)
* @blocknr: block number
- * @binfo: block information
+ * @binfo: block information
*
* Description: nilfs_bmap_assign() assigns the block number @blocknr to the
- * buffer specified by @bh.
- *
- * Return Value: On success, 0 is returned and the buffer head of a newly
- * create buffer and the block information associated with the buffer are
- * stored in the place pointed by @bh and @binfo, respectively. On error, one
- * of the following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * buffer specified by @bh. The block information is stored in the memory
+ * pointed to by @binfo, and the buffer head may be replaced as a block
+ * address is assigned, in which case a pointer to the new buffer head is
+ * stored in the memory pointed to by @bh.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_assign(struct nilfs_bmap *bmap,
struct buffer_head **bh,
@@ -394,12 +386,10 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap,
* Description: nilfs_bmap_mark() marks the block specified by @key and @level
* as dirty.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level)
{
@@ -422,7 +412,7 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level)
* Description: nilfs_test_and_clear() is the atomic operation to test and
* clear the dirty state of @bmap.
*
- * Return Value: 1 is returned if @bmap is dirty, or 0 if clear.
+ * Return: 1 if @bmap is dirty, or 0 if clear.
*/
int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
{
@@ -442,15 +432,9 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
const struct buffer_head *bh)
{
- struct buffer_head *pbh;
- __u64 key;
-
- key = page_index(bh->b_page) << (PAGE_SHIFT -
- bmap->b_inode->i_blkbits);
- for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
- key++;
+ loff_t pos = folio_pos(bh->b_folio) + bh_offset(bh);
- return key;
+ return pos >> bmap->b_inode->i_blkbits;
}
__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key)
@@ -488,10 +472,10 @@ static struct lock_class_key nilfs_bmap_mdt_lock_key;
*
* Description: nilfs_bmap_read() initializes the bmap @bmap.
*
- * Return Value: On success, 0 is returned. On error, the following negative
- * error code is returned.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (corrupted bmap).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
{
@@ -540,13 +524,10 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
*/
void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
{
- down_write(&bmap->b_sem);
memcpy(raw_inode->i_bmap, bmap->b_u.u_data,
NILFS_INODE_BMAP_SIZE * sizeof(__le64));
if (bmap->b_inode->i_ino == NILFS_DAT_INO)
bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
-
- up_write(&bmap->b_sem);
}
void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index 608168a5cb88..4656df392722 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -44,6 +44,19 @@ struct nilfs_bmap_stats {
/**
* struct nilfs_bmap_operations - bmap operation table
+ * @bop_lookup: single block search operation
+ * @bop_lookup_contig: consecutive block search operation
+ * @bop_insert: block insertion operation
+ * @bop_delete: block delete operation
+ * @bop_clear: block mapping resource release operation
+ * @bop_propagate: operation to propagate dirty state towards the
+ * mapping root
+ * @bop_lookup_dirty_buffers: operation to collect dirty block buffers
+ * @bop_assign: disk block address assignment operation
+ * @bop_mark: operation to mark in-use blocks as dirty for
+ * relocation by GC
+ * @bop_seek_key: find valid block key operation
+ * @bop_last_key: find last valid block key operation
*/
struct nilfs_bmap_operations {
int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *);
@@ -66,7 +79,7 @@ struct nilfs_bmap_operations {
int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *);
int (*bop_last_key)(const struct nilfs_bmap *, __u64 *);
- /* The following functions are internal use only. */
+ /* private: internal use only */
int (*bop_check_insert)(const struct nilfs_bmap *, __u64);
int (*bop_check_delete)(struct nilfs_bmap *, __u64);
int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int);
@@ -74,9 +87,8 @@ struct nilfs_bmap_operations {
#define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64))
-#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */)
-#define NILFS_BMAP_NEW_PTR_INIT \
- (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1))
+#define NILFS_BMAP_KEY_BIT BITS_PER_LONG
+#define NILFS_BMAP_NEW_PTR_INIT (1UL << (BITS_PER_LONG - 1))
static inline int nilfs_bmap_is_new_ptr(unsigned long ptr)
{
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index e74fda212620..568367129092 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,6 +35,7 @@ void nilfs_init_btnc_inode(struct inode *btnc_inode)
ii->i_flags = 0;
memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap));
mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS);
+ btnc_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
}
void nilfs_btnode_cache_clear(struct address_space *btnc)
@@ -51,22 +52,36 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
if (unlikely(!bh))
- return NULL;
+ return ERR_PTR(-ENOMEM);
if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
buffer_dirty(bh))) {
- brelse(bh);
- BUG();
+ /*
+ * The block buffer at the specified new address was already
+ * in use. This can happen if it is a virtual block number
+ * and has been reallocated due to corruption of the bitmap
+ * used to manage its allocation state (if not, the buffer
+ * clearing of an abandoned b-tree node is missing somewhere).
+ */
+ nilfs_error(inode->i_sb,
+ "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)",
+ (unsigned long long)blocknr, inode->i_ino);
+ goto failed;
}
memset(bh->b_data, 0, i_blocksize(inode));
- bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = blocknr;
set_buffer_mapped(bh);
set_buffer_uptodate(bh);
- unlock_page(bh->b_page);
- put_page(bh->b_page);
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
return bh;
+
+failed:
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
+ brelse(bh);
+ return ERR_PTR(-EIO);
}
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
@@ -75,7 +90,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
{
struct buffer_head *bh;
struct inode *inode = btnc->host;
- struct page *page;
+ struct folio *folio;
int err;
bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
@@ -83,7 +98,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
return -ENOMEM;
err = -EEXIST; /* internal code */
- page = bh->b_page;
+ folio = bh->b_folio;
if (buffer_uptodate(bh) || buffer_dirty(bh))
goto found;
@@ -118,7 +133,6 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
goto found;
}
set_buffer_mapped(bh);
- bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = pblocknr; /* set block address for read */
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
@@ -130,8 +144,8 @@ found:
*pbh = bh;
out_locked:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return err;
}
@@ -145,30 +159,52 @@ out_locked:
void nilfs_btnode_delete(struct buffer_head *bh)
{
struct address_space *mapping;
- struct page *page = bh->b_page;
- pgoff_t index = page_index(page);
+ struct folio *folio = bh->b_folio;
+ pgoff_t index = folio->index;
int still_dirty;
- get_page(page);
- lock_page(page);
- wait_on_page_writeback(page);
+ folio_get(folio);
+ folio_lock(folio);
+ folio_wait_writeback(folio);
nilfs_forget_buffer(bh);
- still_dirty = PageDirty(page);
- mapping = page->mapping;
- unlock_page(page);
- put_page(page);
+ still_dirty = folio_test_dirty(folio);
+ mapping = folio->mapping;
+ folio_unlock(folio);
+ folio_put(folio);
if (!still_dirty && mapping)
invalidate_inode_pages2_range(mapping, index, index);
}
/**
- * nilfs_btnode_prepare_change_key
- * prepare to move contents of the block for old key to one of new key.
- * the old buffer will not be removed, but might be reused for new buffer.
- * it might return -ENOMEM because of memory allocation errors,
- * and might return -EIO because of disk read errors.
+ * nilfs_btnode_prepare_change_key - prepare to change the search key of a
+ * b-tree node block
+ * @btnc: page cache in which the b-tree node block is buffered
+ * @ctxt: structure for exchanging context information for key change
+ *
+ * nilfs_btnode_prepare_change_key() prepares to move the contents of the
+ * b-tree node block of the old key given in the "oldkey" member of @ctxt to
+ * the position of the new key given in the "newkey" member of @ctxt in the
+ * page cache @btnc. Here, the key of the block is an index in units of
+ * blocks, and if the page and block sizes match, it matches the page index
+ * in the page cache.
+ *
+ * If the page size and block size match, this function attempts to move the
+ * entire folio, and in preparation for this, inserts the original folio into
+ * the new index of the cache. If this insertion fails or if the page size
+ * and block size are different, it falls back to a copy preparation using
+ * nilfs_btnode_create_block(), inserts a new block at the position
+ * corresponding to "newkey", and stores the buffer head pointer in the
+ * "newbh" member of @ctxt.
+ *
+ * Note that the current implementation does not support folio sizes larger
+ * than the page size.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_btnode_prepare_change_key(struct address_space *btnc,
struct nilfs_btnode_chkey_ctxt *ctxt)
@@ -185,23 +221,23 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
ctxt->newbh = NULL;
if (inode->i_blkbits == PAGE_SHIFT) {
- struct page *opage = obh->b_page;
- lock_page(opage);
+ struct folio *ofolio = obh->b_folio;
+ folio_lock(ofolio);
retry:
- /* BUG_ON(oldkey != obh->b_page->index); */
- if (unlikely(oldkey != opage->index))
- NILFS_PAGE_BUG(opage,
+ /* BUG_ON(oldkey != obh->b_folio->index); */
+ if (unlikely(oldkey != ofolio->index))
+ NILFS_FOLIO_BUG(ofolio,
"invalid oldkey %lld (newkey=%lld)",
(unsigned long long)oldkey,
(unsigned long long)newkey);
xa_lock_irq(&btnc->i_pages);
- err = __xa_insert(&btnc->i_pages, newkey, opage, GFP_NOFS);
+ err = __xa_insert(&btnc->i_pages, newkey, ofolio, GFP_NOFS);
xa_unlock_irq(&btnc->i_pages);
/*
- * Note: page->index will not change to newkey until
+ * Note: folio->index will not change to newkey until
* nilfs_btnode_commit_change_key() will be called.
- * To protect the page in intermediate state, the page lock
+ * To protect the folio in intermediate state, the folio lock
* is held.
*/
if (!err)
@@ -213,40 +249,53 @@ retry:
if (!err)
goto retry;
/* fallback to copy mode */
- unlock_page(opage);
+ folio_unlock(ofolio);
}
nbh = nilfs_btnode_create_block(btnc, newkey);
- if (!nbh)
- return -ENOMEM;
+ if (IS_ERR(nbh))
+ return PTR_ERR(nbh);
BUG_ON(nbh == obh);
ctxt->newbh = nbh;
return 0;
failed_unlock:
- unlock_page(obh->b_page);
+ folio_unlock(obh->b_folio);
return err;
}
/**
- * nilfs_btnode_commit_change_key
- * commit the change_key operation prepared by prepare_change_key().
+ * nilfs_btnode_commit_change_key - commit the change of the search key of
+ * a b-tree node block
+ * @btnc: page cache in which the b-tree node block is buffered
+ * @ctxt: structure for exchanging context information for key change
+ *
+ * nilfs_btnode_commit_change_key() executes the key change based on the
+ * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid
+ * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move),
+ * this function removes the folio from the old index and completes the move.
+ * Otherwise, it copies the block data and inherited flag states of "oldbh"
+ * to "newbh" and clears the "oldbh" from the cache. In either case, the
+ * relocated buffer is marked as dirty.
+ *
+ * As with nilfs_btnode_prepare_change_key(), the current implementation does
+ * not support folio sizes larger than the page size.
*/
void nilfs_btnode_commit_change_key(struct address_space *btnc,
struct nilfs_btnode_chkey_ctxt *ctxt)
{
struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh;
__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
- struct page *opage;
+ struct folio *ofolio;
if (oldkey == newkey)
return;
if (nbh == NULL) { /* blocksize == pagesize */
- opage = obh->b_page;
- if (unlikely(oldkey != opage->index))
- NILFS_PAGE_BUG(opage,
+ ofolio = obh->b_folio;
+ if (unlikely(oldkey != ofolio->index))
+ NILFS_FOLIO_BUG(ofolio,
"invalid oldkey %lld (newkey=%lld)",
(unsigned long long)oldkey,
(unsigned long long)newkey);
@@ -257,8 +306,8 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
__xa_set_mark(&btnc->i_pages, newkey, PAGECACHE_TAG_DIRTY);
xa_unlock_irq(&btnc->i_pages);
- opage->index = obh->b_blocknr = newkey;
- unlock_page(opage);
+ ofolio->index = obh->b_blocknr = newkey;
+ folio_unlock(ofolio);
} else {
nilfs_copy_buffer(nbh, obh);
mark_buffer_dirty(nbh);
@@ -270,8 +319,19 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
}
/**
- * nilfs_btnode_abort_change_key
- * abort the change_key operation prepared by prepare_change_key().
+ * nilfs_btnode_abort_change_key - abort the change of the search key of a
+ * b-tree node block
+ * @btnc: page cache in which the b-tree node block is buffered
+ * @ctxt: structure for exchanging context information for key change
+ *
+ * nilfs_btnode_abort_change_key() cancels the key change associated with the
+ * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs
+ * any necessary cleanup. If no valid block buffer is prepared in "newbh" of
+ * @ctxt, this function removes the folio from the destination index and aborts
+ * the move. Otherwise, it clears "newbh" from the cache.
+ *
+ * As with nilfs_btnode_prepare_change_key(), the current implementation does
+ * not support folio sizes larger than the page size.
*/
void nilfs_btnode_abort_change_key(struct address_space *btnc,
struct nilfs_btnode_chkey_ctxt *ctxt)
@@ -284,7 +344,15 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
if (nbh == NULL) { /* blocksize == pagesize */
xa_erase_irq(&btnc->i_pages, newkey);
- unlock_page(ctxt->bh->b_page);
- } else
- brelse(nbh);
+ folio_unlock(ctxt->bh->b_folio);
+ } else {
+ /*
+ * When canceling a buffer that a prepare operation has
+ * allocated to copy a node block to another location, use
+ * nilfs_btnode_delete() to initialize and release the buffer
+ * so that the buffer flags will not be in an inconsistent
+ * state when it is reallocated.
+ */
+ nilfs_btnode_delete(nbh);
+ }
}
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 40ce92a332fe..dd0c8e560ef6 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -63,8 +63,8 @@ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
struct buffer_head *bh;
bh = nilfs_btnode_create_block(btnc, ptr);
- if (!bh)
- return -ENOMEM;
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
set_buffer_nilfs_volatile(bh);
*bhp = bh;
@@ -334,7 +334,7 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
* @inode: host inode of btree
* @blocknr: block number
*
- * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
+ * Return: 0 if normal, 1 if the node is broken.
*/
static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
size_t size, struct inode *inode,
@@ -350,7 +350,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
level >= NILFS_BTREE_LEVEL_MAX ||
(flags & NILFS_BTREE_NODE_ROOT) ||
- nchildren < 0 ||
+ nchildren <= 0 ||
nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
nilfs_crit(inode->i_sb,
"bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
@@ -366,7 +366,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
* @node: btree root node to be examined
* @inode: host inode of btree
*
- * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
+ * Return: 0 if normal, 1 if the root node is broken.
*/
static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
struct inode *inode)
@@ -381,7 +381,8 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
level >= NILFS_BTREE_LEVEL_MAX ||
nchildren < 0 ||
- nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
+ nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX ||
+ (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) {
nilfs_crit(inode->i_sb,
"bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
inode->i_ino, level, flags, nchildren);
@@ -398,7 +399,7 @@ int nilfs_btree_broken_node_block(struct buffer_head *bh)
if (buffer_nilfs_checked(bh))
return 0;
- inode = bh->b_page->mapping->host;
+ inode = bh->b_folio->mapping->host;
ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
bh->b_size, inode, bh->b_blocknr);
if (likely(!ret))
@@ -651,8 +652,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
* @minlevel: start level
* @nextkey: place to store the next valid key
*
- * Return Value: If a next key was found, 0 is returned. Otherwise,
- * -ENOENT is returned.
+ * Return: 0 if the next key was found, %-ENOENT if not found.
*/
static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
const struct nilfs_btree_path *path,
@@ -724,7 +724,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
dat = nilfs_bmap_get_dat(btree);
ret = nilfs_dat_translate(dat, ptr, &blocknr);
if (ret < 0)
- goto out;
+ goto dat_error;
ptr = blocknr;
}
cnt = 1;
@@ -743,7 +743,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
if (dat) {
ret = nilfs_dat_translate(dat, ptr2, &blocknr);
if (ret < 0)
- goto out;
+ goto dat_error;
ptr2 = blocknr;
}
if (ptr2 != ptr + cnt || ++cnt == maxblocks)
@@ -781,6 +781,11 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
out:
nilfs_btree_free_path(path);
return ret;
+
+ dat_error:
+ if (ret == -ENOENT)
+ ret = -EINVAL; /* Notify bmap layer of metadata corruption */
+ goto out;
}
static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
@@ -1653,13 +1658,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
int nchildren, ret;
root = nilfs_btree_get_root(btree);
+ nchildren = nilfs_btree_node_get_nchildren(root);
+ if (unlikely(nchildren == 0))
+ return 0;
+
switch (nilfs_btree_height(btree)) {
case 2:
bh = NULL;
node = root;
break;
case 3:
- nchildren = nilfs_btree_node_get_nchildren(root);
if (nchildren > 1)
return 0;
ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
@@ -1668,12 +1676,12 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
if (ret < 0)
return ret;
node = (struct nilfs_btree_node *)bh->b_data;
+ nchildren = nilfs_btree_node_get_nchildren(node);
break;
default:
return 0;
}
- nchildren = nilfs_btree_node_get_nchildren(node);
maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
nextmaxkey = (nchildren > 1) ?
nilfs_btree_node_get_key(node, nchildren - 2) : 0;
@@ -1852,13 +1860,22 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
}
/**
- * nilfs_btree_convert_and_insert -
- * @bmap:
- * @key:
- * @ptr:
- * @keys:
- * @ptrs:
- * @n:
+ * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree
+ * @btree: NILFS B-tree structure
+ * @key: Key of the new entry to be inserted
+ * @ptr: Pointer (block number) associated with the key to be inserted
+ * @keys: Array of keys to be inserted in addition to @key
+ * @ptrs: Array of pointers associated with @keys
+ * @n: Number of keys and pointers in @keys and @ptrs
+ *
+ * This function is used to insert a new entry specified by @key and @ptr,
+ * along with additional entries specified by @keys and @ptrs arrays, into a
+ * NILFS B-tree.
+ * It prepares the necessary changes by allocating the required blocks and any
+ * necessary intermediate nodes. It converts configurations from other forms of
+ * block mapping (the one that currently exists is direct mapping) to a B-tree.
+ *
+ * Return: 0 on success or a negative error code on failure.
*/
int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
__u64 key, __u64 ptr,
@@ -2085,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree,
ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
if (ret < 0) {
- if (unlikely(ret == -ENOENT))
+ if (unlikely(ret == -ENOENT)) {
nilfs_crit(btree->b_inode->i_sb,
"writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
btree->b_inode->i_ino,
(unsigned long long)key, level);
+ ret = -EINVAL;
+ }
goto out;
}
@@ -2150,7 +2169,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
struct address_space *btcache = btnc_inode->i_mapping;
struct list_head lists[NILFS_BTREE_LEVEL_MAX];
- struct pagevec pvec;
+ struct folio_batch fbatch;
struct buffer_head *bh, *head;
pgoff_t index = 0;
int level, i;
@@ -2160,19 +2179,19 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
level++)
INIT_LIST_HEAD(&lists[level]);
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
- while (pagevec_lookup_tag(&pvec, btcache, &index,
- PAGECACHE_TAG_DIRTY)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- bh = head = page_buffers(pvec.pages[i]);
+ while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1,
+ PAGECACHE_TAG_DIRTY, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ bh = head = folio_buffers(fbatch.folios[i]);
do {
if (buffer_dirty(bh))
nilfs_btree_add_dirty_buffer(btree,
lists, bh);
} while ((bh = bh->b_this_page) != head);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
@@ -2219,6 +2238,7 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
/* on-disk format */
binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
binfo->bi_dat.bi_level = level;
+ memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
return 0;
}
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 92868e1a48ca..2a220f716c91 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -24,6 +24,7 @@
* @bp_index: index of child node
* @bp_oldreq: ptr end request for old ptr
* @bp_newreq: ptr alloc request for new ptr
+ * @bp_ctxt: context information for changing the key of a b-tree node block
* @bp_op: rebalance operation
*/
struct nilfs_btree_path {
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 9ebefb3acb0e..4bbdc832d7f2 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -28,7 +28,7 @@ nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno)
{
__u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1;
- do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
+ tcno = div64_ul(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
return (unsigned long)tcno;
}
@@ -68,54 +68,41 @@ static inline int nilfs_cpfile_is_in_first(const struct inode *cpfile,
static unsigned int
nilfs_cpfile_block_add_valid_checkpoints(const struct inode *cpfile,
struct buffer_head *bh,
- void *kaddr,
unsigned int n)
{
- struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
+ struct nilfs_checkpoint *cp;
unsigned int count;
+ cp = kmap_local_folio(bh->b_folio,
+ offset_in_folio(bh->b_folio, bh->b_data));
count = le32_to_cpu(cp->cp_checkpoints_count) + n;
cp->cp_checkpoints_count = cpu_to_le32(count);
+ kunmap_local(cp);
return count;
}
static unsigned int
nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile,
struct buffer_head *bh,
- void *kaddr,
unsigned int n)
{
- struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
+ struct nilfs_checkpoint *cp;
unsigned int count;
+ cp = kmap_local_folio(bh->b_folio,
+ offset_in_folio(bh->b_folio, bh->b_data));
WARN_ON(le32_to_cpu(cp->cp_checkpoints_count) < n);
count = le32_to_cpu(cp->cp_checkpoints_count) - n;
cp->cp_checkpoints_count = cpu_to_le32(count);
+ kunmap_local(cp);
return count;
}
-static inline struct nilfs_cpfile_header *
-nilfs_cpfile_block_get_header(const struct inode *cpfile,
- struct buffer_head *bh,
- void *kaddr)
-{
- return kaddr + bh_offset(bh);
-}
-
-static struct nilfs_checkpoint *
-nilfs_cpfile_block_get_checkpoint(const struct inode *cpfile, __u64 cno,
- struct buffer_head *bh,
- void *kaddr)
-{
- return kaddr + bh_offset(bh) + nilfs_cpfile_get_offset(cpfile, cno) *
- NILFS_MDT(cpfile)->mi_entry_size;
-}
-
static void nilfs_cpfile_block_init(struct inode *cpfile,
struct buffer_head *bh,
- void *kaddr)
+ void *from)
{
- struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
+ struct nilfs_checkpoint *cp = from;
size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
int n = nilfs_cpfile_checkpoints_per_block(cpfile);
@@ -125,10 +112,65 @@ static void nilfs_cpfile_block_init(struct inode *cpfile,
}
}
-static inline int nilfs_cpfile_get_header_block(struct inode *cpfile,
- struct buffer_head **bhp)
+/**
+ * nilfs_cpfile_checkpoint_offset - calculate the byte offset of a checkpoint
+ * entry in the folio containing it
+ * @cpfile: checkpoint file inode
+ * @cno: checkpoint number
+ * @bh: buffer head of block containing checkpoint indexed by @cno
+ *
+ * Return: Byte offset in the folio of the checkpoint specified by @cno.
+ */
+static size_t nilfs_cpfile_checkpoint_offset(const struct inode *cpfile,
+ __u64 cno,
+ struct buffer_head *bh)
+{
+ return offset_in_folio(bh->b_folio, bh->b_data) +
+ nilfs_cpfile_get_offset(cpfile, cno) *
+ NILFS_MDT(cpfile)->mi_entry_size;
+}
+
+/**
+ * nilfs_cpfile_cp_snapshot_list_offset - calculate the byte offset of a
+ * checkpoint snapshot list in the folio
+ * containing it
+ * @cpfile: checkpoint file inode
+ * @cno: checkpoint number
+ * @bh: buffer head of block containing checkpoint indexed by @cno
+ *
+ * Return: Byte offset in the folio of the checkpoint snapshot list specified
+ * by @cno.
+ */
+static size_t nilfs_cpfile_cp_snapshot_list_offset(const struct inode *cpfile,
+ __u64 cno,
+ struct buffer_head *bh)
+{
+ return nilfs_cpfile_checkpoint_offset(cpfile, cno, bh) +
+ offsetof(struct nilfs_checkpoint, cp_snapshot_list);
+}
+
+/**
+ * nilfs_cpfile_ch_snapshot_list_offset - calculate the byte offset of the
+ * snapshot list in the header
+ *
+ * Return: Byte offset in the folio of the checkpoint snapshot list
+ */
+static size_t nilfs_cpfile_ch_snapshot_list_offset(void)
+{
+ return offsetof(struct nilfs_cpfile_header, ch_snapshot_list);
+}
+
+static int nilfs_cpfile_get_header_block(struct inode *cpfile,
+ struct buffer_head **bhp)
{
- return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp);
+ int err = nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp);
+
+ if (unlikely(err == -ENOENT)) {
+ nilfs_error(cpfile->i_sb,
+ "missing header block in checkpoint metadata");
+ err = -EIO;
+ }
+ return err;
}
static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
@@ -149,14 +191,11 @@ static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
* @cnop: place to store the next checkpoint number
* @bhp: place to store a pointer to buffer_head struct
*
- * Return Value: On success, it returns 0. On error, the following negative
- * error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
- *
- * %-EIO - I/O error
- *
- * %-ENOENT - no block exists in the range.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - no block exists in the range.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_cpfile_find_checkpoint_block(struct inode *cpfile,
__u64 start_cno, __u64 end_cno,
@@ -187,106 +226,215 @@ static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
}
/**
- * nilfs_cpfile_get_checkpoint - get a checkpoint
- * @cpfile: inode of checkpoint file
- * @cno: checkpoint number
- * @create: create flag
- * @cpp: pointer to a checkpoint
- * @bhp: pointer to a buffer head
+ * nilfs_cpfile_read_checkpoint - read a checkpoint entry in cpfile
+ * @cpfile: checkpoint file inode
+ * @cno: number of checkpoint entry to read
+ * @root: nilfs root object
+ * @ifile: ifile's inode to read and attach to @root
*
- * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint
- * specified by @cno. A new checkpoint will be created if @cno is the current
- * checkpoint number and @create is nonzero.
+ * This function imports checkpoint information from the checkpoint file and
+ * stores it to the inode file given by @ifile and the nilfs root object
+ * given by @root.
*
- * Return Value: On success, 0 is returned, and the checkpoint and the
- * buffer head of the buffer on which the checkpoint is located are stored in
- * the place pointed by @cpp and @bhp, respectively. On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid checkpoint.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EIO - I/O error (including metadata corruption).
+ */
+int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno,
+ struct nilfs_root *root, struct inode *ifile)
+{
+ struct buffer_head *cp_bh;
+ struct nilfs_checkpoint *cp;
+ size_t offset;
+ int ret;
+
+ if (cno < 1 || cno > nilfs_mdt_cno(cpfile))
+ return -EINVAL;
+
+ down_read(&NILFS_MDT(cpfile)->mi_sem);
+ ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
+ if (unlikely(ret < 0)) {
+ if (ret == -ENOENT)
+ ret = -EINVAL;
+ goto out_sem;
+ }
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
+ if (nilfs_checkpoint_invalid(cp)) {
+ ret = -EINVAL;
+ goto put_cp;
+ }
+
+ ret = nilfs_read_inode_common(ifile, &cp->cp_ifile_inode);
+ if (unlikely(ret)) {
+ /*
+ * Since this inode is on a checkpoint entry, treat errors
+ * as metadata corruption.
+ */
+ nilfs_err(cpfile->i_sb,
+ "ifile inode (checkpoint number=%llu) corrupted",
+ (unsigned long long)cno);
+ ret = -EIO;
+ goto put_cp;
+ }
+
+ /* Configure the nilfs root object */
+ atomic64_set(&root->inodes_count, le64_to_cpu(cp->cp_inodes_count));
+ atomic64_set(&root->blocks_count, le64_to_cpu(cp->cp_blocks_count));
+ root->ifile = ifile;
+
+put_cp:
+ kunmap_local(cp);
+ brelse(cp_bh);
+out_sem:
+ up_read(&NILFS_MDT(cpfile)->mi_sem);
+ return ret;
+}
+
+/**
+ * nilfs_cpfile_create_checkpoint - create a checkpoint entry on cpfile
+ * @cpfile: checkpoint file inode
+ * @cno: number of checkpoint to set up
*
- * %-ENOENT - No such checkpoint.
+ * This function creates a checkpoint with the number specified by @cno on
+ * cpfile. If the specified checkpoint entry already exists due to a past
+ * failure, it will be reused without returning an error.
+ * In either case, the buffer of the block containing the checkpoint entry
+ * and the cpfile inode are made dirty for inclusion in the write log.
*
- * %-EINVAL - invalid checkpoint.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-EROFS - Read only filesystem
*/
-int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
- __u64 cno,
- int create,
- struct nilfs_checkpoint **cpp,
- struct buffer_head **bhp)
+int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno)
{
struct buffer_head *header_bh, *cp_bh;
struct nilfs_cpfile_header *header;
struct nilfs_checkpoint *cp;
- void *kaddr;
+ size_t offset;
int ret;
- if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) ||
- (cno < nilfs_mdt_cno(cpfile) && create)))
- return -EINVAL;
+ if (WARN_ON_ONCE(cno < 1))
+ return -EIO;
down_write(&NILFS_MDT(cpfile)->mi_sem);
-
ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
- if (ret < 0)
+ if (unlikely(ret < 0))
goto out_sem;
- ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create, &cp_bh);
- if (ret < 0)
+
+ ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 1, &cp_bh);
+ if (unlikely(ret < 0))
goto out_header;
- kaddr = kmap(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
if (nilfs_checkpoint_invalid(cp)) {
- if (!create) {
- kunmap(cp_bh->b_page);
- brelse(cp_bh);
- ret = -ENOENT;
- goto out_header;
- }
/* a newly-created checkpoint */
nilfs_checkpoint_clear_invalid(cp);
+ kunmap_local(cp);
if (!nilfs_cpfile_is_in_first(cpfile, cno))
nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
- kaddr, 1);
- mark_buffer_dirty(cp_bh);
+ 1);
- kaddr = kmap_atomic(header_bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, header_bh,
- kaddr);
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->ch_ncheckpoints, 1);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
mark_buffer_dirty(header_bh);
- nilfs_mdt_mark_dirty(cpfile);
+ } else {
+ kunmap_local(cp);
}
- if (cpp != NULL)
- *cpp = cp;
- *bhp = cp_bh;
+ /* Force the buffer and the inode to become dirty */
+ mark_buffer_dirty(cp_bh);
+ brelse(cp_bh);
+ nilfs_mdt_mark_dirty(cpfile);
- out_header:
+out_header:
brelse(header_bh);
- out_sem:
+out_sem:
up_write(&NILFS_MDT(cpfile)->mi_sem);
return ret;
}
/**
- * nilfs_cpfile_put_checkpoint - put a checkpoint
- * @cpfile: inode of checkpoint file
- * @cno: checkpoint number
- * @bh: buffer head
+ * nilfs_cpfile_finalize_checkpoint - fill in a checkpoint entry in cpfile
+ * @cpfile: checkpoint file inode
+ * @cno: checkpoint number
+ * @root: nilfs root object
+ * @blkinc: number of blocks added by this checkpoint
+ * @ctime: checkpoint creation time
+ * @minor: minor checkpoint flag
*
- * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint
- * specified by @cno. @bh must be the buffer head which has been returned by
- * a previous call to nilfs_cpfile_get_checkpoint() with @cno.
+ * This function completes the checkpoint entry numbered by @cno in the
+ * cpfile with the data given by the arguments @root, @blkinc, @ctime, and
+ * @minor.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EIO - I/O error (including metadata corruption).
*/
-void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno,
- struct buffer_head *bh)
+int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno,
+ struct nilfs_root *root, __u64 blkinc,
+ time64_t ctime, bool minor)
{
- kunmap(bh->b_page);
- brelse(bh);
+ struct buffer_head *cp_bh;
+ struct nilfs_checkpoint *cp;
+ size_t offset;
+ int ret;
+
+ if (WARN_ON_ONCE(cno < 1))
+ return -EIO;
+
+ down_write(&NILFS_MDT(cpfile)->mi_sem);
+ ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
+ if (unlikely(ret < 0)) {
+ if (ret == -ENOENT)
+ goto error;
+ goto out_sem;
+ }
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
+ if (unlikely(nilfs_checkpoint_invalid(cp))) {
+ kunmap_local(cp);
+ brelse(cp_bh);
+ goto error;
+ }
+
+ cp->cp_snapshot_list.ssl_next = 0;
+ cp->cp_snapshot_list.ssl_prev = 0;
+ cp->cp_inodes_count = cpu_to_le64(atomic64_read(&root->inodes_count));
+ cp->cp_blocks_count = cpu_to_le64(atomic64_read(&root->blocks_count));
+ cp->cp_nblk_inc = cpu_to_le64(blkinc);
+ cp->cp_create = cpu_to_le64(ctime);
+ cp->cp_cno = cpu_to_le64(cno);
+
+ if (minor)
+ nilfs_checkpoint_set_minor(cp);
+ else
+ nilfs_checkpoint_clear_minor(cp);
+
+ nilfs_write_inode_common(root->ifile, &cp->cp_ifile_inode);
+ nilfs_bmap_write(NILFS_I(root->ifile)->i_bmap, &cp->cp_ifile_inode);
+
+ kunmap_local(cp);
+ brelse(cp_bh);
+out_sem:
+ up_write(&NILFS_MDT(cpfile)->mi_sem);
+ return ret;
+
+error:
+ nilfs_error(cpfile->i_sb,
+ "checkpoint finalization failed due to metadata corruption.");
+ ret = -EIO;
+ goto out_sem;
}
/**
@@ -299,14 +447,11 @@ void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno,
* the period from @start to @end, excluding @end itself. The checkpoints
* which have been already deleted are ignored.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - invalid checkpoints.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid checkpoints.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
__u64 start,
@@ -317,6 +462,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
struct nilfs_checkpoint *cp;
size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
__u64 cno;
+ size_t offset;
void *kaddr;
unsigned long tnicps;
int ret, ncps, nicps, nss, count, i;
@@ -347,9 +493,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
continue;
}
- kaddr = kmap_atomic(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(
- cpfile, cno, cp_bh, kaddr);
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kaddr = kmap_local_folio(cp_bh->b_folio, offset);
nicps = 0;
for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) {
if (nilfs_checkpoint_snapshot(cp)) {
@@ -359,43 +504,42 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
nicps++;
}
}
- if (nicps > 0) {
- tnicps += nicps;
- mark_buffer_dirty(cp_bh);
- nilfs_mdt_mark_dirty(cpfile);
- if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
- count =
- nilfs_cpfile_block_sub_valid_checkpoints(
- cpfile, cp_bh, kaddr, nicps);
- if (count == 0) {
- /* make hole */
- kunmap_atomic(kaddr);
- brelse(cp_bh);
- ret =
- nilfs_cpfile_delete_checkpoint_block(
- cpfile, cno);
- if (ret == 0)
- continue;
- nilfs_err(cpfile->i_sb,
- "error %d deleting checkpoint block",
- ret);
- break;
- }
- }
+ kunmap_local(kaddr);
+
+ if (nicps <= 0) {
+ brelse(cp_bh);
+ continue;
}
- kunmap_atomic(kaddr);
+ tnicps += nicps;
+ mark_buffer_dirty(cp_bh);
+ nilfs_mdt_mark_dirty(cpfile);
+ if (nilfs_cpfile_is_in_first(cpfile, cno)) {
+ brelse(cp_bh);
+ continue;
+ }
+
+ count = nilfs_cpfile_block_sub_valid_checkpoints(cpfile, cp_bh,
+ nicps);
brelse(cp_bh);
+ if (count)
+ continue;
+
+ /* Delete the block if there are no more valid checkpoints */
+ ret = nilfs_cpfile_delete_checkpoint_block(cpfile, cno);
+ if (unlikely(ret)) {
+ nilfs_err(cpfile->i_sb,
+ "error %d deleting checkpoint block", ret);
+ break;
+ }
}
if (tnicps > 0) {
- kaddr = kmap_atomic(header_bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, header_bh,
- kaddr);
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
}
brelse(header_bh);
@@ -429,6 +573,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
struct buffer_head *bh;
size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
__u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop;
+ size_t offset;
void *kaddr;
int n, ret;
int ncps, i;
@@ -447,8 +592,8 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
}
ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
- kaddr = kmap_atomic(bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, bh);
+ cp = kaddr = kmap_local_folio(bh->b_folio, offset);
for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
if (!nilfs_checkpoint_invalid(cp)) {
nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp,
@@ -457,7 +602,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
n++;
}
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
brelse(bh);
}
@@ -482,7 +627,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
struct nilfs_cpinfo *ci = buf;
__u64 curr = *cnop, next;
unsigned long curr_blkoff, next_blkoff;
- void *kaddr;
+ size_t offset;
int n = 0, ret;
down_read(&NILFS_MDT(cpfile)->mi_sem);
@@ -491,10 +636,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
ret = nilfs_cpfile_get_header_block(cpfile, &bh);
if (ret < 0)
goto out;
- kaddr = kmap_atomic(bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
+ header = kmap_local_folio(bh->b_folio, 0);
curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
brelse(bh);
if (curr == 0) {
ret = 0;
@@ -512,9 +656,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
ret = 0; /* No snapshots (started from a hole block) */
goto out;
}
- kaddr = kmap_atomic(bh->b_page);
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, curr, bh);
+ cp = kmap_local_folio(bh->b_folio, offset);
while (n < nci) {
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
curr = ~(__u64)0; /* Terminator */
if (unlikely(nilfs_checkpoint_invalid(cp) ||
!nilfs_checkpoint_snapshot(cp)))
@@ -526,9 +670,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
if (next == 0)
break; /* reach end of the snapshot list */
+ kunmap_local(cp);
next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
if (curr_blkoff != next_blkoff) {
- kunmap_atomic(kaddr);
brelse(bh);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
0, &bh);
@@ -536,12 +680,13 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
WARN_ON(ret == -ENOENT);
goto out;
}
- kaddr = kmap_atomic(bh->b_page);
}
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, next, bh);
+ cp = kmap_local_folio(bh->b_folio, offset);
curr = next;
curr_blkoff = next_blkoff;
}
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
brelse(bh);
*cnop = curr;
ret = n;
@@ -552,11 +697,29 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
}
/**
- * nilfs_cpfile_get_cpinfo -
- * @cpfile:
- * @cno:
- * @ci:
- * @nci:
+ * nilfs_cpfile_get_cpinfo - get information on checkpoints
+ * @cpfile: checkpoint file inode
+ * @cnop: place to pass a starting checkpoint number and receive a
+ * checkpoint number to continue the search
+ * @mode: mode of checkpoints that the caller wants to retrieve
+ * @buf: buffer for storing checkpoints' information
+ * @cisz: byte size of one checkpoint info item in array
+ * @nci: number of checkpoint info items to retrieve
+ *
+ * nilfs_cpfile_get_cpinfo() searches for checkpoints in @mode state
+ * starting from the checkpoint number stored in @cnop, and stores
+ * information about found checkpoints in @buf.
+ * The buffer pointed to by @buf must be large enough to store information
+ * for @nci checkpoints. If at least one checkpoint information is
+ * successfully retrieved, @cnop is updated to point to the checkpoint
+ * number to continue searching.
+ *
+ * Return: Count of checkpoint info items stored in the output buffer on
+ * success, or one of the following negative error codes on failure:
+ * * %-EINVAL - Invalid checkpoint mode.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Invalid checkpoint number specified.
*/
ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode,
@@ -573,9 +736,16 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode,
}
/**
- * nilfs_cpfile_delete_checkpoint -
- * @cpfile:
- * @cno:
+ * nilfs_cpfile_delete_checkpoint - delete a checkpoint
+ * @cpfile: checkpoint file inode
+ * @cno: checkpoint number to delete
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EBUSY - Checkpoint in use (snapshot specified).
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No valid checkpoint found.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno)
{
@@ -594,26 +764,6 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno)
return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1);
}
-static struct nilfs_snapshot_list *
-nilfs_cpfile_block_get_snapshot_list(const struct inode *cpfile,
- __u64 cno,
- struct buffer_head *bh,
- void *kaddr)
-{
- struct nilfs_cpfile_header *header;
- struct nilfs_checkpoint *cp;
- struct nilfs_snapshot_list *list;
-
- if (cno != 0) {
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
- list = &cp->cp_snapshot_list;
- } else {
- header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
- list = &header->ch_snapshot_list;
- }
- return list;
-}
-
static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
{
struct buffer_head *header_bh, *curr_bh, *prev_bh, *cp_bh;
@@ -622,94 +772,103 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
struct nilfs_snapshot_list *list;
__u64 curr, prev;
unsigned long curr_blkoff, prev_blkoff;
- void *kaddr;
+ size_t offset, curr_list_offset, prev_list_offset;
int ret;
if (cno == 0)
return -ENOENT; /* checkpoint number 0 is invalid */
down_write(&NILFS_MDT(cpfile)->mi_sem);
+ ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
+ if (unlikely(ret < 0))
+ goto out_sem;
+
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
if (ret < 0)
- goto out_sem;
- kaddr = kmap_atomic(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+ goto out_header;
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
if (nilfs_checkpoint_invalid(cp)) {
ret = -ENOENT;
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
goto out_cp;
}
if (nilfs_checkpoint_snapshot(cp)) {
ret = 0;
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
goto out_cp;
}
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
- ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
- if (ret < 0)
- goto out_cp;
- kaddr = kmap_atomic(header_bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
+ /*
+ * Find the last snapshot before the checkpoint being changed to
+ * snapshot mode by going backwards through the snapshot list.
+ * Set "prev" to its checkpoint number, or 0 if not found.
+ */
+ header = kmap_local_folio(header_bh->b_folio, 0);
list = &header->ch_snapshot_list;
curr_bh = header_bh;
get_bh(curr_bh);
curr = 0;
curr_blkoff = 0;
+ curr_list_offset = nilfs_cpfile_ch_snapshot_list_offset();
prev = le64_to_cpu(list->ssl_prev);
while (prev > cno) {
prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
curr = prev;
+ kunmap_local(list);
if (curr_blkoff != prev_blkoff) {
- kunmap_atomic(kaddr);
brelse(curr_bh);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
0, &curr_bh);
- if (ret < 0)
- goto out_header;
- kaddr = kmap_atomic(curr_bh->b_page);
+ if (unlikely(ret < 0))
+ goto out_cp;
}
+ curr_list_offset = nilfs_cpfile_cp_snapshot_list_offset(
+ cpfile, curr, curr_bh);
+ list = kmap_local_folio(curr_bh->b_folio, curr_list_offset);
curr_blkoff = prev_blkoff;
- cp = nilfs_cpfile_block_get_checkpoint(
- cpfile, curr, curr_bh, kaddr);
- list = &cp->cp_snapshot_list;
prev = le64_to_cpu(list->ssl_prev);
}
- kunmap_atomic(kaddr);
+ kunmap_local(list);
if (prev != 0) {
ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
&prev_bh);
if (ret < 0)
goto out_curr;
+
+ prev_list_offset = nilfs_cpfile_cp_snapshot_list_offset(
+ cpfile, prev, prev_bh);
} else {
prev_bh = header_bh;
get_bh(prev_bh);
+ prev_list_offset = nilfs_cpfile_ch_snapshot_list_offset();
}
- kaddr = kmap_atomic(curr_bh->b_page);
- list = nilfs_cpfile_block_get_snapshot_list(
- cpfile, curr, curr_bh, kaddr);
+ /* Update the list entry for the next snapshot */
+ list = kmap_local_folio(curr_bh->b_folio, curr_list_offset);
list->ssl_prev = cpu_to_le64(cno);
- kunmap_atomic(kaddr);
+ kunmap_local(list);
- kaddr = kmap_atomic(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+ /* Update the checkpoint being changed to a snapshot */
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
nilfs_checkpoint_set_snapshot(cp);
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
- kaddr = kmap_atomic(prev_bh->b_page);
- list = nilfs_cpfile_block_get_snapshot_list(
- cpfile, prev, prev_bh, kaddr);
+ /* Update the list entry for the previous snapshot */
+ list = kmap_local_folio(prev_bh->b_folio, prev_list_offset);
list->ssl_next = cpu_to_le64(cno);
- kunmap_atomic(kaddr);
+ kunmap_local(list);
- kaddr = kmap_atomic(header_bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
+ /* Update the statistics in the header */
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->ch_nsnapshots, 1);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
mark_buffer_dirty(prev_bh);
mark_buffer_dirty(curr_bh);
@@ -722,12 +881,12 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
out_curr:
brelse(curr_bh);
- out_header:
- brelse(header_bh);
-
out_cp:
brelse(cp_bh);
+ out_header:
+ brelse(header_bh);
+
out_sem:
up_write(&NILFS_MDT(cpfile)->mi_sem);
return ret;
@@ -740,79 +899,87 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
struct nilfs_checkpoint *cp;
struct nilfs_snapshot_list *list;
__u64 next, prev;
- void *kaddr;
+ size_t offset, next_list_offset, prev_list_offset;
int ret;
if (cno == 0)
return -ENOENT; /* checkpoint number 0 is invalid */
down_write(&NILFS_MDT(cpfile)->mi_sem);
+ ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
+ if (unlikely(ret < 0))
+ goto out_sem;
+
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
if (ret < 0)
- goto out_sem;
- kaddr = kmap_atomic(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+ goto out_header;
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
if (nilfs_checkpoint_invalid(cp)) {
ret = -ENOENT;
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
goto out_cp;
}
if (!nilfs_checkpoint_snapshot(cp)) {
ret = 0;
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
goto out_cp;
}
list = &cp->cp_snapshot_list;
next = le64_to_cpu(list->ssl_next);
prev = le64_to_cpu(list->ssl_prev);
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
- ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
- if (ret < 0)
- goto out_cp;
if (next != 0) {
ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0,
&next_bh);
if (ret < 0)
- goto out_header;
+ goto out_cp;
+
+ next_list_offset = nilfs_cpfile_cp_snapshot_list_offset(
+ cpfile, next, next_bh);
} else {
next_bh = header_bh;
get_bh(next_bh);
+ next_list_offset = nilfs_cpfile_ch_snapshot_list_offset();
}
if (prev != 0) {
ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
&prev_bh);
if (ret < 0)
goto out_next;
+
+ prev_list_offset = nilfs_cpfile_cp_snapshot_list_offset(
+ cpfile, prev, prev_bh);
} else {
prev_bh = header_bh;
get_bh(prev_bh);
+ prev_list_offset = nilfs_cpfile_ch_snapshot_list_offset();
}
- kaddr = kmap_atomic(next_bh->b_page);
- list = nilfs_cpfile_block_get_snapshot_list(
- cpfile, next, next_bh, kaddr);
+ /* Update the list entry for the next snapshot */
+ list = kmap_local_folio(next_bh->b_folio, next_list_offset);
list->ssl_prev = cpu_to_le64(prev);
- kunmap_atomic(kaddr);
+ kunmap_local(list);
- kaddr = kmap_atomic(prev_bh->b_page);
- list = nilfs_cpfile_block_get_snapshot_list(
- cpfile, prev, prev_bh, kaddr);
+ /* Update the list entry for the previous snapshot */
+ list = kmap_local_folio(prev_bh->b_folio, prev_list_offset);
list->ssl_next = cpu_to_le64(next);
- kunmap_atomic(kaddr);
+ kunmap_local(list);
- kaddr = kmap_atomic(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+ /* Update the snapshot being changed back to a plain checkpoint */
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
nilfs_checkpoint_clear_snapshot(cp);
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
- kaddr = kmap_atomic(header_bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
+ /* Update the statistics in the header */
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->ch_nsnapshots, -1);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
mark_buffer_dirty(next_bh);
mark_buffer_dirty(prev_bh);
@@ -825,39 +992,33 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
out_next:
brelse(next_bh);
- out_header:
- brelse(header_bh);
-
out_cp:
brelse(cp_bh);
+ out_header:
+ brelse(header_bh);
+
out_sem:
up_write(&NILFS_MDT(cpfile)->mi_sem);
return ret;
}
/**
- * nilfs_cpfile_is_snapshot -
+ * nilfs_cpfile_is_snapshot - determine if checkpoint is a snapshot
* @cpfile: inode of checkpoint file
- * @cno: checkpoint number
- *
- * Description:
- *
- * Return Value: On success, 1 is returned if the checkpoint specified by
- * @cno is a snapshot, or 0 if not. On error, one of the following negative
- * error codes is returned.
+ * @cno: checkpoint number
*
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - No such checkpoint.
+ * Return: 1 if the checkpoint specified by @cno is a snapshot, 0 if not, or
+ * one of the following negative error codes on failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No such checkpoint.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
{
struct buffer_head *bh;
struct nilfs_checkpoint *cp;
- void *kaddr;
+ size_t offset;
int ret;
/*
@@ -871,13 +1032,14 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
if (ret < 0)
goto out;
- kaddr = kmap_atomic(bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, bh);
+ cp = kmap_local_folio(bh->b_folio, offset);
if (nilfs_checkpoint_invalid(cp))
ret = -ENOENT;
else
ret = nilfs_checkpoint_snapshot(cp);
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
brelse(bh);
out:
@@ -894,14 +1056,11 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
* Description: nilfs_change_cpmode() changes the mode of the checkpoint
* specified by @cno. The mode @mode is NILFS_CHECKPOINT or NILFS_SNAPSHOT.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - No such checkpoint.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No such checkpoint.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
{
@@ -933,20 +1092,17 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
* @cpstat: pointer to a structure of checkpoint statistics
*
* Description: nilfs_cpfile_get_stat() returns information about checkpoints.
+ * The checkpoint statistics are stored in the location pointed to by @cpstat.
*
- * Return Value: On success, 0 is returned, and checkpoints information is
- * stored in the place pointed by @cpstat. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
{
struct buffer_head *bh;
struct nilfs_cpfile_header *header;
- void *kaddr;
int ret;
down_read(&NILFS_MDT(cpfile)->mi_sem);
@@ -954,12 +1110,11 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
ret = nilfs_cpfile_get_header_block(cpfile, &bh);
if (ret < 0)
goto out_sem;
- kaddr = kmap_atomic(bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
+ header = kmap_local_folio(bh->b_folio, 0);
cpstat->cs_cno = nilfs_mdt_cno(cpfile);
cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints);
cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
brelse(bh);
out_sem:
@@ -973,6 +1128,8 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
* @cpsize: size of a checkpoint entry
* @raw_inode: on-disk cpfile inode
* @inodep: buffer to store the inode
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_cpfile_read(struct super_block *sb, size_t cpsize,
struct nilfs_inode *raw_inode, struct inode **inodep)
@@ -991,7 +1148,7 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize,
cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO);
if (unlikely(!cpfile))
return -ENOMEM;
- if (!(cpfile->i_state & I_NEW))
+ if (!(inode_state_read_once(cpfile) & I_NEW))
goto out;
err = nilfs_mdt_init(cpfile, NILFS_MDT_GFP, 0);
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index edabb2dc5756..f5b1d59289eb 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -16,10 +16,12 @@
#include <linux/nilfs2_ondisk.h> /* nilfs_inode, nilfs_checkpoint */
-int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int,
- struct nilfs_checkpoint **,
- struct buffer_head **);
-void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *);
+int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno,
+ struct nilfs_root *root, struct inode *ifile);
+int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno);
+int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno,
+ struct nilfs_root *root, __u64 blkinc,
+ time64_t ctime, bool minor);
int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64);
int nilfs_cpfile_delete_checkpoint(struct inode *, __u64);
int nilfs_cpfile_change_cpmode(struct inode *, __u64, int);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 9930fa901039..674380837ab9 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -40,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
static int nilfs_dat_prepare_entry(struct inode *dat,
struct nilfs_palloc_req *req, int create)
{
- return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
- create, &req->pr_entry_bh);
+ int ret;
+
+ ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
+ create, &req->pr_entry_bh);
+ if (unlikely(ret == -ENOENT)) {
+ nilfs_err(dat->i_sb,
+ "DAT doesn't have a block to manage vblocknr = %llu",
+ (unsigned long long)req->pr_entry_nr);
+ /*
+ * Return internal code -EINVAL to notify bmap layer of
+ * metadata corruption.
+ */
+ ret = -EINVAL;
+ }
+ return ret;
}
static void nilfs_dat_commit_entry(struct inode *dat,
@@ -62,7 +75,7 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
int ret;
- ret = nilfs_palloc_prepare_alloc_entry(dat, req);
+ ret = nilfs_palloc_prepare_alloc_entry(dat, req, true);
if (ret < 0)
return ret;
@@ -76,15 +89,15 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
entry->de_blocknr = cpu_to_le64(0);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
nilfs_palloc_commit_alloc_entry(dat, req);
nilfs_dat_commit_entry(dat, req);
@@ -100,15 +113,15 @@ static void nilfs_dat_commit_free(struct inode *dat,
struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
entry->de_blocknr = cpu_to_le64(0);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
nilfs_dat_commit_entry(dat, req);
@@ -123,25 +136,21 @@ static void nilfs_dat_commit_free(struct inode *dat,
int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
{
- int ret;
-
- ret = nilfs_dat_prepare_entry(dat, req, 0);
- WARN_ON(ret == -ENOENT);
- return ret;
+ return nilfs_dat_prepare_entry(dat, req, 0);
}
void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
sector_t blocknr)
{
struct nilfs_dat_entry *entry;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
entry->de_blocknr = cpu_to_le64(blocknr);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
nilfs_dat_commit_entry(dat, req);
}
@@ -149,21 +158,21 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
+ __u64 start;
sector_t blocknr;
- void *kaddr;
+ size_t offset;
int ret;
ret = nilfs_dat_prepare_entry(dat, req, 0);
- if (ret < 0) {
- WARN_ON(ret == -ENOENT);
+ if (ret < 0)
return ret;
- }
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
+ start = le64_to_cpu(entry->de_start);
blocknr = le64_to_cpu(entry->de_blocknr);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
if (blocknr == 0) {
ret = nilfs_palloc_prepare_free_entry(dat, req);
@@ -172,6 +181,15 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
return ret;
}
}
+ if (unlikely(start > nilfs_mdt_cno(dat))) {
+ nilfs_err(dat->i_sb,
+ "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)",
+ (unsigned long long)req->pr_entry_nr,
+ (unsigned long long)start,
+ (unsigned long long)nilfs_mdt_cno(dat));
+ nilfs_dat_abort_entry(dat, req);
+ return -EINVAL;
+ }
return 0;
}
@@ -182,11 +200,11 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
struct nilfs_dat_entry *entry;
__u64 start, end;
sector_t blocknr;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
end = start = le64_to_cpu(entry->de_start);
if (!dead) {
end = nilfs_mdt_cno(dat);
@@ -194,7 +212,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
}
entry->de_end = cpu_to_le64(end);
blocknr = le64_to_cpu(entry->de_blocknr);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
if (blocknr == 0)
nilfs_dat_commit_free(dat, req);
@@ -207,14 +225,14 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
struct nilfs_dat_entry *entry;
__u64 start;
sector_t blocknr;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
start = le64_to_cpu(entry->de_start);
blocknr = le64_to_cpu(entry->de_blocknr);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
if (start == nilfs_mdt_cno(dat) && blocknr == 0)
nilfs_palloc_abort_free_entry(dat, req);
@@ -253,18 +271,16 @@ void nilfs_dat_abort_update(struct inode *dat,
}
/**
- * nilfs_dat_mark_dirty -
- * @dat: DAT file inode
+ * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified
+ * virtual block address entry as dirty
+ * @dat: DAT file inode
* @vblocknr: virtual block number
*
- * Description:
- *
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid DAT entry (internal code).
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr)
{
@@ -287,14 +303,11 @@ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr)
* Description: nilfs_dat_freev() frees the virtual block numbers specified by
* @vblocknrs and @nitems.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - The virtual block number have not been allocated.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - The virtual block number have not been allocated.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems)
{
@@ -310,18 +323,16 @@ int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems)
* Description: nilfs_dat_move() changes the block number associated with
* @vblocknr to @blocknr.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
{
struct buffer_head *entry_bh;
struct nilfs_dat_entry *entry;
- void *kaddr;
+ size_t offset;
int ret;
ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
@@ -344,21 +355,21 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
}
}
- kaddr = kmap_atomic(entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh);
+ entry = kmap_local_folio(entry_bh->b_folio, offset);
if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
nilfs_crit(dat->i_sb,
"%s: invalid vblocknr = %llu, [%llu, %llu)",
__func__, (unsigned long long)vblocknr,
(unsigned long long)le64_to_cpu(entry->de_start),
(unsigned long long)le64_to_cpu(entry->de_end));
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
brelse(entry_bh);
return -EINVAL;
}
WARN_ON(blocknr == 0);
entry->de_blocknr = cpu_to_le64(blocknr);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
mark_buffer_dirty(entry_bh);
nilfs_mdt_mark_dirty(dat);
@@ -375,24 +386,21 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
* @blocknrp: pointer to a block number
*
* Description: nilfs_dat_translate() maps the virtual block number @vblocknr
- * to the corresponding block number.
- *
- * Return Value: On success, 0 is returned and the block number associated
- * with @vblocknr is stored in the place pointed by @blocknrp. On error, one
- * of the following negative error codes is returned.
- *
- * %-EIO - I/O error.
+ * to the corresponding block number. The block number associated with
+ * @vblocknr is stored in the place pointed to by @blocknrp.
*
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - A block number associated with @vblocknr does not exist.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - A block number associated with @vblocknr does not exist.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
{
struct buffer_head *entry_bh, *bh;
struct nilfs_dat_entry *entry;
sector_t blocknr;
- void *kaddr;
+ size_t offset;
int ret;
ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
@@ -408,8 +416,8 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
}
}
- kaddr = kmap_atomic(entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh);
+ entry = kmap_local_folio(entry_bh->b_folio, offset);
blocknr = le64_to_cpu(entry->de_blocknr);
if (blocknr == 0) {
ret = -ENOENT;
@@ -418,7 +426,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
*blocknrp = blocknr;
out:
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
brelse(entry_bh);
return ret;
}
@@ -427,11 +435,12 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz,
size_t nvi)
{
struct buffer_head *entry_bh;
- struct nilfs_dat_entry *entry;
+ struct nilfs_dat_entry *entry, *first_entry;
struct nilfs_vinfo *vinfo = buf;
__u64 first, last;
- void *kaddr;
+ size_t offset;
unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block;
+ unsigned int entry_size = NILFS_MDT(dat)->mi_entry_size;
int i, j, n, ret;
for (i = 0; i < nvi; i += n) {
@@ -439,23 +448,28 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz,
0, &entry_bh);
if (ret < 0)
return ret;
- kaddr = kmap_atomic(entry_bh->b_page);
- /* last virtual block number in this block */
+
first = vinfo->vi_vblocknr;
- do_div(first, entries_per_block);
+ first = div64_ul(first, entries_per_block);
first *= entries_per_block;
+ /* first virtual block number in this block */
+
last = first + entries_per_block - 1;
+ /* last virtual block number in this block */
+
+ offset = nilfs_palloc_entry_offset(dat, first, entry_bh);
+ first_entry = kmap_local_folio(entry_bh->b_folio, offset);
for (j = i, n = 0;
j < nvi && vinfo->vi_vblocknr >= first &&
vinfo->vi_vblocknr <= last;
j++, n++, vinfo = (void *)vinfo + visz) {
- entry = nilfs_palloc_block_get_entry(
- dat, vinfo->vi_vblocknr, entry_bh, kaddr);
+ entry = (void *)first_entry +
+ (vinfo->vi_vblocknr - first) * entry_size;
vinfo->vi_start = le64_to_cpu(entry->de_start);
vinfo->vi_end = le64_to_cpu(entry->de_end);
vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
}
- kunmap_atomic(kaddr);
+ kunmap_local(first_entry);
brelse(entry_bh);
}
@@ -468,6 +482,8 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz,
* @entry_size: size of a dat entry
* @raw_inode: on-disk dat inode
* @inodep: buffer to store the inode
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_dat_read(struct super_block *sb, size_t entry_size,
struct nilfs_inode *raw_inode, struct inode **inodep)
@@ -490,7 +506,7 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size,
dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO);
if (unlikely(!dat))
return -ENOMEM;
- if (!(dat->i_state & I_NEW))
+ if (!(inode_state_read_once(dat) & I_NEW))
goto out;
err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di));
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index decd6471300b..6ca3d74be1e1 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -64,19 +64,13 @@ static inline unsigned int nilfs_chunk_size(struct inode *inode)
return inode->i_sb->s_blocksize;
}
-static inline void nilfs_put_page(struct page *page)
-{
- kunmap(page);
- put_page(page);
-}
-
/*
* Return the offset into page `page_nr' of the last valid
* byte in that page, plus one.
*/
static unsigned int nilfs_last_byte(struct inode *inode, unsigned long page_nr)
{
- unsigned int last_byte = inode->i_size;
+ u64 last_byte = inode->i_size;
last_byte -= page_nr << PAGE_SHIFT;
if (last_byte > PAGE_SIZE)
@@ -84,48 +78,46 @@ static unsigned int nilfs_last_byte(struct inode *inode, unsigned long page_nr)
return last_byte;
}
-static int nilfs_prepare_chunk(struct page *page, unsigned int from,
+static int nilfs_prepare_chunk(struct folio *folio, unsigned int from,
unsigned int to)
{
- loff_t pos = page_offset(page) + from;
+ loff_t pos = folio_pos(folio) + from;
- return __block_write_begin(page, pos, to - from, nilfs_get_block);
+ return __block_write_begin(folio, pos, to - from, nilfs_get_block);
}
-static void nilfs_commit_chunk(struct page *page,
- struct address_space *mapping,
- unsigned int from, unsigned int to)
+static void nilfs_commit_chunk(struct folio *folio,
+ struct address_space *mapping, size_t from, size_t to)
{
struct inode *dir = mapping->host;
- loff_t pos = page_offset(page) + from;
- unsigned int len = to - from;
- unsigned int nr_dirty, copied;
+ loff_t pos = folio_pos(folio) + from;
+ size_t copied, len = to - from;
+ unsigned int nr_dirty;
int err;
- nr_dirty = nilfs_page_count_clean_buffers(page, from, to);
- copied = block_write_end(NULL, mapping, pos, len, len, page, NULL);
+ nr_dirty = nilfs_page_count_clean_buffers(folio, from, to);
+ copied = block_write_end(pos, len, len, folio);
if (pos + copied > dir->i_size)
i_size_write(dir, pos + copied);
if (IS_DIRSYNC(dir))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
err = nilfs_set_file_dirty(dir, nr_dirty);
WARN_ON(err); /* do not happen */
- unlock_page(page);
+ folio_unlock(folio);
}
-static bool nilfs_check_page(struct page *page)
+static bool nilfs_check_folio(struct folio *folio, char *kaddr)
{
- struct inode *dir = page->mapping->host;
+ struct inode *dir = folio->mapping->host;
struct super_block *sb = dir->i_sb;
unsigned int chunk_size = nilfs_chunk_size(dir);
- char *kaddr = page_address(page);
- unsigned int offs, rec_len;
- unsigned int limit = PAGE_SIZE;
+ size_t offs, rec_len;
+ size_t limit = folio_size(folio);
struct nilfs_dir_entry *p;
char *error;
- if ((dir->i_size >> PAGE_SHIFT) == page->index) {
- limit = dir->i_size & ~PAGE_MASK;
+ if (dir->i_size < folio_pos(folio) + limit) {
+ limit = dir->i_size - folio_pos(folio);
if (limit & (chunk_size - 1))
goto Ebadsize;
if (!limit)
@@ -143,11 +135,14 @@ static bool nilfs_check_page(struct page *page)
goto Enamelen;
if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
goto Espan;
+ if (unlikely(p->inode &&
+ NILFS_PRIVATE_INODE(le64_to_cpu(p->inode))))
+ goto Einumber;
}
if (offs != limit)
goto Eend;
out:
- SetPageChecked(page);
+ folio_set_checked(folio);
return true;
/* Too bad, we had an error */
@@ -168,10 +163,13 @@ Enamelen:
goto bad_entry;
Espan:
error = "directory entry across blocks";
+ goto bad_entry;
+Einumber:
+ error = "disallowed inode number";
bad_entry:
nilfs_error(sb,
- "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
- dir->i_ino, error, (page->index << PAGE_SHIFT) + offs,
+ "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d",
+ dir->i_ino, error, (folio->index << PAGE_SHIFT) + offs,
(unsigned long)le64_to_cpu(p->inode),
rec_len, p->name_len);
goto fail;
@@ -179,29 +177,33 @@ Eend:
p = (struct nilfs_dir_entry *)(kaddr + offs);
nilfs_error(sb,
"entry in directory #%lu spans the page boundary offset=%lu, inode=%lu",
- dir->i_ino, (page->index << PAGE_SHIFT) + offs,
+ dir->i_ino, (folio->index << PAGE_SHIFT) + offs,
(unsigned long)le64_to_cpu(p->inode));
fail:
- SetPageError(page);
return false;
}
-static struct page *nilfs_get_page(struct inode *dir, unsigned long n)
+static void *nilfs_get_folio(struct inode *dir, unsigned long n,
+ struct folio **foliop)
{
struct address_space *mapping = dir->i_mapping;
- struct page *page = read_mapping_page(mapping, n, NULL);
+ struct folio *folio = read_mapping_folio(mapping, n, NULL);
+ void *kaddr;
- if (!IS_ERR(page)) {
- kmap(page);
- if (unlikely(!PageChecked(page))) {
- if (!nilfs_check_page(page))
- goto fail;
- }
+ if (IS_ERR(folio))
+ return folio;
+
+ kaddr = kmap_local_folio(folio, 0);
+ if (unlikely(!folio_test_checked(folio))) {
+ if (!nilfs_check_folio(folio, kaddr))
+ goto fail;
}
- return page;
+
+ *foliop = folio;
+ return kaddr;
fail:
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
return ERR_PTR(-EIO);
}
@@ -229,37 +231,6 @@ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p)
nilfs_rec_len_from_disk(p->rec_len));
}
-static unsigned char
-nilfs_filetype_table[NILFS_FT_MAX] = {
- [NILFS_FT_UNKNOWN] = DT_UNKNOWN,
- [NILFS_FT_REG_FILE] = DT_REG,
- [NILFS_FT_DIR] = DT_DIR,
- [NILFS_FT_CHRDEV] = DT_CHR,
- [NILFS_FT_BLKDEV] = DT_BLK,
- [NILFS_FT_FIFO] = DT_FIFO,
- [NILFS_FT_SOCK] = DT_SOCK,
- [NILFS_FT_SYMLINK] = DT_LNK,
-};
-
-#define S_SHIFT 12
-static unsigned char
-nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
- [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
- [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK,
- [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK,
-};
-
-static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode)
-{
- umode_t mode = inode->i_mode;
-
- de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-}
-
static int nilfs_readdir(struct file *file, struct dir_context *ctx)
{
loff_t pos = ctx->pos;
@@ -275,99 +246,93 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx)
for ( ; n < npages; n++, offset = 0) {
char *kaddr, *limit;
struct nilfs_dir_entry *de;
- struct page *page = nilfs_get_page(inode, n);
+ struct folio *folio;
- if (IS_ERR(page)) {
+ kaddr = nilfs_get_folio(inode, n, &folio);
+ if (IS_ERR(kaddr)) {
nilfs_error(sb, "bad page in #%lu", inode->i_ino);
ctx->pos += PAGE_SIZE - offset;
return -EIO;
}
- kaddr = page_address(page);
de = (struct nilfs_dir_entry *)(kaddr + offset);
limit = kaddr + nilfs_last_byte(inode, n) -
NILFS_DIR_REC_LEN(1);
for ( ; (char *)de <= limit; de = nilfs_next_entry(de)) {
if (de->rec_len == 0) {
nilfs_error(sb, "zero-length directory entry");
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
return -EIO;
}
if (de->inode) {
unsigned char t;
- if (de->file_type < NILFS_FT_MAX)
- t = nilfs_filetype_table[de->file_type];
- else
- t = DT_UNKNOWN;
+ t = fs_ftype_to_dtype(de->file_type);
if (!dir_emit(ctx, de->name, de->name_len,
le64_to_cpu(de->inode), t)) {
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
return 0;
}
}
ctx->pos += nilfs_rec_len_from_disk(de->rec_len);
}
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
}
return 0;
}
/*
- * nilfs_find_entry()
+ * nilfs_find_entry()
*
- * finds an entry in the specified directory with the wanted name. It
- * returns the page in which the entry was found, and the entry itself
- * (as a parameter - res_dir). Page is returned mapped and unlocked.
- * Entry is guaranteed to be valid.
+ * Finds an entry in the specified directory with the wanted name. It
+ * returns the folio in which the entry was found, and the entry itself.
+ * The folio is mapped and unlocked. When the caller is finished with
+ * the entry, it should call folio_release_kmap().
+ *
+ * On failure, returns an error pointer and the caller should ignore foliop.
*/
-struct nilfs_dir_entry *
-nilfs_find_entry(struct inode *dir, const struct qstr *qstr,
- struct page **res_page)
+struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir,
+ const struct qstr *qstr, struct folio **foliop)
{
const unsigned char *name = qstr->name;
int namelen = qstr->len;
unsigned int reclen = NILFS_DIR_REC_LEN(namelen);
unsigned long start, n;
unsigned long npages = dir_pages(dir);
- struct page *page = NULL;
struct nilfs_inode_info *ei = NILFS_I(dir);
struct nilfs_dir_entry *de;
if (npages == 0)
goto out;
- /* OFFSET_CACHE */
- *res_page = NULL;
-
start = ei->i_dir_start_lookup;
if (start >= npages)
start = 0;
n = start;
do {
- char *kaddr;
-
- page = nilfs_get_page(dir, n);
- if (!IS_ERR(page)) {
- kaddr = page_address(page);
- de = (struct nilfs_dir_entry *)kaddr;
- kaddr += nilfs_last_byte(dir, n) - reclen;
- while ((char *) de <= kaddr) {
- if (de->rec_len == 0) {
- nilfs_error(dir->i_sb,
- "zero-length directory entry");
- nilfs_put_page(page);
- goto out;
- }
- if (nilfs_match(namelen, name, de))
- goto found;
- de = nilfs_next_entry(de);
+ char *kaddr = nilfs_get_folio(dir, n, foliop);
+
+ if (IS_ERR(kaddr))
+ return ERR_CAST(kaddr);
+
+ de = (struct nilfs_dir_entry *)kaddr;
+ kaddr += nilfs_last_byte(dir, n) - reclen;
+ while ((char *)de <= kaddr) {
+ if (de->rec_len == 0) {
+ nilfs_error(dir->i_sb,
+ "zero-length directory entry");
+ folio_release_kmap(*foliop, kaddr);
+ goto out;
}
- nilfs_put_page(page);
+ if (nilfs_match(namelen, name, de))
+ goto found;
+ de = nilfs_next_entry(de);
}
+ folio_release_kmap(*foliop, kaddr);
+
if (++n >= npages)
n = 0;
- /* next page is past the blocks we've got */
+ /* next folio is past the blocks we've got */
if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) {
nilfs_error(dir->i_sb,
"dir %lu size %lld exceeds block count %llu",
@@ -377,59 +342,83 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr,
}
} while (n != start);
out:
- return NULL;
+ return ERR_PTR(-ENOENT);
found:
- *res_page = page;
ei->i_dir_start_lookup = n;
return de;
}
-struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p)
+struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
{
- struct page *page = nilfs_get_page(dir, 0);
- struct nilfs_dir_entry *de = NULL;
+ struct folio *folio;
+ struct nilfs_dir_entry *de, *next_de;
+ size_t limit;
+ char *msg;
+
+ de = nilfs_get_folio(dir, 0, &folio);
+ if (IS_ERR(de))
+ return NULL;
+
+ limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */
+ if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
+ !nilfs_match(1, ".", de))) {
+ msg = "missing '.'";
+ goto fail;
+ }
- if (!IS_ERR(page)) {
- de = nilfs_next_entry(
- (struct nilfs_dir_entry *)page_address(page));
- *p = page;
+ next_de = nilfs_next_entry(de);
+ /*
+ * If "next_de" has not reached the end of the chunk, there is
+ * at least one more record. Check whether it matches "..".
+ */
+ if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
+ !nilfs_match(2, "..", next_de))) {
+ msg = "missing '..'";
+ goto fail;
}
- return de;
+ *foliop = folio;
+ return next_de;
+
+fail:
+ nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
+ folio_release_kmap(folio, de);
+ return NULL;
}
-ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
+int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino)
{
- ino_t res = 0;
struct nilfs_dir_entry *de;
- struct page *page;
+ struct folio *folio;
- de = nilfs_find_entry(dir, qstr, &page);
- if (de) {
- res = le64_to_cpu(de->inode);
- kunmap(page);
- put_page(page);
- }
- return res;
+ de = nilfs_find_entry(dir, qstr, &folio);
+ if (IS_ERR(de))
+ return PTR_ERR(de);
+
+ *ino = le64_to_cpu(de->inode);
+ folio_release_kmap(folio, de);
+ return 0;
}
-/* Releases the page */
-void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
- struct page *page, struct inode *inode)
+int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
+ struct folio *folio, struct inode *inode)
{
- unsigned int from = (char *)de - (char *)page_address(page);
- unsigned int to = from + nilfs_rec_len_from_disk(de->rec_len);
- struct address_space *mapping = page->mapping;
+ size_t from = offset_in_folio(folio, de);
+ size_t to = from + nilfs_rec_len_from_disk(de->rec_len);
+ struct address_space *mapping = folio->mapping;
int err;
- lock_page(page);
- err = nilfs_prepare_chunk(page, from, to);
- BUG_ON(err);
+ folio_lock(folio);
+ err = nilfs_prepare_chunk(folio, from, to);
+ if (unlikely(err)) {
+ folio_unlock(folio);
+ return err;
+ }
de->inode = cpu_to_le64(inode->i_ino);
- nilfs_set_de_type(de, inode);
- nilfs_commit_chunk(page, mapping, from, to);
- nilfs_put_page(page);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ de->file_type = fs_umode_to_ftype(inode->i_mode);
+ nilfs_commit_chunk(folio, mapping, from, to);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
+ return 0;
}
/*
@@ -443,31 +432,28 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
unsigned int chunk_size = nilfs_chunk_size(dir);
unsigned int reclen = NILFS_DIR_REC_LEN(namelen);
unsigned short rec_len, name_len;
- struct page *page = NULL;
+ struct folio *folio = NULL;
struct nilfs_dir_entry *de;
unsigned long npages = dir_pages(dir);
unsigned long n;
- char *kaddr;
- unsigned int from, to;
+ size_t from, to;
int err;
/*
* We take care of directory expansion in the same loop.
- * This code plays outside i_size, so it locks the page
+ * This code plays outside i_size, so it locks the folio
* to protect that region.
*/
for (n = 0; n <= npages; n++) {
+ char *kaddr = nilfs_get_folio(dir, n, &folio);
char *dir_end;
- page = nilfs_get_page(dir, n);
- err = PTR_ERR(page);
- if (IS_ERR(page))
- goto out;
- lock_page(page);
- kaddr = page_address(page);
+ if (IS_ERR(kaddr))
+ return PTR_ERR(kaddr);
+ folio_lock(folio);
dir_end = kaddr + nilfs_last_byte(dir, n);
de = (struct nilfs_dir_entry *)kaddr;
- kaddr += PAGE_SIZE - reclen;
+ kaddr += folio_size(folio) - reclen;
while ((char *)de <= kaddr) {
if ((char *)de == dir_end) {
/* We hit i_size */
@@ -494,16 +480,16 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
goto got_it;
de = (struct nilfs_dir_entry *)((char *)de + rec_len);
}
- unlock_page(page);
- nilfs_put_page(page);
+ folio_unlock(folio);
+ folio_release_kmap(folio, kaddr);
}
BUG();
return -EINVAL;
got_it:
- from = (char *)de - (char *)page_address(page);
+ from = offset_in_folio(folio, de);
to = from + rec_len;
- err = nilfs_prepare_chunk(page, from, to);
+ err = nilfs_prepare_chunk(folio, from, to);
if (err)
goto out_unlock;
if (de->inode) {
@@ -517,30 +503,29 @@ got_it:
de->name_len = namelen;
memcpy(de->name, name, namelen);
de->inode = cpu_to_le64(inode->i_ino);
- nilfs_set_de_type(de, inode);
- nilfs_commit_chunk(page, page->mapping, from, to);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ de->file_type = fs_umode_to_ftype(inode->i_mode);
+ nilfs_commit_chunk(folio, folio->mapping, from, to);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
nilfs_mark_inode_dirty(dir);
/* OFFSET_CACHE */
out_put:
- nilfs_put_page(page);
-out:
+ folio_release_kmap(folio, de);
return err;
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
goto out_put;
}
/*
* nilfs_delete_entry deletes a directory entry by merging it with the
- * previous entry. Page is up-to-date. Releases the page.
+ * previous entry. Folio is up-to-date.
*/
-int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
+int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct folio *folio)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
- char *kaddr = page_address(page);
- unsigned int from, to;
+ char *kaddr = (char *)((unsigned long)dir & ~(folio_size(folio) - 1));
+ size_t from, to;
struct nilfs_dir_entry *de, *pde = NULL;
int err;
@@ -559,17 +544,19 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
de = nilfs_next_entry(de);
}
if (pde)
- from = (char *)pde - (char *)page_address(page);
- lock_page(page);
- err = nilfs_prepare_chunk(page, from, to);
- BUG_ON(err);
+ from = (char *)pde - kaddr;
+ folio_lock(folio);
+ err = nilfs_prepare_chunk(folio, from, to);
+ if (unlikely(err)) {
+ folio_unlock(folio);
+ goto out;
+ }
if (pde)
pde->rec_len = nilfs_rec_len_to_disk(to - from);
dir->inode = 0;
- nilfs_commit_chunk(page, mapping, from, to);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ nilfs_commit_chunk(folio, mapping, from, to);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
out:
- nilfs_put_page(page);
return err;
}
@@ -579,39 +566,39 @@ out:
int nilfs_make_empty(struct inode *inode, struct inode *parent)
{
struct address_space *mapping = inode->i_mapping;
- struct page *page = grab_cache_page(mapping, 0);
+ struct folio *folio = filemap_grab_folio(mapping, 0);
unsigned int chunk_size = nilfs_chunk_size(inode);
struct nilfs_dir_entry *de;
int err;
void *kaddr;
- if (!page)
- return -ENOMEM;
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- err = nilfs_prepare_chunk(page, 0, chunk_size);
+ err = nilfs_prepare_chunk(folio, 0, chunk_size);
if (unlikely(err)) {
- unlock_page(page);
+ folio_unlock(folio);
goto fail;
}
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_folio(folio, 0);
memset(kaddr, 0, chunk_size);
de = (struct nilfs_dir_entry *)kaddr;
de->name_len = 1;
de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1));
memcpy(de->name, ".\0\0", 4);
de->inode = cpu_to_le64(inode->i_ino);
- nilfs_set_de_type(de, inode);
+ de->file_type = fs_umode_to_ftype(inode->i_mode);
de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1));
de->name_len = 2;
de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1));
de->inode = cpu_to_le64(parent->i_ino);
memcpy(de->name, "..\0", 4);
- nilfs_set_de_type(de, inode);
- kunmap_atomic(kaddr);
- nilfs_commit_chunk(page, mapping, 0, chunk_size);
+ de->file_type = fs_umode_to_ftype(inode->i_mode);
+ kunmap_local(kaddr);
+ nilfs_commit_chunk(folio, mapping, 0, chunk_size);
fail:
- put_page(page);
+ folio_put(folio);
return err;
}
@@ -620,18 +607,17 @@ fail:
*/
int nilfs_empty_dir(struct inode *inode)
{
- struct page *page = NULL;
+ struct folio *folio = NULL;
+ char *kaddr;
unsigned long i, npages = dir_pages(inode);
for (i = 0; i < npages; i++) {
- char *kaddr;
struct nilfs_dir_entry *de;
- page = nilfs_get_page(inode, i);
- if (IS_ERR(page))
- continue;
+ kaddr = nilfs_get_folio(inode, i, &folio);
+ if (IS_ERR(kaddr))
+ return 0;
- kaddr = page_address(page);
de = (struct nilfs_dir_entry *)kaddr;
kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1);
@@ -657,12 +643,12 @@ int nilfs_empty_dir(struct inode *inode)
}
de = nilfs_next_entry(de);
}
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
}
return 1;
not_empty:
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
return 0;
}
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index a35f2795b242..2d8dc6b35b54 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -66,7 +66,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
dat = nilfs_bmap_get_dat(direct);
ret = nilfs_dat_translate(dat, ptr, &blocknr);
if (ret < 0)
- return ret;
+ goto dat_error;
ptr = blocknr;
}
@@ -79,7 +79,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
if (dat) {
ret = nilfs_dat_translate(dat, ptr2, &blocknr);
if (ret < 0)
- return ret;
+ goto dat_error;
ptr2 = blocknr;
}
if (ptr2 != ptr + cnt)
@@ -87,6 +87,11 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
}
*ptrp = ptr;
return cnt;
+
+ dat_error:
+ if (ret == -ENOENT)
+ ret = -EINVAL; /* Notify bmap layer of metadata corruption */
+ return ret;
}
static __u64
@@ -268,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap,
dat = nilfs_bmap_get_dat(bmap);
key = nilfs_bmap_data_get_key(bmap, bh);
ptr = nilfs_direct_get_ptr(bmap, key);
+ if (ptr == NILFS_BMAP_INVALID_PTR)
+ return -EINVAL;
+
if (!buffer_nilfs_volatile(bh)) {
oldreq.pr_entry_nr = ptr;
newreq.pr_entry_nr = ptr;
@@ -314,6 +322,7 @@ static int nilfs_direct_assign_p(struct nilfs_bmap *direct,
binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
binfo->bi_dat.bi_level = 0;
+ memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
return 0;
}
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index a265d391ffe9..1b8d754db44d 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -45,34 +45,36 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vma->vm_file);
struct nilfs_transaction_info ti;
+ struct buffer_head *bh, *head;
int ret = 0;
if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
return VM_FAULT_SIGBUS; /* -ENOSPC */
sb_start_pagefault(inode->i_sb);
- lock_page(page);
- if (page->mapping != inode->i_mapping ||
- page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
- unlock_page(page);
+ folio_lock(folio);
+ if (folio->mapping != inode->i_mapping ||
+ folio_pos(folio) >= i_size_read(inode) ||
+ !folio_test_uptodate(folio)) {
+ folio_unlock(folio);
ret = -EFAULT; /* make the VM retry the fault */
goto out;
}
/*
- * check to see if the page is mapped already (no holes)
+ * check to see if the folio is mapped already (no holes)
*/
- if (PageMappedToDisk(page))
+ if (folio_test_mappedtodisk(folio))
goto mapped;
- if (page_has_buffers(page)) {
- struct buffer_head *bh, *head;
+ head = folio_buffers(folio);
+ if (head) {
int fully_mapped = 1;
- bh = head = page_buffers(page);
+ bh = head;
do {
if (!buffer_mapped(bh)) {
fully_mapped = 0;
@@ -81,11 +83,11 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
} while (bh = bh->b_this_page, bh != head);
if (fully_mapped) {
- SetPageMappedToDisk(page);
+ folio_set_mappedtodisk(folio);
goto mapped;
}
}
- unlock_page(page);
+ folio_unlock(folio);
/*
* fill hole blocks
@@ -105,10 +107,16 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);
mapped:
- wait_for_stable_page(page);
+ /*
+ * Since checksumming including data blocks is performed to determine
+ * the validity of the log to be written and used for recovery, it is
+ * necessary to wait for writeback to finish here, regardless of the
+ * stable write requirement of the backing device.
+ */
+ folio_wait_writeback(folio);
out:
sb_end_pagefault(inode->i_sb);
- return block_page_mkwrite_return(ret);
+ return vmf_fs_error(ret);
}
static const struct vm_operations_struct nilfs_file_vm_ops = {
@@ -117,10 +125,10 @@ static const struct vm_operations_struct nilfs_file_vm_ops = {
.page_mkwrite = nilfs_page_mkwrite,
};
-static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int nilfs_file_mmap_prepare(struct vm_area_desc *desc)
{
- file_accessed(file);
- vma->vm_ops = &nilfs_file_vm_ops;
+ file_accessed(desc->file);
+ desc->vm_ops = &nilfs_file_vm_ops;
return 0;
}
@@ -136,11 +144,11 @@ const struct file_operations nilfs_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = nilfs_compat_ioctl,
#endif /* CONFIG_COMPAT */
- .mmap = nilfs_file_mmap,
+ .mmap_prepare = nilfs_file_mmap_prepare,
.open = generic_file_open,
/* .release = nilfs_release_file, */
.fsync = nilfs_sync_file,
- .splice_read = generic_file_splice_read,
+ .splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
};
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index b0d22ff24b67..561c220799c7 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -46,14 +46,11 @@
* specified by @pbn to the GC pagecache with the key @blkoff.
* This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer.
*
- * Return Value: On success, 0 is returned. On Error, one of the following
- * negative error code is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - The block specified with @pbn does not exist.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - The block specified with @pbn does not exist.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
sector_t pbn, __u64 vbn,
@@ -73,10 +70,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
- if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
- brelse(bh);
+ if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */
goto failed;
- }
}
lock_buffer(bh);
@@ -85,10 +80,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
goto out;
}
- if (!buffer_mapped(bh)) {
- bh->b_bdev = inode->i_sb->s_bdev;
+ if (!buffer_mapped(bh))
set_buffer_mapped(bh);
- }
bh->b_blocknr = pbn;
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
@@ -100,8 +93,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
*out_bh = bh;
failed:
- unlock_page(bh->b_page);
- put_page(bh->b_page);
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
+ if (unlikely(err))
+ brelse(bh);
return err;
}
@@ -116,12 +111,11 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
* specified by @vbn to the GC pagecache. @pbn can be supplied by the
* caller to avoid translation of the disk block address.
*
- * Return Value: On success, 0 is returned. On Error, one of the following
- * negative error code is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Invalid virtual block address.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
__u64 vbn, struct buffer_head **out_bh)
@@ -140,7 +134,7 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
{
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
- struct inode *inode = bh->b_page->mapping->host;
+ struct inode *inode = bh->b_folio->mapping->host;
nilfs_err(inode->i_sb,
"I/O error reading %s block for GC (ino=%lu, vblocknr=%llu)",
@@ -165,7 +159,7 @@ int nilfs_init_gcinode(struct inode *inode)
inode->i_mode = S_IFREG;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
- inode->i_mapping->a_ops = &empty_aops;
+ inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
ii->i_flags = 0;
nilfs_bmap_init_gc(ii->i_bmap);
@@ -175,6 +169,7 @@ int nilfs_init_gcinode(struct inode *inode)
/**
* nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes
+ * @nilfs: NILFS filesystem instance
*/
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
{
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index a8a4bc8490b4..99eb8a59009e 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -15,6 +15,7 @@
#include "mdt.h"
#include "alloc.h"
#include "ifile.h"
+#include "cpfile.h"
/**
* struct nilfs_ifile_info - on-memory private data of ifile
@@ -37,17 +38,16 @@ static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile)
* @out_ino: pointer to a variable to store inode number
* @out_bh: buffer_head contains newly allocated disk inode
*
- * Return Value: On success, 0 is returned and the newly allocated inode
- * number is stored in the place pointed by @ino, and buffer_head pointer
- * that contains newly allocated disk inode structure is stored in the
- * place pointed by @out_bh
- * On error, one of the following negative error codes is returned.
+ * nilfs_ifile_create_inode() allocates a new inode in the ifile metadata
+ * file and stores the inode number in the variable pointed to by @out_ino,
+ * as well as storing the ifile's buffer with the disk inode in the location
+ * pointed to by @out_bh.
*
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOSPC - No inode left.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No inode left.
*/
int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
struct buffer_head **out_bh)
@@ -55,13 +55,10 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
struct nilfs_palloc_req req;
int ret;
- req.pr_entry_nr = 0; /*
- * 0 says find free inode from beginning
- * of a group. dull code!!
- */
+ req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb);
req.pr_entry_bh = NULL;
- ret = nilfs_palloc_prepare_alloc_entry(ifile, &req);
+ ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false);
if (!ret) {
ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1,
&req.pr_entry_bh);
@@ -85,14 +82,11 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
* @ifile: ifile inode
* @ino: inode number
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - The inode number @ino have not been allocated.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Inode number unallocated.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
{
@@ -100,7 +94,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
.pr_entry_nr = ino, .pr_entry_bh = NULL
};
struct nilfs_inode *raw_inode;
- void *kaddr;
+ size_t offset;
int ret;
ret = nilfs_palloc_prepare_free_entry(ifile, &req);
@@ -115,11 +109,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
return ret;
}
- kaddr = kmap_atomic(req.pr_entry_bh->b_page);
- raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
- req.pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(ifile, req.pr_entry_nr,
+ req.pr_entry_bh);
+ raw_inode = kmap_local_folio(req.pr_entry_bh->b_folio, offset);
raw_inode->i_flags = 0;
- kunmap_atomic(kaddr);
+ kunmap_local(raw_inode);
mark_buffer_dirty(req.pr_entry_bh);
brelse(req.pr_entry_bh);
@@ -152,6 +146,8 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
* @ifile: ifile inode
* @nmaxinodes: current maximum of available inodes count [out]
* @nfreeinodes: free inodes count [out]
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_ifile_count_free_inodes(struct inode *ifile,
u64 *nmaxinodes, u64 *nfreeinodes)
@@ -173,21 +169,26 @@ int nilfs_ifile_count_free_inodes(struct inode *ifile,
* nilfs_ifile_read - read or get ifile inode
* @sb: super block instance
* @root: root object
+ * @cno: number of checkpoint entry to read
* @inode_size: size of an inode
- * @raw_inode: on-disk ifile inode
- * @inodep: buffer to store the inode
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid checkpoint.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EIO - I/O error (including metadata corruption).
*/
int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
- size_t inode_size, struct nilfs_inode *raw_inode,
- struct inode **inodep)
+ __u64 cno, size_t inode_size)
{
+ struct the_nilfs *nilfs;
struct inode *ifile;
int err;
ifile = nilfs_iget_locked(sb, root, NILFS_IFILE_INO);
if (unlikely(!ifile))
return -ENOMEM;
- if (!(ifile->i_state & I_NEW))
+ if (!(inode_state_read_once(ifile) & I_NEW))
goto out;
err = nilfs_mdt_init(ifile, NILFS_MDT_GFP,
@@ -201,13 +202,13 @@ int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache);
- err = nilfs_read_inode_common(ifile, raw_inode);
+ nilfs = sb->s_fs_info;
+ err = nilfs_cpfile_read_checkpoint(nilfs->ns_cpfile, cno, root, ifile);
if (err)
goto failed;
unlock_new_inode(ifile);
out:
- *inodep = ifile;
return 0;
failed:
iget_failed(ifile);
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index 35c5273f4821..5d116a566d9e 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -21,15 +21,14 @@
static inline struct nilfs_inode *
nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh)
{
- void *kaddr = kmap(ibh->b_page);
+ size_t __offset_in_folio = nilfs_palloc_entry_offset(ifile, ino, ibh);
- return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr);
+ return kmap_local_folio(ibh->b_folio, __offset_in_folio);
}
-static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino,
- struct buffer_head *ibh)
+static inline void nilfs_ifile_unmap_inode(struct nilfs_inode *raw_inode)
{
- kunmap(ibh->b_page);
+ kunmap_local(raw_inode);
}
int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
@@ -39,7 +38,6 @@ int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *);
int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
- size_t inode_size, struct nilfs_inode *raw_inode,
- struct inode **inodep);
+ __u64 cno, size_t inode_size);
#endif /* _NILFS_IFILE_H */
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 232dd7b6cca1..51bde45d5865 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -15,6 +15,7 @@
#include <linux/writeback.h>
#include <linux/uio.h>
#include <linux/fiemap.h>
+#include <linux/random.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
@@ -28,17 +29,13 @@
* @ino: inode number
* @cno: checkpoint number
* @root: pointer on NILFS root object (mounted checkpoint)
- * @for_gc: inode for GC flag
- * @for_btnc: inode for B-tree node cache flag
- * @for_shadow: inode for shadowed page cache flag
+ * @type: inode type
*/
struct nilfs_iget_args {
u64 ino;
__u64 cno;
struct nilfs_root *root;
- bool for_gc;
- bool for_btnc;
- bool for_shadow;
+ unsigned int type;
};
static int nilfs_iget_test(struct inode *inode, void *opaque);
@@ -71,6 +68,8 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n)
*
* This function does not issue actual read request of the specified data
* block. It is done by VFS.
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_get_block(struct inode *inode, sector_t blkoff,
struct buffer_head *bh_result, int create)
@@ -112,7 +111,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
"%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
__func__, inode->i_ino,
(unsigned long long)blkoff);
- err = 0;
+ err = -EAGAIN;
}
nilfs_transaction_abort(inode->i_sb);
goto out;
@@ -144,6 +143,8 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
* address_space_operations.
* @file: file struct of the file to be read
* @folio: the folio to be read
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_read_folio(struct file *file, struct folio *folio)
{
@@ -162,7 +163,7 @@ static int nilfs_writepages(struct address_space *mapping,
int err = 0;
if (sb_rdonly(inode->i_sb)) {
- nilfs_clear_dirty_pages(mapping, false);
+ nilfs_clear_dirty_pages(mapping);
return -EROFS;
}
@@ -173,36 +174,6 @@ static int nilfs_writepages(struct address_space *mapping,
return err;
}
-static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- int err;
-
- if (sb_rdonly(inode->i_sb)) {
- /*
- * It means that filesystem was remounted in read-only
- * mode because of error or metadata corruption. But we
- * have dirty pages that try to be flushed in background.
- * So, here we simply discard this dirty page.
- */
- nilfs_clear_dirty_page(page, false);
- unlock_page(page);
- return -EROFS;
- }
-
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
-
- if (wbc->sync_mode == WB_SYNC_ALL) {
- err = nilfs_construct_segment(inode->i_sb);
- if (unlikely(err))
- return err;
- } else if (wbc->for_reclaim)
- nilfs_flush_segment(inode->i_sb, inode->i_ino);
-
- return 0;
-}
-
static bool nilfs_dirty_folio(struct address_space *mapping,
struct folio *folio)
{
@@ -214,7 +185,7 @@ static bool nilfs_dirty_folio(struct address_space *mapping,
/*
* The page may not be locked, eg if called from try_to_unmap_one()
*/
- spin_lock(&mapping->private_lock);
+ spin_lock(&mapping->i_private_lock);
head = folio_buffers(folio);
if (head) {
struct buffer_head *bh = head;
@@ -230,7 +201,7 @@ static bool nilfs_dirty_folio(struct address_space *mapping,
} else if (ret) {
nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
}
- spin_unlock(&mapping->private_lock);
+ spin_unlock(&mapping->i_private_lock);
if (nr_dirty)
nilfs_set_file_dirty(inode, nr_dirty);
@@ -247,9 +218,10 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int nilfs_write_begin(struct file *file, struct address_space *mapping,
+static int nilfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct inode *inode = mapping->host;
@@ -258,7 +230,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
if (unlikely(err))
return err;
- err = block_write_begin(mapping, pos, len, pagep, nilfs_get_block);
+ err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block);
if (unlikely(err)) {
nilfs_write_failed(mapping, pos + len);
nilfs_transaction_abort(inode->i_sb);
@@ -266,18 +238,19 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
return err;
}
-static int nilfs_write_end(struct file *file, struct address_space *mapping,
+static int nilfs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
unsigned int start = pos & (PAGE_SIZE - 1);
unsigned int nr_dirty;
int err;
- nr_dirty = nilfs_page_count_clean_buffers(page, start,
+ nr_dirty = nilfs_page_count_clean_buffers(folio, start,
start + copied);
- copied = generic_write_end(file, mapping, pos, len, copied, page,
+ copied = generic_write_end(iocb, mapping, pos, len, copied, folio,
fsdata);
nilfs_set_file_dirty(inode, nr_dirty);
err = nilfs_transaction_commit(inode->i_sb);
@@ -297,7 +270,6 @@ nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
}
const struct address_space_operations nilfs_aops = {
- .writepage = nilfs_writepage,
.read_folio = nilfs_read_folio,
.writepages = nilfs_writepages,
.dirty_folio = nilfs_dirty_folio,
@@ -306,16 +278,20 @@ const struct address_space_operations nilfs_aops = {
.write_end = nilfs_write_end,
.invalidate_folio = block_invalidate_folio,
.direct_IO = nilfs_direct_IO,
+ .migrate_folio = buffer_migrate_folio_norefs,
.is_partially_uptodate = block_is_partially_uptodate,
};
+const struct address_space_operations nilfs_buffer_cache_aops = {
+ .invalidate_folio = block_invalidate_folio,
+};
+
static int nilfs_insert_inode_locked(struct inode *inode,
struct nilfs_root *root,
unsigned long ino)
{
struct nilfs_iget_args args = {
- .ino = ino, .root = root, .cno = 0, .for_gc = false,
- .for_btnc = false, .for_shadow = false
+ .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
};
return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
@@ -324,7 +300,6 @@ static int nilfs_insert_inode_locked(struct inode *inode,
struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
{
struct super_block *sb = dir->i_sb;
- struct the_nilfs *nilfs = sb->s_fs_info;
struct inode *inode;
struct nilfs_inode_info *ii;
struct nilfs_root *root;
@@ -342,31 +317,19 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
root = NILFS_I(dir)->i_root;
ii = NILFS_I(inode);
ii->i_state = BIT(NILFS_I_NEW);
+ ii->i_type = NILFS_I_TYPE_NORMAL;
ii->i_root = root;
err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
if (unlikely(err))
goto failed_ifile_create_inode;
/* reference count of i_bh inherits from nilfs_mdt_read_block() */
-
- if (unlikely(ino < NILFS_USER_INO)) {
- nilfs_warn(sb,
- "inode bitmap is inconsistent for reserved inodes");
- do {
- brelse(bh);
- err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
- if (unlikely(err))
- goto failed_ifile_create_inode;
- } while (ino < NILFS_USER_INO);
-
- nilfs_info(sb, "repaired inode bitmap for reserved inodes");
- }
ii->i_bh = bh;
atomic64_inc(&root->inodes_count);
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_ino = ino;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ simple_inode_init_ts(inode);
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
err = nilfs_bmap_read(ii->i_bmap, NULL);
@@ -384,9 +347,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
/* ii->i_dir_acl = 0; */
ii->i_dir_start_lookup = 0;
nilfs_set_inode_flags(inode);
- spin_lock(&nilfs->ns_next_gen_lock);
- inode->i_generation = nilfs->ns_next_generation++;
- spin_unlock(&nilfs->ns_next_gen_lock);
+ inode->i_generation = get_random_u32();
if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
err = -EIO;
goto failed_after_creation;
@@ -404,7 +365,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
failed_after_creation:
clear_nlink(inode);
- if (inode->i_state & I_NEW)
+ if (inode_state_read_once(inode) & I_NEW)
unlock_new_inode(inode);
iput(inode); /*
* raw_inode will be deleted through
@@ -449,12 +410,12 @@ int nilfs_read_inode_common(struct inode *inode,
i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
inode->i_size = le64_to_cpu(raw_inode->i_size);
- inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
- inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
- inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
- inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
- inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
- inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
+ inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime),
+ le32_to_cpu(raw_inode->i_mtime_nsec));
+ inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
+ le32_to_cpu(raw_inode->i_ctime_nsec));
+ inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime),
+ le32_to_cpu(raw_inode->i_mtime_nsec));
if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
return -EIO; /* this inode is for metadata and corrupted */
if (inode->i_nlink == 0)
@@ -513,13 +474,20 @@ static int __nilfs_read_inode(struct super_block *sb,
inode->i_op = &nilfs_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
- } else {
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_op = &nilfs_special_inode_operations;
init_special_inode(
inode, inode->i_mode,
huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
+ } else {
+ nilfs_error(sb,
+ "invalid file type bits in mode 0%o for inode %lu",
+ inode->i_mode, ino);
+ err = -EIO;
+ goto failed_unmap;
}
- nilfs_ifile_unmap_inode(root->ifile, ino, bh);
+ nilfs_ifile_unmap_inode(raw_inode);
brelse(bh);
up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
nilfs_set_inode_flags(inode);
@@ -528,7 +496,7 @@ static int __nilfs_read_inode(struct super_block *sb,
return 0;
failed_unmap:
- nilfs_ifile_unmap_inode(root->ifile, ino, bh);
+ nilfs_ifile_unmap_inode(raw_inode);
brelse(bh);
bad_inode:
@@ -545,23 +513,10 @@ static int nilfs_iget_test(struct inode *inode, void *opaque)
return 0;
ii = NILFS_I(inode);
- if (test_bit(NILFS_I_BTNC, &ii->i_state)) {
- if (!args->for_btnc)
- return 0;
- } else if (args->for_btnc) {
+ if (ii->i_type != args->type)
return 0;
- }
- if (test_bit(NILFS_I_SHADOW, &ii->i_state)) {
- if (!args->for_shadow)
- return 0;
- } else if (args->for_shadow) {
- return 0;
- }
-
- if (!test_bit(NILFS_I_GCINODE, &ii->i_state))
- return !args->for_gc;
- return args->for_gc && args->cno == ii->i_cno;
+ return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno;
}
static int nilfs_iget_set(struct inode *inode, void *opaque)
@@ -571,15 +526,9 @@ static int nilfs_iget_set(struct inode *inode, void *opaque)
inode->i_ino = args->ino;
NILFS_I(inode)->i_cno = args->cno;
NILFS_I(inode)->i_root = args->root;
+ NILFS_I(inode)->i_type = args->type;
if (args->root && args->ino == NILFS_ROOT_INO)
nilfs_get_root(args->root);
-
- if (args->for_gc)
- NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE);
- if (args->for_btnc)
- NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC);
- if (args->for_shadow)
- NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW);
return 0;
}
@@ -587,8 +536,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
unsigned long ino)
{
struct nilfs_iget_args args = {
- .ino = ino, .root = root, .cno = 0, .for_gc = false,
- .for_btnc = false, .for_shadow = false
+ .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
};
return ilookup5(sb, ino, nilfs_iget_test, &args);
@@ -598,8 +546,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
unsigned long ino)
{
struct nilfs_iget_args args = {
- .ino = ino, .root = root, .cno = 0, .for_gc = false,
- .for_btnc = false, .for_shadow = false
+ .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
};
return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
@@ -614,8 +561,14 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
inode = nilfs_iget_locked(sb, root, ino);
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+
+ if (!(inode_state_read_once(inode) & I_NEW)) {
+ if (!inode->i_nlink) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
return inode;
+ }
err = __nilfs_read_inode(sb, root, ino, inode);
if (unlikely(err)) {
@@ -630,8 +583,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
__u64 cno)
{
struct nilfs_iget_args args = {
- .ino = ino, .root = NULL, .cno = cno, .for_gc = true,
- .for_btnc = false, .for_shadow = false
+ .ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC
};
struct inode *inode;
int err;
@@ -639,7 +591,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+ if (!(inode_state_read_once(inode) & I_NEW))
return inode;
err = nilfs_init_gcinode(inode);
@@ -659,10 +611,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
* or does nothing if the inode already has it. This function allocates
* an additional inode to maintain page cache of B-tree nodes one-on-one.
*
- * Return Value: On success, 0 is returned. On errors, one of the following
- * negative error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or %-ENOMEM if memory is insufficient.
*/
int nilfs_attach_btree_node_cache(struct inode *inode)
{
@@ -676,15 +625,13 @@ int nilfs_attach_btree_node_cache(struct inode *inode)
args.ino = inode->i_ino;
args.root = ii->i_root;
args.cno = ii->i_cno;
- args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0;
- args.for_btnc = true;
- args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0;
+ args.type = ii->i_type | NILFS_I_TYPE_BTNC;
btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
nilfs_iget_set, &args);
if (unlikely(!btnc_inode))
return -ENOMEM;
- if (btnc_inode->i_state & I_NEW) {
+ if (inode_state_read_once(btnc_inode) & I_NEW) {
nilfs_init_btnc_inode(btnc_inode);
unlock_new_inode(btnc_inode);
}
@@ -723,17 +670,14 @@ void nilfs_detach_btree_node_cache(struct inode *inode)
* in one inode and the one for b-tree node pages is set up in the
* other inode, which is attached to the former inode.
*
- * Return Value: On success, a pointer to the inode for data pages is
- * returned. On errors, one of the following negative error code is returned
- * in a pointer type.
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: a pointer to the inode for data pages on success, or %-ENOMEM
+ * if memory is insufficient.
*/
struct inode *nilfs_iget_for_shadow(struct inode *inode)
{
struct nilfs_iget_args args = {
- .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false,
- .for_btnc = false, .for_shadow = true
+ .ino = inode->i_ino, .root = NULL, .cno = 0,
+ .type = NILFS_I_TYPE_SHADOW
};
struct inode *s_inode;
int err;
@@ -742,12 +686,13 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode)
nilfs_iget_set, &args);
if (unlikely(!s_inode))
return ERR_PTR(-ENOMEM);
- if (!(s_inode->i_state & I_NEW))
+ if (!(inode_state_read_once(s_inode) & I_NEW))
return inode;
NILFS_I(s_inode)->i_flags = 0;
memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
+ s_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
err = nilfs_attach_btree_node_cache(s_inode);
if (unlikely(err)) {
@@ -758,8 +703,18 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode)
return s_inode;
}
+/**
+ * nilfs_write_inode_common - export common inode information to on-disk inode
+ * @inode: inode object
+ * @raw_inode: on-disk inode
+ *
+ * This function writes standard information from the on-memory inode @inode
+ * to @raw_inode on ifile, cpfile or a super root block. Since inode bmap
+ * data is not exported, nilfs_bmap_write() must be called separately during
+ * log writing.
+ */
void nilfs_write_inode_common(struct inode *inode,
- struct nilfs_inode *raw_inode, int has_bmap)
+ struct nilfs_inode *raw_inode)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
@@ -768,30 +723,15 @@ void nilfs_write_inode_common(struct inode *inode,
raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le64(inode->i_size);
- raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
- raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
- raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
- raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
+ raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
+ raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
+ raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
raw_inode->i_flags = cpu_to_le32(ii->i_flags);
raw_inode->i_generation = cpu_to_le32(inode->i_generation);
- if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
- struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
-
- /* zero-fill unused portion in the case of super root block */
- raw_inode->i_xattr = 0;
- raw_inode->i_pad = 0;
- memset((void *)raw_inode + sizeof(*raw_inode), 0,
- nilfs->ns_inode_size - sizeof(*raw_inode));
- }
-
- if (has_bmap)
- nilfs_bmap_write(ii->i_bmap, raw_inode);
- else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
- raw_inode->i_device_code =
- cpu_to_le64(huge_encode_dev(inode->i_rdev));
/*
* When extending inode, nilfs->ns_inode_size should be checked
* for substitutions of appended fields.
@@ -812,14 +752,13 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
if (flags & I_DIRTY_DATASYNC)
set_bit(NILFS_I_INODE_SYNC, &ii->i_state);
- nilfs_write_inode_common(inode, raw_inode, 0);
- /*
- * XXX: call with has_bmap = 0 is a workaround to avoid
- * deadlock of bmap. This delays update of i_bmap to just
- * before writing.
- */
+ nilfs_write_inode_common(inode, raw_inode);
- nilfs_ifile_unmap_inode(ifile, ino, ibh);
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+ raw_inode->i_device_code =
+ cpu_to_le64(huge_encode_dev(inode->i_rdev));
+
+ nilfs_ifile_unmap_inode(raw_inode);
}
#define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */
@@ -875,7 +814,7 @@ void nilfs_truncate(struct inode *inode)
nilfs_truncate_bmap(ii, blkoff);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
if (IS_SYNC(inode))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
@@ -905,7 +844,7 @@ static void nilfs_clear_inode(struct inode *inode)
if (test_bit(NILFS_I_BMAP, &ii->i_state))
nilfs_bmap_clear(ii->i_bmap);
- if (!test_bit(NILFS_I_BTNC, &ii->i_state))
+ if (!(ii->i_type & NILFS_I_TYPE_BTNC))
nilfs_detach_btree_node_cache(inode);
if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
@@ -917,6 +856,7 @@ void nilfs_evict_inode(struct inode *inode)
struct nilfs_transaction_info ti;
struct super_block *sb = inode->i_sb;
struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct the_nilfs *nilfs;
int ret;
if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
@@ -929,6 +869,23 @@ void nilfs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
+ nilfs = sb->s_fs_info;
+ if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
+ /*
+ * If this inode is about to be disposed after the file system
+ * has been degraded to read-only due to file system corruption
+ * or after the writer has been detached, do not make any
+ * changes that cause writes, just clear it.
+ * Do this check after read-locking ns_segctor_sem by
+ * nilfs_transaction_begin() in order to avoid a race with
+ * the writer detach operation.
+ */
+ clear_inode(inode);
+ nilfs_clear_inode(inode);
+ nilfs_transaction_abort(sb);
+ return;
+ }
+
/* TODO: some of the following operations may fail. */
nilfs_truncate_bmap(ii, 0);
nilfs_mark_inode_dirty(inode);
@@ -949,7 +906,7 @@ void nilfs_evict_inode(struct inode *inode)
*/
}
-int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr)
{
struct nilfs_transaction_info ti;
@@ -957,7 +914,7 @@ int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
struct super_block *sb = inode->i_sb;
int err;
- err = setattr_prepare(&init_user_ns, dentry, iattr);
+ err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (err)
return err;
@@ -972,7 +929,7 @@ int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
nilfs_truncate(inode);
}
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
mark_inode_dirty(inode);
if (iattr->ia_valid & ATTR_MODE) {
@@ -988,7 +945,7 @@ out_err:
return err;
}
-int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
@@ -997,7 +954,7 @@ int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
root->cno != NILFS_CPTREE_CURRENT_CNO)
return -EROFS; /* snapshot is not writable */
- return generic_permission(&init_user_ns, inode, mask);
+ return generic_permission(&nop_mnt_idmap, inode, mask);
}
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
@@ -1007,7 +964,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
int err;
spin_lock(&nilfs->ns_inode_lock);
- if (ii->i_bh == NULL) {
+ if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
spin_unlock(&nilfs->ns_inode_lock);
err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
inode->i_ino, pbh);
@@ -1016,7 +973,10 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
spin_lock(&nilfs->ns_inode_lock);
if (ii->i_bh == NULL)
ii->i_bh = *pbh;
- else {
+ else if (unlikely(!buffer_uptodate(ii->i_bh))) {
+ __brelse(ii->i_bh);
+ ii->i_bh = *pbh;
+ } else {
brelse(*pbh);
*pbh = ii->i_bh;
}
@@ -1083,9 +1043,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
struct buffer_head *ibh;
int err;
+ /*
+ * Do not dirty inodes after the log writer has been detached
+ * and its nilfs_root struct has been freed.
+ */
+ if (unlikely(nilfs_purging(nilfs)))
+ return 0;
+
err = nilfs_load_inode_block(inode, &ibh);
if (unlikely(err)) {
nilfs_warn(inode->i_sb,
@@ -1227,7 +1195,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (size) {
if (phys && blkphy << blkbits == phys + size) {
/* The current extent goes on */
- size += n << blkbits;
+ size += (u64)n << blkbits;
} else {
/* Terminate the current extent */
ret = fiemap_fill_next_extent(
@@ -1240,14 +1208,14 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags = FIEMAP_EXTENT_MERGED;
logical = blkoff << blkbits;
phys = blkphy << blkbits;
- size = n << blkbits;
+ size = (u64)n << blkbits;
}
} else {
/* Start a new extent */
flags = FIEMAP_EXTENT_MERGED;
logical = blkoff << blkbits;
phys = blkphy << blkbits;
- size = n << blkbits;
+ size = (u64)n << blkbits;
}
blkoff += n;
}
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 87e1004b606d..e17b8da66491 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -17,6 +17,7 @@
#include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */
#include <linux/buffer_head.h>
#include <linux/fileattr.h>
+#include <linux/string.h>
#include "nilfs.h"
#include "segment.h"
#include "bmap.h"
@@ -32,17 +33,14 @@
* @dofunc: concrete function of get/set metadata info
*
* Description: nilfs_ioctl_wrap_copy() gets/sets metadata info by means of
- * calling dofunc() function on the basis of @argv argument.
- *
- * Return Value: On success, 0 is returned and requested metadata info
- * is copied into userspace. On error, one of the following
- * negative error codes is returned.
- *
- * %-EINVAL - Invalid arguments from userspace.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EFAULT - Failure during execution of requested operation.
+ * calling dofunc() function on the basis of @argv argument. If successful,
+ * the requested metadata information is copied to userspace memory.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during execution of requested operation.
+ * * %-EINVAL - Invalid arguments from userspace.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
struct nilfs_argv *argv, int dir,
@@ -51,7 +49,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
void *, size_t, size_t))
{
void *buf;
- void __user *base = (void __user *)(unsigned long)argv->v_base;
+ void __user *base = u64_to_user_ptr(argv->v_base);
size_t maxmembs, total, n;
ssize_t nr;
int ret, i;
@@ -60,7 +58,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
if (argv->v_nmembs == 0)
return 0;
- if (argv->v_size > PAGE_SIZE)
+ if ((size_t)argv->v_size > PAGE_SIZE)
return -EINVAL;
/*
@@ -71,7 +69,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
if (argv->v_index > ~(__u64)0 - argv->v_nmembs)
return -EINVAL;
- buf = (void *)__get_free_pages(GFP_NOFS, 0);
+ buf = (void *)get_zeroed_page(GFP_NOFS);
if (unlikely(!buf))
return -ENOMEM;
maxmembs = PAGE_SIZE / argv->v_size;
@@ -114,9 +112,13 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
}
/**
- * nilfs_fileattr_get - ioctl to support lsattr
+ * nilfs_fileattr_get - retrieve miscellaneous file attributes
+ * @dentry: the object to retrieve from
+ * @fa: fileattr pointer
+ *
+ * Return: always 0 as success.
*/
-int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+int nilfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
{
struct inode *inode = d_inode(dentry);
@@ -126,10 +128,15 @@ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
}
/**
- * nilfs_fileattr_set - ioctl to support chattr
+ * nilfs_fileattr_set - change miscellaneous file attributes
+ * @idmap: idmap of the mount
+ * @dentry: the object to change
+ * @fa: fileattr pointer
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
-int nilfs_fileattr_set(struct user_namespace *mnt_userns,
- struct dentry *dentry, struct fileattr *fa)
+int nilfs_fileattr_set(struct mnt_idmap *idmap,
+ struct dentry *dentry, struct file_kattr *fa)
{
struct inode *inode = d_inode(dentry);
struct nilfs_transaction_info ti;
@@ -149,7 +156,7 @@ int nilfs_fileattr_set(struct user_namespace *mnt_userns,
NILFS_I(inode)->i_flags = oldflags | (flags & FS_FL_USER_MODIFIABLE);
nilfs_set_inode_flags(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
if (IS_SYNC(inode))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
@@ -159,6 +166,10 @@ int nilfs_fileattr_set(struct user_namespace *mnt_userns,
/**
* nilfs_ioctl_getversion - get info about a file's version (generation number)
+ * @inode: inode object
+ * @argp: userspace memory where the generation number of @inode is stored
+ *
+ * Return: 0 on success, or %-EFAULT on error.
*/
static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp)
{
@@ -176,13 +187,10 @@ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp)
* given checkpoint between checkpoint and snapshot state. This ioctl
* is used in chcp and mkcp utilities.
*
- * Return Value: On success, 0 is returned and mode of a checkpoint is
- * changed. On error, one of the following negative error codes
- * is returned.
- *
- * %-EPERM - Operation not permitted.
- *
- * %-EFAULT - Failure during checkpoint mode changing.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * %-EFAULT - Failure during checkpoint mode changing.
+ * %-EPERM - Operation not permitted.
*/
static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
@@ -230,13 +238,10 @@ out:
* checkpoint from NILFS2 file system. This ioctl is used in rmcp
* utility.
*
- * Return Value: On success, 0 is returned and a checkpoint is
- * removed. On error, one of the following negative error codes
- * is returned.
- *
- * %-EPERM - Operation not permitted.
- *
- * %-EFAULT - Failure during checkpoint removing.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * %-EFAULT - Failure during checkpoint removing.
+ * %-EPERM - Operation not permitted.
*/
static int
nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
@@ -282,7 +287,7 @@ out:
* requested checkpoints. The NILFS_IOCTL_GET_CPINFO ioctl is used in
* lscp utility and by nilfs_cleanerd daemon.
*
- * Return value: count of nilfs_cpinfo structures in output buffer.
+ * Return: Count of nilfs_cpinfo structures in output buffer.
*/
static ssize_t
nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
@@ -306,17 +311,14 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
*
* Description: nilfs_ioctl_get_cpstat() returns information about checkpoints.
* The NILFS_IOCTL_GET_CPSTAT ioctl is used by lscp, rmcp utilities
- * and by nilfs_cleanerd daemon.
- *
- * Return Value: On success, 0 is returned, and checkpoints information is
- * copied into userspace pointer @argp. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EFAULT - Failure during getting checkpoints statistics.
+ * and by nilfs_cleanerd daemon. The checkpoint statistics are copied to
+ * the userspace memory pointed to by @argp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during getting checkpoints statistics.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
@@ -349,7 +351,8 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
* info about requested segments. The NILFS_IOCTL_GET_SUINFO ioctl is used
* in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon.
*
- * Return value: count of nilfs_suinfo structures in output buffer.
+ * Return: Count of nilfs_suinfo structures in output buffer on success,
+ * or a negative error code on failure.
*/
static ssize_t
nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
@@ -373,17 +376,14 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
*
* Description: nilfs_ioctl_get_sustat() returns segment usage statistics.
* The NILFS_IOCTL_GET_SUSTAT ioctl is used in lssu, nilfs_resize utilities
- * and by nilfs_cleanerd daemon.
- *
- * Return Value: On success, 0 is returned, and segment usage information is
- * copied into userspace pointer @argp. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EFAULT - Failure during getting segment usage statistics.
+ * and by nilfs_cleanerd daemon. The requested segment usage information is
+ * copied to the userspace memory pointed to by @argp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during getting segment usage statistics.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
@@ -416,7 +416,8 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
* on virtual block addresses. The NILFS_IOCTL_GET_VINFO ioctl is used
* by nilfs_cleanerd daemon.
*
- * Return value: count of nilfs_vinfo structures in output buffer.
+ * Return: Count of nilfs_vinfo structures in output buffer on success, or
+ * a negative error code on failure.
*/
static ssize_t
nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
@@ -443,7 +444,8 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
* about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl
* is used by nilfs_cleanerd daemon.
*
- * Return value: count of nilfs_bdescs structures in output buffer.
+ * Return: Count of nilfs_bdescs structures in output buffer on success, or
+ * a negative error code on failure.
*/
static ssize_t
nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
@@ -480,19 +482,15 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
*
* Description: nilfs_ioctl_do_get_bdescs() function returns information
* about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl
- * is used by nilfs_cleanerd daemon.
- *
- * Return Value: On success, 0 is returned, and disk block descriptors are
- * copied into userspace pointer @argp. On error, one of the following
- * negative error codes is returned.
- *
- * %-EINVAL - Invalid arguments from userspace.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EFAULT - Failure during getting disk block descriptors.
+ * is used by nilfs_cleanerd daemon. If successful, disk block descriptors
+ * are copied to userspace pointer @argp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during getting disk block descriptors.
+ * * %-EINVAL - Invalid arguments from userspace.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
@@ -526,16 +524,12 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
* Description: nilfs_ioctl_move_inode_block() function registers data/node
* buffer in the GC pagecache and submit read request.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - Requested block doesn't exist.
- *
- * %-EEXIST - Blocks conflict is detected.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EEXIST - Block conflict detected.
+ * * %-EIO - I/O error.
+ * * %-ENOENT - Requested block doesn't exist.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_ioctl_move_inode_block(struct inode *inode,
struct nilfs_vdesc *vdesc,
@@ -590,8 +584,8 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode,
* blocks that garbage collector specified with the array of nilfs_vdesc
* structures and stores them into page caches of GC inodes.
*
- * Return Value: Number of processed nilfs_vdesc structures or
- * error code, otherwise.
+ * Return: Number of processed nilfs_vdesc structures on success, or
+ * a negative error code on failure.
*/
static int nilfs_ioctl_move_blocks(struct super_block *sb,
struct nilfs_argv *argv, void *buf)
@@ -668,14 +662,11 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
* in the period from p_start to p_end, excluding p_end itself. The checkpoints
* which have been already deleted are ignored.
*
- * Return Value: Number of processed nilfs_period structures or
- * error code, otherwise.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - invalid checkpoints.
+ * Return: Number of processed nilfs_period structures on success, or one of
+ * the following negative error codes on failure:
+ * * %-EINVAL - invalid checkpoints.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void *buf)
@@ -703,14 +694,11 @@ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
* Description: nilfs_ioctl_free_vblocknrs() function frees
* the virtual block numbers specified by @buf and @argv->v_nmembs.
*
- * Return Value: Number of processed virtual block numbers or
- * error code, otherwise.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - The virtual block number have not been allocated.
+ * Return: Number of processed virtual block numbers on success, or one of the
+ * following negative error codes on failure:
+ * * %-EIO - I/O error.
+ * * %-ENOENT - Unallocated virtual block number.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void *buf)
@@ -732,14 +720,11 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
* Description: nilfs_ioctl_mark_blocks_dirty() function marks
* metadata file or data blocks as dirty.
*
- * Return Value: Number of processed block descriptors or
- * error code, otherwise.
- *
- * %-ENOMEM - Insufficient memory available.
- *
- * %-EIO - I/O error
- *
- * %-ENOENT - the specified block does not exist (hole block)
+ * Return: Number of processed block descriptors on success, or one of the
+ * following negative error codes on failure:
+ * * %-EIO - I/O error.
+ * * %-ENOENT - Non-existent block (hole block).
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void *buf)
@@ -838,7 +823,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
* from userspace. The NILFS_IOCTL_CLEAN_SEGMENTS ioctl is used by
* nilfs_cleanerd daemon.
*
- * Return Value: On success, 0 is returned or error code, otherwise.
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
@@ -851,7 +836,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
sizeof(struct nilfs_bdesc),
sizeof(__u64),
};
- void __user *base;
void *kbufs[5];
struct the_nilfs *nilfs;
size_t len, nsegs;
@@ -872,16 +856,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
nsegs = argv[4].v_nmembs;
if (argv[4].v_size != argsz[4])
goto out;
- if (nsegs > UINT_MAX / sizeof(__u64))
- goto out;
/*
* argv[4] points to segment numbers this ioctl cleans. We
- * use kmalloc() for its buffer because memory used for the
- * segment numbers is enough small.
+ * use kmalloc() for its buffer because the memory used for the
+ * segment numbers is small enough.
*/
- kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base,
- nsegs * sizeof(__u64));
+ kbufs[4] = memdup_array_user(u64_to_user_ptr(argv[4].v_base),
+ nsegs, sizeof(__u64));
if (IS_ERR(kbufs[4])) {
ret = PTR_ERR(kbufs[4]);
goto out;
@@ -900,20 +882,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
goto out_free;
len = argv[n].v_size * argv[n].v_nmembs;
- base = (void __user *)(unsigned long)argv[n].v_base;
if (len == 0) {
kbufs[n] = NULL;
continue;
}
- kbufs[n] = vmalloc(len);
- if (!kbufs[n]) {
- ret = -ENOMEM;
- goto out_free;
- }
- if (copy_from_user(kbufs[n], base, len)) {
- ret = -EFAULT;
- vfree(kbufs[n]);
+ kbufs[n] = vmemdup_user(u64_to_user_ptr(argv[n].v_base), len);
+ if (IS_ERR(kbufs[n])) {
+ ret = PTR_ERR(kbufs[n]);
goto out_free;
}
}
@@ -945,7 +921,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
out_free:
while (--n >= 0)
- vfree(kbufs[n]);
+ kvfree(kbufs[n]);
kfree(kbufs[4]);
out:
mnt_drop_write_file(filp);
@@ -964,20 +940,14 @@ out:
* and metadata are written out to the device when it successfully
* returned.
*
- * Return Value: On success, 0 is retured. On errors, one of the following
- * negative error code is returned.
- *
- * %-EROFS - Read only filesystem.
- *
- * %-EIO - I/O error
- *
- * %-ENOSPC - No space left on device (only in a panic state).
- *
- * %-ERESTARTSYS - Interrupted.
- *
- * %-ENOMEM - Insufficient memory available.
- *
- * %-EFAULT - Failure during execution of requested operation.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during execution of requested operation.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No space left on device (only in a panic state).
+ * * %-ERESTARTSYS - Interrupted.
+ * * %-EROFS - Read only filesystem.
*/
static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
@@ -1011,7 +981,7 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
* @filp: file object
* @argp: pointer on argument from userspace
*
- * Return Value: On success, 0 is returned or error code, otherwise.
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
void __user *argp)
@@ -1047,7 +1017,7 @@ out:
* checks the arguments from userspace and calls nilfs_sufile_trim_fs, which
* performs the actual trim operation.
*
- * Return Value: On success, 0 is returned or negative error code, otherwise.
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp)
{
@@ -1089,7 +1059,7 @@ static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp)
* of segments in bytes and upper limit of segments in bytes.
* The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility.
*
- * Return Value: On success, 0 is returned or error code, otherwise.
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
{
@@ -1113,9 +1083,16 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
minseg = range[0] + segbytes - 1;
- do_div(minseg, segbytes);
+ minseg = div64_ul(minseg, segbytes);
+
+ if (range[1] < 4096)
+ goto out;
+
maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
- do_div(maxseg, segbytes);
+ if (maxseg < segbytes)
+ goto out;
+
+ maxseg = div64_ul(maxseg, segbytes);
maxseg--;
ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
@@ -1133,17 +1110,15 @@ out:
* @dofunc: concrete function of getting metadata info
*
* Description: nilfs_ioctl_get_info() gets metadata info by means of
- * calling dofunc() function.
- *
- * Return Value: On success, 0 is returned and requested metadata info
- * is copied into userspace. On error, one of the following
- * negative error codes is returned.
- *
- * %-EINVAL - Invalid arguments from userspace.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EFAULT - Failure during execution of requested operation.
+ * calling dofunc() function. The requested metadata information is copied
+ * to userspace memory @argp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during execution of requested operation.
+ * * %-EINVAL - Invalid arguments from userspace.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp,
@@ -1183,18 +1158,14 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
* encapsulated in nilfs_argv and updates the segment usage info
* according to the flags in nilfs_suinfo_update.
*
- * Return Value: On success, 0 is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-EPERM - Not enough permissions
- *
- * %-EFAULT - Error copying input data
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - Invalid values in input (segment number, flags or nblocks)
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EEXIST - Block conflict detected.
+ * * %-EFAULT - Error copying input data.
+ * * %-EINVAL - Invalid values in input (segment number, flags or nblocks).
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EPERM - Not enough permissions.
*/
static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
@@ -1203,7 +1174,6 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp,
struct nilfs_transaction_info ti;
struct nilfs_argv argv;
size_t len;
- void __user *base;
void *kbuf;
int ret;
@@ -1234,18 +1204,12 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp,
goto out;
}
- base = (void __user *)(unsigned long)argv.v_base;
- kbuf = vmalloc(len);
- if (!kbuf) {
- ret = -ENOMEM;
+ kbuf = vmemdup_user(u64_to_user_ptr(argv.v_base), len);
+ if (IS_ERR(kbuf)) {
+ ret = PTR_ERR(kbuf);
goto out;
}
- if (copy_from_user(kbuf, base, len)) {
- ret = -EFAULT;
- goto out_free;
- }
-
nilfs_transaction_begin(inode->i_sb, &ti, 0);
ret = nilfs_sufile_set_suinfo(nilfs->ns_sufile, kbuf, argv.v_size,
argv.v_nmembs);
@@ -1254,13 +1218,98 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp,
else
nilfs_transaction_commit(inode->i_sb); /* never fails */
-out_free:
- vfree(kbuf);
+ kvfree(kbuf);
out:
mnt_drop_write_file(filp);
return ret;
}
+/**
+ * nilfs_ioctl_get_fslabel - get the volume name of the file system
+ * @sb: super block instance
+ * @argp: pointer to userspace memory where the volume name should be stored
+ *
+ * Return: 0 on success, %-EFAULT if copying to userspace memory fails.
+ */
+static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp)
+{
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ char label[NILFS_MAX_VOLUME_NAME + 1];
+
+ BUILD_BUG_ON(NILFS_MAX_VOLUME_NAME >= FSLABEL_MAX);
+
+ down_read(&nilfs->ns_sem);
+ memtostr_pad(label, nilfs->ns_sbp[0]->s_volume_name);
+ up_read(&nilfs->ns_sem);
+
+ if (copy_to_user(argp, label, sizeof(label)))
+ return -EFAULT;
+ return 0;
+}
+
+/**
+ * nilfs_ioctl_set_fslabel - set the volume name of the file system
+ * @sb: super block instance
+ * @filp: file object
+ * @argp: pointer to userspace memory that contains the volume name
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Error copying input data.
+ * * %-EINVAL - Label length exceeds record size in superblock.
+ * * %-EIO - I/O error.
+ * * %-EPERM - Operation not permitted (insufficient permissions).
+ * * %-EROFS - Read only file system.
+ */
+static int nilfs_ioctl_set_fslabel(struct super_block *sb, struct file *filp,
+ void __user *argp)
+{
+ char label[NILFS_MAX_VOLUME_NAME + 1];
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ struct nilfs_super_block **sbp;
+ size_t len;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ if (copy_from_user(label, argp, NILFS_MAX_VOLUME_NAME + 1)) {
+ ret = -EFAULT;
+ goto out_drop_write;
+ }
+
+ len = strnlen(label, NILFS_MAX_VOLUME_NAME + 1);
+ if (len > NILFS_MAX_VOLUME_NAME) {
+ nilfs_err(sb, "unable to set label with more than %zu bytes",
+ NILFS_MAX_VOLUME_NAME);
+ ret = -EINVAL;
+ goto out_drop_write;
+ }
+
+ down_write(&nilfs->ns_sem);
+ sbp = nilfs_prepare_super(sb, false);
+ if (unlikely(!sbp)) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
+ strtomem_pad(sbp[0]->s_volume_name, label, 0);
+ if (sbp[1])
+ strtomem_pad(sbp[1]->s_volume_name, label, 0);
+
+ ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
+
+out_unlock:
+ up_write(&nilfs->ns_sem);
+out_drop_write:
+ mnt_drop_write_file(filp);
+ return ret;
+}
+
long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -1303,6 +1352,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return nilfs_ioctl_set_alloc_range(inode, argp);
case FITRIM:
return nilfs_ioctl_trim_fs(inode, argp);
+ case FS_IOC_GETFSLABEL:
+ return nilfs_ioctl_get_fslabel(inode->i_sb, argp);
+ case FS_IOC_SETFSLABEL:
+ return nilfs_ioctl_set_fslabel(inode->i_sb, filp, argp);
default:
return -ENOTTY;
}
@@ -1329,6 +1382,8 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case NILFS_IOCTL_RESIZE:
case NILFS_IOCTL_SET_ALLOC_RANGE:
case FITRIM:
+ case FS_IOC_GETFSLABEL:
+ case FS_IOC_SETFSLABEL:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index cbf4fa60eea2..946b0d3534a5 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -33,7 +33,8 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
struct buffer_head *, void *))
{
struct nilfs_inode_info *ii = NILFS_I(inode);
- void *kaddr;
+ struct folio *folio = bh->b_folio;
+ void *from;
int ret;
/* Caller exclude read accesses using page lock */
@@ -47,12 +48,14 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
set_buffer_mapped(bh);
- kaddr = kmap_atomic(bh->b_page);
- memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
+ /* Initialize block (block size > PAGE_SIZE not yet supported) */
+ from = kmap_local_folio(folio, offset_in_folio(folio, bh->b_data));
+ memset(from, 0, bh->b_size);
if (init_block)
- init_block(inode, bh, kaddr);
- flush_dcache_page(bh->b_page);
- kunmap_atomic(kaddr);
+ init_block(inode, bh, from);
+ kunmap_local(from);
+
+ flush_dcache_folio(folio);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
@@ -89,7 +92,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
if (buffer_uptodate(bh))
goto failed_bh;
- bh->b_bdev = sb->s_bdev;
err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
if (likely(!err)) {
get_bh(bh);
@@ -97,8 +99,8 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
}
failed_bh:
- unlock_page(bh->b_page);
- put_page(bh->b_page);
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
brelse(bh);
failed_unlock:
@@ -158,8 +160,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, blk_opf_t opf,
*out_bh = bh;
failed_bh:
- unlock_page(bh->b_page);
- put_page(bh->b_page);
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
brelse(bh);
failed:
return ret;
@@ -224,20 +226,21 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
* @out_bh: output of a pointer to the buffer_head
*
* nilfs_mdt_get_block() looks up the specified buffer and tries to create
- * a new buffer if @create is not zero. On success, the returned buffer is
- * assured to be either existing or formatted using a buffer lock on success.
- * @out_bh is substituted only when zero is returned.
- *
- * Return Value: On success, it returns 0. On error, the following negative
- * error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
+ * a new buffer if @create is not zero. If (and only if) this function
+ * succeeds, it stores a pointer to the retrieved buffer head in the location
+ * pointed to by @out_bh.
*
- * %-EIO - I/O error
+ * The retrieved buffer may be either an existing one or a newly allocated one.
+ * For a newly created buffer, if the callback function argument @init_block
+ * is non-NULL, the callback will be called with the buffer locked to format
+ * the block.
*
- * %-ENOENT - the specified block does not exist (hole block)
- *
- * %-EROFS - Read only filesystem (for create mode)
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - The specified block does not exist (hole block).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EROFS - Read only filesystem (for create mode).
*/
int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
void (*init_block)(struct inode *,
@@ -273,14 +276,11 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
* @out_bh, and block offset to @blkoff, respectively. @out_bh and
* @blkoff are substituted only when zero is returned.
*
- * Return Value: On success, it returns 0. On error, the following negative
- * error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
- *
- * %-EIO - I/O error
- *
- * %-ENOENT - no block was found in the range
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No block was found in the range.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
unsigned long end, unsigned long *blkoff,
@@ -319,12 +319,11 @@ out:
* @inode: inode of the meta data file
* @block: block offset
*
- * Return Value: On success, zero is returned.
- * On error, one of the following negative error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
- *
- * %-EIO - I/O error
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Non-existent block.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
{
@@ -347,39 +346,35 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
* nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and
* tries to release the page including the buffer from a page cache.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error code is returned.
- *
- * %-EBUSY - page has an active buffer.
- *
- * %-ENOENT - page cache has no page addressed by the offset.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EBUSY - Page has an active buffer.
+ * * %-ENOENT - Page cache has no page addressed by the offset.
*/
int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
{
- pgoff_t index = (pgoff_t)block >>
- (PAGE_SHIFT - inode->i_blkbits);
- struct page *page;
- unsigned long first_block;
+ pgoff_t index = block >> (PAGE_SHIFT - inode->i_blkbits);
+ struct folio *folio;
+ struct buffer_head *bh;
int ret = 0;
int still_dirty;
- page = find_lock_page(inode->i_mapping, index);
- if (!page)
+ folio = filemap_lock_folio(inode->i_mapping, index);
+ if (IS_ERR(folio))
return -ENOENT;
- wait_on_page_writeback(page);
-
- first_block = (unsigned long)index <<
- (PAGE_SHIFT - inode->i_blkbits);
- if (page_has_buffers(page)) {
- struct buffer_head *bh;
+ folio_wait_writeback(folio);
- bh = nilfs_page_get_nth_block(page, block - first_block);
+ bh = folio_buffers(folio);
+ if (bh) {
+ unsigned long first_block = index <<
+ (PAGE_SHIFT - inode->i_blkbits);
+ bh = get_nth_bh(bh, block - first_block);
nilfs_forget_buffer(bh);
}
- still_dirty = PageDirty(page);
- unlock_page(page);
- put_page(page);
+ still_dirty = folio_test_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
if (still_dirty ||
invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
@@ -398,10 +393,10 @@ int nilfs_mdt_fetch_dirty(struct inode *inode)
return test_bit(NILFS_I_DIRTY, &ii->i_state);
}
-static int
-nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
+static int nilfs_mdt_write_folio(struct folio *folio,
+ struct writeback_control *wbc)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct super_block *sb;
int err = 0;
@@ -409,16 +404,16 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
/*
* It means that filesystem was remounted in read-only
* mode because of error or metadata corruption. But we
- * have dirty pages that try to be flushed in background.
- * So, here we simply discard this dirty page.
+ * have dirty folios that try to be flushed in background.
+ * So, here we simply discard this dirty folio.
*/
- nilfs_clear_dirty_page(page, false);
- unlock_page(page);
+ nilfs_clear_folio_dirty(folio);
+ folio_unlock(folio);
return -EROFS;
}
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
+ folio_redirty_for_writepage(wbc, folio);
+ folio_unlock(folio);
if (!inode)
return 0;
@@ -427,17 +422,27 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
if (wbc->sync_mode == WB_SYNC_ALL)
err = nilfs_construct_segment(sb);
- else if (wbc->for_reclaim)
- nilfs_flush_segment(sb, inode->i_ino);
return err;
}
+static int nilfs_mdt_writeback(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct folio *folio = NULL;
+ int error;
+
+ while ((folio = writeback_iter(mapping, wbc, folio, &error)))
+ error = nilfs_mdt_write_folio(folio, wbc);
+
+ return error;
+}
static const struct address_space_operations def_mdt_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
- .writepage = nilfs_mdt_write_page,
+ .writepages = nilfs_mdt_writeback,
+ .migrate_folio = buffer_migrate_folio_norefs,
};
static const struct inode_operations def_mdt_iops;
@@ -512,6 +517,8 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size,
* nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
* @inode: inode of the metadata file
* @shadow: shadow mapping
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_mdt_setup_shadow_map(struct inode *inode,
struct nilfs_shadow_map *shadow)
@@ -533,6 +540,8 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
/**
* nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map
* @inode: inode of the metadata file
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_mdt_save_to_shadow_map(struct inode *inode)
{
@@ -560,17 +569,20 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
{
struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
struct buffer_head *bh_frozen;
- struct page *page;
+ struct folio *folio;
int blkbits = inode->i_blkbits;
- page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index);
- if (!page)
- return -ENOMEM;
+ folio = filemap_grab_folio(shadow->inode->i_mapping,
+ bh->b_folio->index);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << blkbits, 0);
+ bh_frozen = folio_buffers(folio);
+ if (!bh_frozen)
+ bh_frozen = create_empty_buffers(folio, 1 << blkbits, 0);
- bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);
+ bh_frozen = get_nth_bh(bh_frozen,
+ offset_in_folio(folio, bh->b_data) >> blkbits);
if (!buffer_uptodate(bh_frozen))
nilfs_copy_buffer(bh_frozen, bh);
@@ -582,8 +594,8 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
brelse(bh_frozen); /* already frozen */
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return 0;
}
@@ -592,17 +604,20 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
{
struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
struct buffer_head *bh_frozen = NULL;
- struct page *page;
+ struct folio *folio;
int n;
- page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index);
- if (page) {
- if (page_has_buffers(page)) {
- n = bh_offset(bh) >> inode->i_blkbits;
- bh_frozen = nilfs_page_get_nth_block(page, n);
+ folio = filemap_lock_folio(shadow->inode->i_mapping,
+ bh->b_folio->index);
+ if (!IS_ERR(folio)) {
+ bh_frozen = folio_buffers(folio);
+ if (bh_frozen) {
+ n = offset_in_folio(folio, bh->b_data) >>
+ inode->i_blkbits;
+ bh_frozen = get_nth_bh(bh_frozen, n);
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
return bh_frozen;
}
@@ -635,10 +650,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode)
if (mi->mi_palloc_cache)
nilfs_palloc_clear_cache(inode);
- nilfs_clear_dirty_pages(inode->i_mapping, true);
+ nilfs_clear_dirty_pages(inode->i_mapping);
nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping);
- nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true);
+ nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping);
nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping,
NILFS_I(shadow->inode)->i_assoc_inode->i_mapping);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 23899e0ae850..40f4b1a28705 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -55,12 +55,25 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
ino_t ino;
+ int res;
if (dentry->d_name.len > NILFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- ino = nilfs_inode_by_name(dir, &dentry->d_name);
- inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL;
+ res = nilfs_inode_by_name(dir, &dentry->d_name, &ino);
+ if (res) {
+ if (res != -ENOENT)
+ return ERR_PTR(res);
+ inode = NULL;
+ } else {
+ inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino);
+ if (inode == ERR_PTR(-ESTALE)) {
+ nilfs_error(dir->i_sb,
+ "deleted inode referenced: %lu", ino);
+ return ERR_PTR(-EIO);
+ }
+ }
+
return d_splice_alias(inode, dentry);
}
@@ -72,7 +85,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int nilfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int nilfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
@@ -100,7 +113,7 @@ static int nilfs_create(struct user_namespace *mnt_userns, struct inode *dir,
}
static int
-nilfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+nilfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -125,7 +138,7 @@ nilfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return err;
}
-static int nilfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct nilfs_transaction_info ti;
@@ -149,6 +162,9 @@ static int nilfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
/* slow symlink */
inode->i_op = &nilfs_symlink_inode_operations;
inode_nohighmem(inode);
+ mapping_set_gfp_mask(inode->i_mapping,
+ mapping_gfp_constraint(inode->i_mapping,
+ ~__GFP_FS));
inode->i_mapping->a_ops = &nilfs_aops;
err = page_symlink(inode, symname, l);
if (err)
@@ -185,7 +201,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
if (err)
return err;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_link_count(inode);
ihold(inode);
@@ -202,8 +218,8 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
return err;
}
-static int nilfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+static struct dentry *nilfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode *inode;
struct nilfs_transaction_info ti;
@@ -211,7 +227,7 @@ static int nilfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
if (err)
- return err;
+ return ERR_PTR(err);
inc_nlink(dir);
@@ -242,7 +258,7 @@ out:
else
nilfs_transaction_abort(dir->i_sb);
- return err;
+ return ERR_PTR(err);
out_fail:
drop_nlink(inode);
@@ -260,13 +276,14 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
{
struct inode *inode;
struct nilfs_dir_entry *de;
- struct page *page;
+ struct folio *folio;
int err;
- err = -ENOENT;
- de = nilfs_find_entry(dir, &dentry->d_name, &page);
- if (!de)
+ de = nilfs_find_entry(dir, &dentry->d_name, &folio);
+ if (IS_ERR(de)) {
+ err = PTR_ERR(de);
goto out;
+ }
inode = d_inode(dentry);
err = -EIO;
@@ -279,11 +296,12 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
inode->i_ino, inode->i_nlink);
set_nlink(inode, 1);
}
- err = nilfs_delete_entry(de, page);
+ err = nilfs_delete_entry(de, folio);
+ folio_release_kmap(folio, de);
if (err)
goto out;
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
drop_nlink(inode);
err = 0;
out:
@@ -340,18 +358,19 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
return err;
}
-static int nilfs_rename(struct user_namespace *mnt_userns,
+static int nilfs_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
- struct page *dir_page = NULL;
+ struct folio *dir_folio = NULL;
struct nilfs_dir_entry *dir_de = NULL;
- struct page *old_page;
+ struct folio *old_folio;
struct nilfs_dir_entry *old_de;
struct nilfs_transaction_info ti;
+ bool old_is_dir = S_ISDIR(old_inode->i_mode);
int err;
if (flags & ~RENAME_NOREPLACE)
@@ -361,34 +380,40 @@ static int nilfs_rename(struct user_namespace *mnt_userns,
if (unlikely(err))
return err;
- err = -ENOENT;
- old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page);
- if (!old_de)
+ old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio);
+ if (IS_ERR(old_de)) {
+ err = PTR_ERR(old_de);
goto out;
+ }
- if (S_ISDIR(old_inode->i_mode)) {
+ if (old_is_dir && old_dir != new_dir) {
err = -EIO;
- dir_de = nilfs_dotdot(old_inode, &dir_page);
+ dir_de = nilfs_dotdot(old_inode, &dir_folio);
if (!dir_de)
goto out_old;
}
if (new_inode) {
- struct page *new_page;
+ struct folio *new_folio;
struct nilfs_dir_entry *new_de;
err = -ENOTEMPTY;
- if (dir_de && !nilfs_empty_dir(new_inode))
+ if (old_is_dir && !nilfs_empty_dir(new_inode))
goto out_dir;
- err = -ENOENT;
- new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
- if (!new_de)
+ new_de = nilfs_find_entry(new_dir, &new_dentry->d_name,
+ &new_folio);
+ if (IS_ERR(new_de)) {
+ err = PTR_ERR(new_de);
+ goto out_dir;
+ }
+ err = nilfs_set_link(new_dir, new_de, new_folio, old_inode);
+ folio_release_kmap(new_folio, new_de);
+ if (unlikely(err))
goto out_dir;
- nilfs_set_link(new_dir, new_de, new_page, old_inode);
nilfs_mark_inode_dirty(new_dir);
- new_inode->i_ctime = current_time(new_inode);
- if (dir_de)
+ inode_set_ctime_current(new_inode);
+ if (old_is_dir)
drop_nlink(new_inode);
drop_nlink(new_inode);
nilfs_mark_inode_dirty(new_inode);
@@ -396,7 +421,7 @@ static int nilfs_rename(struct user_namespace *mnt_userns,
err = nilfs_add_link(new_dentry, old_inode);
if (err)
goto out_dir;
- if (dir_de) {
+ if (old_is_dir) {
inc_nlink(new_dir);
nilfs_mark_inode_dirty(new_dir);
}
@@ -406,30 +431,30 @@ static int nilfs_rename(struct user_namespace *mnt_userns,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old_inode->i_ctime = current_time(old_inode);
-
- nilfs_delete_entry(old_de, old_page);
-
- if (dir_de) {
- nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
- drop_nlink(old_dir);
+ inode_set_ctime_current(old_inode);
+
+ err = nilfs_delete_entry(old_de, old_folio);
+ if (likely(!err)) {
+ if (old_is_dir) {
+ if (old_dir != new_dir)
+ err = nilfs_set_link(old_inode, dir_de,
+ dir_folio, new_dir);
+ drop_nlink(old_dir);
+ }
+ nilfs_mark_inode_dirty(old_dir);
}
- nilfs_mark_inode_dirty(old_dir);
nilfs_mark_inode_dirty(old_inode);
- err = nilfs_transaction_commit(old_dir->i_sb);
- return err;
-
out_dir:
- if (dir_de) {
- kunmap(dir_page);
- put_page(dir_page);
- }
+ if (dir_de)
+ folio_release_kmap(dir_folio, dir_de);
out_old:
- kunmap(old_page);
- put_page(old_page);
+ folio_release_kmap(old_folio, old_de);
out:
- nilfs_transaction_abort(old_dir->i_sb);
+ if (likely(!err))
+ err = nilfs_transaction_commit(old_dir->i_sb);
+ else
+ nilfs_transaction_abort(old_dir->i_sb);
return err;
}
@@ -438,21 +463,17 @@ out:
*/
static struct dentry *nilfs_get_parent(struct dentry *child)
{
- unsigned long ino;
- struct inode *inode;
+ ino_t ino;
+ int res;
struct nilfs_root *root;
- ino = nilfs_inode_by_name(d_inode(child), &dotdot_name);
- if (!ino)
- return ERR_PTR(-ENOENT);
+ res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino);
+ if (res)
+ return ERR_PTR(res);
root = NILFS_I(d_inode(child))->i_root;
- inode = nilfs_iget(child->d_sb, root, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- return d_obtain_alias(inode);
+ return d_obtain_alias(nilfs_iget(child->d_sb, root, ino));
}
static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno,
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index aecda4fc95f5..b7e3d91b6243 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -14,6 +14,7 @@
#include <linux/buffer_head.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
+#include <linux/fs_struct.h>
#include <linux/nilfs2_api.h>
#include <linux/nilfs2_ondisk.h>
#include "the_nilfs.h"
@@ -22,6 +23,7 @@
/**
* struct nilfs_inode_info - nilfs inode data in memory
* @i_flags: inode flags
+ * @i_type: inode type (combination of flags that inidicate usage)
* @i_state: dynamic state flags
* @i_bmap: pointer on i_bmap_data
* @i_bmap_data: raw block mapping
@@ -37,6 +39,7 @@
*/
struct nilfs_inode_info {
__u32 i_flags;
+ unsigned int i_type;
unsigned long i_state; /* Dynamic state flags */
struct nilfs_bmap *i_bmap;
struct nilfs_bmap i_bmap_data;
@@ -90,9 +93,16 @@ enum {
NILFS_I_UPDATED, /* The file has been written back */
NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */
NILFS_I_BMAP, /* has bmap and btnode_cache */
- NILFS_I_GCINODE, /* inode for GC, on memory only */
- NILFS_I_BTNC, /* inode for btree node cache */
- NILFS_I_SHADOW, /* inode for shadowed page cache */
+};
+
+/*
+ * Flags to identify the usage of on-memory inodes (i_type)
+ */
+enum {
+ NILFS_I_TYPE_NORMAL = 0,
+ NILFS_I_TYPE_GC = 0x0001, /* For data caching during GC */
+ NILFS_I_TYPE_BTNC = 0x0002, /* For btree node cache */
+ NILFS_I_TYPE_SHADOW = 0x0004, /* For shadowed page cache */
};
/*
@@ -103,6 +113,18 @@ enum {
NILFS_SB_COMMIT_ALL /* Commit both super blocks */
};
+/**
+ * define NILFS_MAX_VOLUME_NAME - maximum number of characters (bytes) in a
+ * file system volume name
+ *
+ * Defined by the size of the volume name field in the on-disk superblocks.
+ * This volume name does not include the terminating NULL byte if the string
+ * length matches the field size, so use (NILFS_MAX_VOLUME_NAME + 1) for the
+ * size of the buffer that requires a NULL byte termination.
+ */
+#define NILFS_MAX_VOLUME_NAME \
+ sizeof_field(struct nilfs_super_block, s_volume_name)
+
/*
* Macros to check inode numbers
*/
@@ -116,9 +138,15 @@ enum {
#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino)
#define NILFS_MDT_INODE(sb, ino) \
- ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & BIT(ino)))
+ ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino)))
#define NILFS_VALID_INODE(sb, ino) \
- ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & BIT(ino)))
+ ((ino) >= NILFS_FIRST_INO(sb) || \
+ ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino))))
+
+#define NILFS_PRIVATE_INODE(ino) ({ \
+ ino_t __ino = (ino); \
+ ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \
+ (__ino) != NILFS_SKETCH_INO); })
/**
* struct nilfs_transaction_info: context information for synchronization
@@ -226,24 +254,24 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags)
}
/* dir.c */
-extern int nilfs_add_link(struct dentry *, struct inode *);
-extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *);
-extern int nilfs_make_empty(struct inode *, struct inode *);
-extern struct nilfs_dir_entry *
-nilfs_find_entry(struct inode *, const struct qstr *, struct page **);
-extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *);
-extern int nilfs_empty_dir(struct inode *);
-extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **);
-extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *,
- struct page *, struct inode *);
+int nilfs_add_link(struct dentry *, struct inode *);
+int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino);
+int nilfs_make_empty(struct inode *, struct inode *);
+struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *,
+ struct folio **);
+int nilfs_delete_entry(struct nilfs_dir_entry *, struct folio *);
+int nilfs_empty_dir(struct inode *);
+struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct folio **);
+int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
+ struct folio *folio, struct inode *inode);
/* file.c */
extern int nilfs_sync_file(struct file *, loff_t, loff_t, int);
/* ioctl.c */
-int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *m);
-int nilfs_fileattr_set(struct user_namespace *mnt_userns,
- struct dentry *dentry, struct fileattr *fa);
+int nilfs_fileattr_get(struct dentry *dentry, struct file_kattr *m);
+int nilfs_fileattr_set(struct mnt_idmap *idmap,
+ struct dentry *dentry, struct file_kattr *fa);
long nilfs_ioctl(struct file *, unsigned int, unsigned long);
long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
@@ -256,7 +284,8 @@ extern struct inode *nilfs_new_inode(struct inode *, umode_t);
extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern void nilfs_set_inode_flags(struct inode *);
extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *);
-extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int);
+void nilfs_write_inode_common(struct inode *inode,
+ struct nilfs_inode *raw_inode);
struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
unsigned long ino);
struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
@@ -271,10 +300,10 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode);
extern void nilfs_update_inode(struct inode *, struct buffer_head *, int);
extern void nilfs_truncate(struct inode *);
extern void nilfs_evict_inode(struct inode *);
-extern int nilfs_setattr(struct user_namespace *, struct dentry *,
+extern int nilfs_setattr(struct mnt_idmap *, struct dentry *,
struct iattr *);
extern void nilfs_write_failed(struct address_space *mapping, loff_t to);
-int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask);
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
extern int nilfs_inode_dirty(struct inode *);
@@ -334,8 +363,8 @@ void __nilfs_error(struct super_block *sb, const char *function,
extern struct nilfs_super_block *
nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
-extern int nilfs_store_magic_and_option(struct super_block *,
- struct nilfs_super_block *, char *);
+extern int nilfs_store_magic(struct super_block *sb,
+ struct nilfs_super_block *sbp);
extern int nilfs_check_feature_compatibility(struct super_block *,
struct nilfs_super_block *);
extern void nilfs_set_log_cursor(struct nilfs_super_block *,
@@ -373,6 +402,7 @@ extern const struct file_operations nilfs_dir_operations;
extern const struct inode_operations nilfs_file_inode_operations;
extern const struct file_operations nilfs_file_operations;
extern const struct address_space_operations nilfs_aops;
+extern const struct address_space_operations nilfs_buffer_cache_aops;
extern const struct inode_operations nilfs_dir_inode_operations;
extern const struct inode_operations nilfs_special_inode_operations;
extern const struct inode_operations nilfs_symlink_inode_operations;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 39b7eea2642a..56c4da417b6a 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -25,21 +25,20 @@
(BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) | \
BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked))
-static struct buffer_head *
-__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
- int blkbits, unsigned long b_state)
+static struct buffer_head *__nilfs_get_folio_block(struct folio *folio,
+ unsigned long block, pgoff_t index, int blkbits,
+ unsigned long b_state)
{
unsigned long first_block;
- struct buffer_head *bh;
+ struct buffer_head *bh = folio_buffers(folio);
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << blkbits, b_state);
+ if (!bh)
+ bh = create_empty_buffers(folio, 1 << blkbits, b_state);
first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
- bh = nilfs_page_get_nth_block(page, block - first_block);
+ bh = get_nth_bh(bh, block - first_block);
- touch_buffer(bh);
wait_on_buffer(bh);
return bh;
}
@@ -51,19 +50,20 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
{
int blkbits = inode->i_blkbits;
pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits);
- struct page *page;
+ struct folio *folio;
struct buffer_head *bh;
- page = grab_cache_page(mapping, index);
- if (unlikely(!page))
+ folio = filemap_grab_folio(mapping, index);
+ if (IS_ERR(folio))
return NULL;
- bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
+ bh = __nilfs_get_folio_block(folio, blkoff, index, blkbits, b_state);
if (unlikely(!bh)) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return NULL;
}
+ bh->b_bdev = inode->i_sb->s_bdev;
return bh;
}
@@ -73,20 +73,21 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
*/
void nilfs_forget_buffer(struct buffer_head *bh)
{
- struct page *page = bh->b_page;
+ struct folio *folio = bh->b_folio;
const unsigned long clear_bits =
(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
- BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));
+ BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) |
+ BIT(BH_Delay));
lock_buffer(bh);
set_mask_bits(&bh->b_state, clear_bits, 0);
- if (nilfs_page_buffers_clean(page))
- __nilfs_clear_page_dirty(page);
+ if (nilfs_folio_buffers_clean(folio))
+ __nilfs_clear_folio_dirty(folio);
bh->b_blocknr = -1;
- ClearPageUptodate(page);
- ClearPageMappedToDisk(page);
+ folio_clear_uptodate(folio);
+ folio_clear_mappedtodisk(folio);
unlock_buffer(bh);
brelse(bh);
}
@@ -98,16 +99,16 @@ void nilfs_forget_buffer(struct buffer_head *bh)
*/
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
- void *kaddr0, *kaddr1;
+ void *saddr, *daddr;
unsigned long bits;
- struct page *spage = sbh->b_page, *dpage = dbh->b_page;
+ struct folio *sfolio = sbh->b_folio, *dfolio = dbh->b_folio;
struct buffer_head *bh;
- kaddr0 = kmap_atomic(spage);
- kaddr1 = kmap_atomic(dpage);
- memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
- kunmap_atomic(kaddr1);
- kunmap_atomic(kaddr0);
+ saddr = kmap_local_folio(sfolio, bh_offset(sbh));
+ daddr = kmap_local_folio(dfolio, bh_offset(dbh));
+ memcpy(daddr, saddr, sbh->b_size);
+ kunmap_local(daddr);
+ kunmap_local(saddr);
dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
dbh->b_blocknr = sbh->b_blocknr;
@@ -121,58 +122,58 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
unlock_buffer(bh);
}
if (bits & BIT(BH_Uptodate))
- SetPageUptodate(dpage);
+ folio_mark_uptodate(dfolio);
else
- ClearPageUptodate(dpage);
+ folio_clear_uptodate(dfolio);
if (bits & BIT(BH_Mapped))
- SetPageMappedToDisk(dpage);
+ folio_set_mappedtodisk(dfolio);
else
- ClearPageMappedToDisk(dpage);
+ folio_clear_mappedtodisk(dfolio);
}
/**
- * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
- * @page: page to be checked
+ * nilfs_folio_buffers_clean - Check if a folio has dirty buffers or not.
+ * @folio: Folio to be checked.
*
- * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
- * Otherwise, it returns non-zero value.
+ * Return: false if the folio has dirty buffers, true otherwise.
*/
-int nilfs_page_buffers_clean(struct page *page)
+bool nilfs_folio_buffers_clean(struct folio *folio)
{
struct buffer_head *bh, *head;
- bh = head = page_buffers(page);
+ bh = head = folio_buffers(folio);
do {
if (buffer_dirty(bh))
- return 0;
+ return false;
bh = bh->b_this_page;
} while (bh != head);
- return 1;
+ return true;
}
-void nilfs_page_bug(struct page *page)
+void nilfs_folio_bug(struct folio *folio)
{
+ struct buffer_head *bh, *head;
struct address_space *m;
unsigned long ino;
- if (unlikely(!page)) {
- printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
+ if (unlikely(!folio)) {
+ printk(KERN_CRIT "NILFS_FOLIO_BUG(NULL)\n");
return;
}
- m = page->mapping;
+ m = folio->mapping;
ino = m ? m->host->i_ino : 0;
- printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
+ printk(KERN_CRIT "NILFS_FOLIO_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
"mapping=%p ino=%lu\n",
- page, page_ref_count(page),
- (unsigned long long)page->index, page->flags, m, ino);
+ folio, folio_ref_count(folio),
+ (unsigned long long)folio->index, folio->flags.f, m, ino);
- if (page_has_buffers(page)) {
- struct buffer_head *bh, *head;
+ head = folio_buffers(folio);
+ if (head) {
int i = 0;
- bh = head = page_buffers(page);
+ bh = head;
do {
printk(KERN_CRIT
" BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
@@ -184,30 +185,32 @@ void nilfs_page_bug(struct page *page)
}
/**
- * nilfs_copy_page -- copy the page with buffers
- * @dst: destination page
- * @src: source page
- * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
+ * nilfs_copy_folio -- copy the folio with buffers
+ * @dst: destination folio
+ * @src: source folio
+ * @copy_dirty: flag whether to copy dirty states on the folio's buffer heads.
*
- * This function is for both data pages and btnode pages. The dirty flag
- * should be treated by caller. The page must not be under i/o.
- * Both src and dst page must be locked
+ * This function is for both data folios and btnode folios. The dirty flag
+ * should be treated by caller. The folio must not be under i/o.
+ * Both src and dst folio must be locked
*/
-static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
+static void nilfs_copy_folio(struct folio *dst, struct folio *src,
+ bool copy_dirty)
{
struct buffer_head *dbh, *dbufs, *sbh;
unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
- BUG_ON(PageWriteback(dst));
+ BUG_ON(folio_test_writeback(dst));
- sbh = page_buffers(src);
- if (!page_has_buffers(dst))
- create_empty_buffers(dst, sbh->b_size, 0);
+ sbh = folio_buffers(src);
+ dbh = folio_buffers(dst);
+ if (!dbh)
+ dbh = create_empty_buffers(dst, sbh->b_size, 0);
if (copy_dirty)
mask |= BIT(BH_Dirty);
- dbh = dbufs = page_buffers(dst);
+ dbufs = dbh;
do {
lock_buffer(sbh);
lock_buffer(dbh);
@@ -218,16 +221,16 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
dbh = dbh->b_this_page;
} while (dbh != dbufs);
- copy_highpage(dst, src);
+ folio_copy(dst, src);
- if (PageUptodate(src) && !PageUptodate(dst))
- SetPageUptodate(dst);
- else if (!PageUptodate(src) && PageUptodate(dst))
- ClearPageUptodate(dst);
- if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
- SetPageMappedToDisk(dst);
- else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
- ClearPageMappedToDisk(dst);
+ if (folio_test_uptodate(src) && !folio_test_uptodate(dst))
+ folio_mark_uptodate(dst);
+ else if (!folio_test_uptodate(src) && folio_test_uptodate(dst))
+ folio_clear_uptodate(dst);
+ if (folio_test_mappedtodisk(src) && !folio_test_mappedtodisk(dst))
+ folio_set_mappedtodisk(dst);
+ else if (!folio_test_mappedtodisk(src) && folio_test_mappedtodisk(dst))
+ folio_clear_mappedtodisk(dst);
do {
unlock_buffer(sbh);
@@ -240,42 +243,43 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
int nilfs_copy_dirty_pages(struct address_space *dmap,
struct address_space *smap)
{
- struct pagevec pvec;
+ struct folio_batch fbatch;
unsigned int i;
pgoff_t index = 0;
int err = 0;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
repeat:
- if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY))
+ if (!filemap_get_folios_tag(smap, &index, (pgoff_t)-1,
+ PAGECACHE_TAG_DIRTY, &fbatch))
return 0;
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i], *dpage;
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i], *dfolio;
- lock_page(page);
- if (unlikely(!PageDirty(page)))
- NILFS_PAGE_BUG(page, "inconsistent dirty state");
+ folio_lock(folio);
+ if (unlikely(!folio_test_dirty(folio)))
+ NILFS_FOLIO_BUG(folio, "inconsistent dirty state");
- dpage = grab_cache_page(dmap, page->index);
- if (unlikely(!dpage)) {
+ dfolio = filemap_grab_folio(dmap, folio->index);
+ if (IS_ERR(dfolio)) {
/* No empty page is added to the page cache */
- err = -ENOMEM;
- unlock_page(page);
+ folio_unlock(folio);
+ err = PTR_ERR(dfolio);
break;
}
- if (unlikely(!page_has_buffers(page)))
- NILFS_PAGE_BUG(page,
+ if (unlikely(!folio_buffers(folio)))
+ NILFS_FOLIO_BUG(folio,
"found empty page in dat page cache");
- nilfs_copy_page(dpage, page, 1);
- __set_page_dirty_nobuffers(dpage);
+ nilfs_copy_folio(dfolio, folio, true);
+ filemap_dirty_folio(folio_mapping(dfolio), dfolio);
- unlock_page(dpage);
- put_page(dpage);
- unlock_page(page);
+ folio_unlock(dfolio);
+ folio_put(dfolio);
+ folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
if (likely(!err))
@@ -310,10 +314,10 @@ repeat:
folio_lock(folio);
dfolio = filemap_lock_folio(dmap, index);
- if (dfolio) {
+ if (!IS_ERR(dfolio)) {
/* overwrite existing folio in the destination cache */
WARN_ON(folio_test_dirty(dfolio));
- nilfs_copy_page(&dfolio->page, &folio->page, 0);
+ nilfs_copy_folio(dfolio, folio, false);
folio_unlock(dfolio);
folio_put(dfolio);
/* Do we not need to remove folio from smap here? */
@@ -353,80 +357,101 @@ repeat:
/**
* nilfs_clear_dirty_pages - discard dirty pages in address space
* @mapping: address space with dirty pages for discarding
- * @silent: suppress [true] or print [false] warning messages
*/
-void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
+void nilfs_clear_dirty_pages(struct address_space *mapping)
{
- struct pagevec pvec;
+ struct folio_batch fbatch;
unsigned int i;
pgoff_t index = 0;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
+
+ while (filemap_get_folios_tag(mapping, &index, (pgoff_t)-1,
+ PAGECACHE_TAG_DIRTY, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+
+ folio_lock(folio);
- while (pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
+ /*
+ * This folio may have been removed from the address
+ * space by truncation or invalidation when the lock
+ * was acquired. Skip processing in that case.
+ */
+ if (likely(folio->mapping == mapping))
+ nilfs_clear_folio_dirty(folio);
- lock_page(page);
- nilfs_clear_dirty_page(page, silent);
- unlock_page(page);
+ folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
}
/**
- * nilfs_clear_dirty_page - discard dirty page
- * @page: dirty page that will be discarded
- * @silent: suppress [true] or print [false] warning messages
+ * nilfs_clear_folio_dirty - discard dirty folio
+ * @folio: dirty folio that will be discarded
+ *
+ * nilfs_clear_folio_dirty() clears working states including dirty state for
+ * the folio and its buffers. If the folio has buffers, clear only if it is
+ * confirmed that none of the buffer heads are busy (none have valid
+ * references and none are locked).
*/
-void nilfs_clear_dirty_page(struct page *page, bool silent)
+void nilfs_clear_folio_dirty(struct folio *folio)
{
- struct inode *inode = page->mapping->host;
- struct super_block *sb = inode->i_sb;
-
- BUG_ON(!PageLocked(page));
-
- if (!silent)
- nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu",
- page_offset(page), inode->i_ino);
+ struct buffer_head *bh, *head;
- ClearPageUptodate(page);
- ClearPageMappedToDisk(page);
+ BUG_ON(!folio_test_locked(folio));
- if (page_has_buffers(page)) {
- struct buffer_head *bh, *head;
+ head = folio_buffers(folio);
+ if (head) {
const unsigned long clear_bits =
(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
- BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));
+ BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) |
+ BIT(BH_Delay));
+ bool busy, invalidated = false;
- bh = head = page_buffers(page);
+recheck_buffers:
+ busy = false;
+ bh = head;
do {
- lock_buffer(bh);
- if (!silent)
- nilfs_warn(sb,
- "discard dirty block: blocknr=%llu, size=%zu",
- (u64)bh->b_blocknr, bh->b_size);
+ if (atomic_read(&bh->b_count) | buffer_locked(bh)) {
+ busy = true;
+ break;
+ }
+ } while (bh = bh->b_this_page, bh != head);
+ if (busy) {
+ if (invalidated)
+ return;
+ invalidate_bh_lrus();
+ invalidated = true;
+ goto recheck_buffers;
+ }
+
+ bh = head;
+ do {
+ lock_buffer(bh);
set_mask_bits(&bh->b_state, clear_bits, 0);
unlock_buffer(bh);
} while (bh = bh->b_this_page, bh != head);
}
- __nilfs_clear_page_dirty(page);
+ folio_clear_uptodate(folio);
+ folio_clear_mappedtodisk(folio);
+ folio_clear_checked(folio);
+ __nilfs_clear_folio_dirty(folio);
}
-unsigned int nilfs_page_count_clean_buffers(struct page *page,
+unsigned int nilfs_page_count_clean_buffers(struct folio *folio,
unsigned int from, unsigned int to)
{
unsigned int block_start, block_end;
struct buffer_head *bh, *head;
unsigned int nc = 0;
- for (bh = head = page_buffers(page), block_start = 0;
+ for (bh = head = folio_buffers(folio), block_start = 0;
bh != head || !block_start;
block_start = block_end, bh = bh->b_this_page) {
block_end = block_start + bh->b_size;
@@ -446,22 +471,23 @@ unsigned int nilfs_page_count_clean_buffers(struct page *page,
* 2) Some B-tree operations like insertion or deletion may dispose buffers
* in dirty state, and this needs to cancel the dirty state of their pages.
*/
-int __nilfs_clear_page_dirty(struct page *page)
+void __nilfs_clear_folio_dirty(struct folio *folio)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
if (mapping) {
xa_lock_irq(&mapping->i_pages);
- if (test_bit(PG_dirty, &page->flags)) {
- __xa_clear_mark(&mapping->i_pages, page_index(page),
+ if (folio_test_dirty(folio)) {
+ __xa_clear_mark(&mapping->i_pages, folio->index,
PAGECACHE_TAG_DIRTY);
xa_unlock_irq(&mapping->i_pages);
- return clear_page_dirty_for_io(page);
+ folio_clear_dirty_for_io(folio);
+ return;
}
xa_unlock_irq(&mapping->i_pages);
- return 0;
+ return;
}
- return TestClearPageDirty(page);
+ folio_clear_dirty(folio);
}
/**
@@ -473,8 +499,9 @@ int __nilfs_clear_page_dirty(struct page *page)
* This function searches an extent of buffers marked "delayed" which
* starts from a block offset equal to or larger than @start_blk. If
* such an extent was found, this will store the start offset in
- * @blkoff and return its length in blocks. Otherwise, zero is
- * returned.
+ * @blkoff and return its length in blocks.
+ *
+ * Return: Length in blocks of found extent, 0 otherwise.
*/
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
sector_t start_blk,
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 21ddcdd4d63e..136cd1c143c9 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -30,37 +30,26 @@ BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */
-int __nilfs_clear_page_dirty(struct page *);
+void __nilfs_clear_folio_dirty(struct folio *);
struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
unsigned long, unsigned long);
void nilfs_forget_buffer(struct buffer_head *);
void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
-int nilfs_page_buffers_clean(struct page *);
-void nilfs_page_bug(struct page *);
+bool nilfs_folio_buffers_clean(struct folio *);
+void nilfs_folio_bug(struct folio *);
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
-void nilfs_clear_dirty_page(struct page *, bool);
-void nilfs_clear_dirty_pages(struct address_space *, bool);
-unsigned int nilfs_page_count_clean_buffers(struct page *, unsigned int,
- unsigned int);
+void nilfs_clear_folio_dirty(struct folio *folio);
+void nilfs_clear_dirty_pages(struct address_space *mapping);
+unsigned int nilfs_page_count_clean_buffers(struct folio *folio,
+ unsigned int from, unsigned int to);
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
sector_t start_blk,
sector_t *blkoff);
-#define NILFS_PAGE_BUG(page, m, a...) \
- do { nilfs_page_bug(page); BUG(); } while (0)
-
-static inline struct buffer_head *
-nilfs_page_get_nth_block(struct page *page, unsigned int count)
-{
- struct buffer_head *bh = page_buffers(page);
-
- while (count-- > 0)
- bh = bh->b_this_page;
- get_bh(bh);
- return bh;
-}
+#define NILFS_FOLIO_BUG(folio, m, a...) \
+ do { nilfs_folio_bug(folio); BUG(); } while (0)
#endif /* _NILFS_PAGE_H */
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 0955b657938f..a9c61d0492cb 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -88,6 +88,8 @@ static int nilfs_warn_segment_error(struct super_block *sb, int err)
* @check_bytes: number of bytes to be checked
* @start: DBN of start block
* @nblock: number of blocks to be checked
+ *
+ * Return: 0 on success, or %-EIO if an I/O error occurs.
*/
static int nilfs_compute_checksum(struct the_nilfs *nilfs,
struct buffer_head *bhs, u32 *sum,
@@ -126,6 +128,11 @@ static int nilfs_compute_checksum(struct the_nilfs *nilfs,
* @sr_block: disk block number of the super root block
* @pbh: address of a buffer_head pointer to return super root buffer
* @check: CRC check flag
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Super root block corrupted.
+ * * %-EIO - I/O error.
*/
int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
struct buffer_head **pbh, int check)
@@ -176,6 +183,8 @@ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
* @nilfs: nilfs object
* @start_blocknr: start block number of the log
* @sum: pointer to return segment summary structure
+ *
+ * Return: Buffer head pointer, or NULL if an I/O error occurs.
*/
static struct buffer_head *
nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr,
@@ -195,6 +204,13 @@ nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr,
* @seg_seq: sequence number of segment
* @bh_sum: buffer head of summary block
* @sum: segment summary struct
+ *
+ * Return: 0 on success, or one of the following internal codes on failure:
+ * * %NILFS_SEG_FAIL_MAGIC - Magic number mismatch.
+ * * %NILFS_SEG_FAIL_SEQ - Sequence number mismatch.
+ * * %NIFLS_SEG_FAIL_CONSISTENCY - Block count out of range.
+ * * %NILFS_SEG_FAIL_IO - I/O error.
+ * * %NILFS_SEG_FAIL_CHECKSUM_FULL - Full log checksum verification failed.
*/
static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq,
struct buffer_head *bh_sum,
@@ -238,6 +254,9 @@ out:
* @pbh: the current buffer head on summary blocks [in, out]
* @offset: the current byte offset on summary blocks [in, out]
* @bytes: byte size of the item to be read
+ *
+ * Return: Kernel space address of current segment summary entry, or
+ * NULL if an I/O error occurs.
*/
static void *nilfs_read_summary_info(struct the_nilfs *nilfs,
struct buffer_head **pbh,
@@ -300,6 +319,11 @@ static void nilfs_skip_summary_info(struct the_nilfs *nilfs,
* @start_blocknr: start block number of the log
* @sum: log summary information
* @head: list head to add nilfs_recovery_block struct
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
struct nilfs_segment_summary *sum,
@@ -433,8 +457,17 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
* The next segment is invalidated by this recovery.
*/
err = nilfs_sufile_free(sufile, segnum[1]);
- if (unlikely(err))
+ if (unlikely(err)) {
+ if (err == -ENOENT) {
+ nilfs_err(sb,
+ "checkpoint log inconsistency at block %llu (segment %llu): next segment %llu is unallocated",
+ (unsigned long long)nilfs->ns_last_pseg,
+ (unsigned long long)nilfs->ns_segnum,
+ (unsigned long long)segnum[1]);
+ err = -EINVAL;
+ }
goto failed;
+ }
for (i = 1; i < 4; i++) {
err = nilfs_segment_list_add(head, segnum[i]);
@@ -472,18 +505,16 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
- struct page *page)
+ loff_t pos, struct folio *folio)
{
struct buffer_head *bh_org;
- void *kaddr;
+ size_t from = offset_in_folio(folio, pos);
bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
if (unlikely(!bh_org))
return -EIO;
- kaddr = kmap_atomic(page);
- memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
- kunmap_atomic(kaddr);
+ memcpy_to_folio(folio, from, bh_org->b_data, bh_org->b_size);
brelse(bh_org);
return 0;
}
@@ -497,7 +528,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
struct inode *inode;
struct nilfs_recovery_block *rb, *n;
unsigned int blocksize = nilfs->ns_blocksize;
- struct page *page;
+ struct folio *folio;
loff_t pos;
int err = 0, err2 = 0;
@@ -511,7 +542,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
pos = rb->blkoff << inode->i_blkbits;
err = block_write_begin(inode->i_mapping, pos, blocksize,
- &page, nilfs_get_block);
+ &folio, nilfs_get_block);
if (unlikely(err)) {
loff_t isize = inode->i_size;
@@ -521,26 +552,25 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
goto failed_inode;
}
- err = nilfs_recovery_copy_block(nilfs, rb, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, folio);
if (unlikely(err))
- goto failed_page;
+ goto failed_folio;
err = nilfs_set_file_dirty(inode, 1);
if (unlikely(err))
- goto failed_page;
+ goto failed_folio;
- block_write_end(NULL, inode->i_mapping, pos, blocksize,
- blocksize, page, NULL);
+ block_write_end(pos, blocksize, blocksize, folio);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
(*nr_salvaged_blocks)++;
goto next;
- failed_page:
- unlock_page(page);
- put_page(page);
+ failed_folio:
+ folio_unlock(folio);
+ folio_put(folio);
failed_inode:
nilfs_warn(sb,
@@ -562,7 +592,14 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
* checkpoint
* @nilfs: nilfs object
* @sb: super block instance
+ * @root: NILFS root instance
* @ri: pointer to a nilfs_recovery_info
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Log format error.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
struct super_block *sb,
@@ -697,9 +734,15 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
return;
bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize);
- BUG_ON(!bh);
+ if (WARN_ON(!bh))
+ return; /* should never happen */
+
+ lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
+ set_buffer_uptodate(bh);
set_buffer_dirty(bh);
+ unlock_buffer(bh);
+
err = sync_dirty_buffer(bh);
if (unlikely(err))
nilfs_warn(nilfs->ns_sb,
@@ -708,23 +751,45 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
}
/**
+ * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery
+ * @nilfs: nilfs object
+ */
+static void nilfs_abort_roll_forward(struct the_nilfs *nilfs)
+{
+ struct nilfs_inode_info *ii, *n;
+ LIST_HEAD(head);
+
+ /* Abandon inodes that have read recovery data */
+ spin_lock(&nilfs->ns_inode_lock);
+ list_splice_init(&nilfs->ns_dirty_files, &head);
+ spin_unlock(&nilfs->ns_inode_lock);
+ if (list_empty(&head))
+ return;
+
+ set_nilfs_purging(nilfs);
+ list_for_each_entry_safe(ii, n, &head, i_dirty) {
+ spin_lock(&nilfs->ns_inode_lock);
+ list_del_init(&ii->i_dirty);
+ spin_unlock(&nilfs->ns_inode_lock);
+
+ iput(&ii->vfs_inode);
+ }
+ clear_nilfs_purging(nilfs);
+}
+
+/**
* nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
* @nilfs: nilfs object
* @sb: super block instance
* @ri: pointer to a nilfs_recovery_info struct to store search results.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error code is returned.
- *
- * %-EINVAL - Inconsistent filesystem state.
- *
- * %-EIO - I/O error
- *
- * %-ENOSPC - No space left on device (only in a panic state).
- *
- * %-ERESTARTSYS - Interrupted.
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Inconsistent filesystem state.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No space left on device (only in a panic state).
+ * * %-ERESTARTSYS - Interrupted.
*/
int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
struct super_block *sb,
@@ -765,15 +830,19 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
if (unlikely(err)) {
nilfs_err(sb, "error %d writing segment for recovery",
err);
- goto failed;
+ goto put_root;
}
nilfs_finish_roll_forward(nilfs, ri);
}
- failed:
+put_root:
nilfs_put_root(root);
return err;
+
+failed:
+ nilfs_abort_roll_forward(nilfs);
+ goto put_root;
}
/**
@@ -785,14 +854,11 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
* segment pointed by the superblock. It sets up struct the_nilfs through
* this search. It fills nilfs_recovery_info (ri) required for recovery.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error code is returned.
- *
- * %-EINVAL - No valid segment found
- *
- * %-EIO - I/O error
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - No valid segment found.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_search_super_root(struct the_nilfs *nilfs,
struct nilfs_recovery_info *ri)
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 1362ccb64ec7..a8bdf3d318ea 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -101,6 +101,12 @@ int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf)
if (unlikely(!bh))
return -ENOMEM;
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ memset(bh->b_data, 0, bh->b_size);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
nilfs_segbuf_add_segsum_buffer(segbuf, bh);
return 0;
}
@@ -199,7 +205,6 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
{
struct buffer_head *bh;
struct nilfs_segment_summary *raw_sum;
- void *kaddr;
u32 crc;
bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head,
@@ -214,9 +219,13 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
crc = crc32_le(crc, bh->b_data, bh->b_size);
}
list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
- kaddr = kmap_atomic(bh->b_page);
- crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size);
- kunmap_atomic(kaddr);
+ size_t offset = offset_in_folio(bh->b_folio, bh->b_data);
+ unsigned char *from;
+
+ /* Do not support block sizes larger than PAGE_SIZE */
+ from = kmap_local_folio(bh->b_folio, offset);
+ crc = crc32_le(crc, from, bh->b_size);
+ kunmap_local(from);
}
raw_sum->ss_datasum = cpu_to_le32(crc);
}
@@ -368,7 +377,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
struct nilfs_write_info *wi,
struct buffer_head *bh)
{
- int len, err;
+ int err;
BUG_ON(wi->nr_vecs <= 0);
repeat:
@@ -379,8 +388,8 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
(wi->nilfs->ns_blocksize_bits - 9);
}
- len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh));
- if (len == bh->b_size) {
+ if (bio_add_folio(wi->bio, bh->b_folio, bh->b_size,
+ offset_in_folio(bh->b_folio, bh->b_data))) {
wi->end++;
return 0;
}
@@ -397,12 +406,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
* @segbuf: buffer storing a log to be written
* @nilfs: nilfs object
*
- * Return Value: On Success, 0 is returned. On Error, one of the following
- * negative error code is returned.
- *
- * %-EIO - I/O error
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: Always 0.
*/
static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
struct the_nilfs *nilfs)
@@ -443,10 +447,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
* nilfs_segbuf_wait - wait for completion of requested BIOs
* @segbuf: segment buffer
*
- * Return Value: On Success, 0 is returned. On Error, one of the following
- * negative error code is returned.
- *
- * %-EIO - I/O error
+ * Return: 0 on success, or %-EIO if I/O error is detected.
*/
static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf)
{
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 76c3bd88b858..deee16bc9d4e 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -136,7 +136,7 @@ static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
#define nilfs_cnt32_ge(a, b) \
(typecheck(__u32, a) && typecheck(__u32, b) && \
- ((__s32)(a) - (__s32)(b) >= 0))
+ ((__s32)((a) - (b)) >= 0))
static int nilfs_prepare_segment_lock(struct super_block *sb,
struct nilfs_transaction_info *ti)
@@ -191,12 +191,10 @@ static int nilfs_prepare_segment_lock(struct super_block *sb,
* When @vacancy_check flag is set, this function will check the amount of
* free space, and will wait for the GC to reclaim disk space if low capacity.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
- *
- * %-ENOSPC - No space left on device
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No space left on device (if checking free space).
*/
int nilfs_transaction_begin(struct super_block *sb,
struct nilfs_transaction_info *ti,
@@ -252,6 +250,8 @@ int nilfs_transaction_begin(struct super_block *sb,
* nilfs_transaction_commit() sets a timer to start the segment
* constructor. If a sync flag is set, it starts construction
* directly.
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_transaction_commit(struct super_block *sb)
{
@@ -407,6 +407,8 @@ static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
/**
* nilfs_segctor_reset_segment_buffer - reset the current segment buffer
* @sci: nilfs_sc_info
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
{
@@ -430,6 +432,23 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
return 0;
}
+/**
+ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
+ * @sci: segment constructor object
+ *
+ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
+ * the current segment summary block.
+ */
+static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
+{
+ struct nilfs_segsum_pointer *ssp;
+
+ ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
+ if (ssp->offset < ssp->bh->b_size)
+ memset(ssp->bh->b_data + ssp->offset, 0,
+ ssp->bh->b_size - ssp->offset);
+}
+
static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
@@ -438,6 +457,7 @@ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
* The current segment is filled up
* (internal code)
*/
+ nilfs_segctor_zeropad_segsum(sci);
sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
return nilfs_segctor_reset_segment_buffer(sci);
}
@@ -501,7 +521,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
ii = NILFS_I(inode);
- if (test_bit(NILFS_I_GCINODE, &ii->i_state))
+ if (ii->i_type & NILFS_I_TYPE_GC)
cno = ii->i_cno;
else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
cno = 0;
@@ -542,6 +562,7 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
goto retry;
}
if (unlikely(required)) {
+ nilfs_segctor_zeropad_segsum(sci);
err = nilfs_segbuf_extend_segsum(segbuf);
if (unlikely(err))
goto failed;
@@ -680,7 +701,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
loff_t start, loff_t end)
{
struct address_space *mapping = inode->i_mapping;
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t index = 0, last = ULONG_MAX;
size_t ndirties = 0;
int i;
@@ -694,23 +715,29 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
index = start >> PAGE_SHIFT;
last = end >> PAGE_SHIFT;
}
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
repeat:
if (unlikely(index > last) ||
- !pagevec_lookup_range_tag(&pvec, mapping, &index, last,
- PAGECACHE_TAG_DIRTY))
+ !filemap_get_folios_tag(mapping, &index, last,
+ PAGECACHE_TAG_DIRTY, &fbatch))
return ndirties;
- for (i = 0; i < pagevec_count(&pvec); i++) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct buffer_head *bh, *head;
- struct page *page = pvec.pages[i];
+ struct folio *folio = fbatch.folios[i];
- lock_page(page);
- if (!page_has_buffers(page))
- create_empty_buffers(page, i_blocksize(inode), 0);
- unlock_page(page);
+ folio_lock(folio);
+ if (unlikely(folio->mapping != mapping)) {
+ /* Exclude folios removed from the address space */
+ folio_unlock(folio);
+ continue;
+ }
+ head = folio_buffers(folio);
+ if (!head)
+ head = create_empty_buffers(folio,
+ i_blocksize(inode), 0);
- bh = head = page_buffers(page);
+ bh = head;
do {
if (!buffer_dirty(bh) || buffer_async_write(bh))
continue;
@@ -718,13 +745,16 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
list_add_tail(&bh->b_assoc_buffers, listp);
ndirties++;
if (unlikely(ndirties >= nlimit)) {
- pagevec_release(&pvec);
+ folio_unlock(folio);
+ folio_batch_release(&fbatch);
cond_resched();
return ndirties;
}
} while (bh = bh->b_this_page, bh != head);
+
+ folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
goto repeat;
}
@@ -734,20 +764,19 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
{
struct nilfs_inode_info *ii = NILFS_I(inode);
struct inode *btnc_inode = ii->i_assoc_inode;
- struct pagevec pvec;
+ struct folio_batch fbatch;
struct buffer_head *bh, *head;
unsigned int i;
pgoff_t index = 0;
if (!btnc_inode)
return;
+ folio_batch_init(&fbatch);
- pagevec_init(&pvec);
-
- while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index,
- PAGECACHE_TAG_DIRTY)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- bh = head = page_buffers(pvec.pages[i]);
+ while (filemap_get_folios_tag(btnc_inode->i_mapping, &index,
+ (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ bh = head = folio_buffers(fbatch.folios[i]);
do {
if (buffer_dirty(bh) &&
!buffer_async_write(bh)) {
@@ -758,7 +787,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
bh = bh->b_this_page;
} while (bh != head);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
}
@@ -855,76 +884,6 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
nilfs_mdt_clear_dirty(nilfs->ns_dat);
}
-static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
-{
- struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
- struct buffer_head *bh_cp;
- struct nilfs_checkpoint *raw_cp;
- int err;
-
- /* XXX: this interface will be changed */
- err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
- &raw_cp, &bh_cp);
- if (likely(!err)) {
- /*
- * The following code is duplicated with cpfile. But, it is
- * needed to collect the checkpoint even if it was not newly
- * created.
- */
- mark_buffer_dirty(bh_cp);
- nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
- nilfs_cpfile_put_checkpoint(
- nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
- } else if (err == -EINVAL || err == -ENOENT) {
- nilfs_error(sci->sc_super,
- "checkpoint creation failed due to metadata corruption.");
- err = -EIO;
- }
- return err;
-}
-
-static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
-{
- struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
- struct buffer_head *bh_cp;
- struct nilfs_checkpoint *raw_cp;
- int err;
-
- err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
- &raw_cp, &bh_cp);
- if (unlikely(err)) {
- if (err == -EINVAL || err == -ENOENT) {
- nilfs_error(sci->sc_super,
- "checkpoint finalization failed due to metadata corruption.");
- err = -EIO;
- }
- goto failed_ibh;
- }
- raw_cp->cp_snapshot_list.ssl_next = 0;
- raw_cp->cp_snapshot_list.ssl_prev = 0;
- raw_cp->cp_inodes_count =
- cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
- raw_cp->cp_blocks_count =
- cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
- raw_cp->cp_nblk_inc =
- cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
- raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
- raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);
-
- if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
- nilfs_checkpoint_clear_minor(raw_cp);
- else
- nilfs_checkpoint_set_minor(raw_cp);
-
- nilfs_write_inode_common(sci->sc_root->ifile,
- &raw_cp->cp_ifile_inode, 1);
- nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
- return 0;
-
- failed_ibh:
- return err;
-}
-
static void nilfs_fill_in_file_bmap(struct inode *ifile,
struct nilfs_inode_info *ii)
@@ -938,7 +897,7 @@ static void nilfs_fill_in_file_bmap(struct inode *ifile,
raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
ibh);
nilfs_bmap_write(ii->i_bmap, raw_inode);
- nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
+ nilfs_ifile_unmap_inode(raw_inode);
}
}
@@ -952,6 +911,33 @@ static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
}
}
+/**
+ * nilfs_write_root_mdt_inode - export root metadata inode information to
+ * the on-disk inode
+ * @inode: inode object of the root metadata file
+ * @raw_inode: on-disk inode
+ *
+ * nilfs_write_root_mdt_inode() writes inode information and bmap data of
+ * @inode to the inode area of the metadata file allocated on the super root
+ * block created to finalize the log. Since super root blocks are configured
+ * each time, this function zero-fills the unused area of @raw_inode.
+ */
+static void nilfs_write_root_mdt_inode(struct inode *inode,
+ struct nilfs_inode *raw_inode)
+{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+
+ nilfs_write_inode_common(inode, raw_inode);
+
+ /* zero-fill unused portion of raw_inode */
+ raw_inode->i_xattr = 0;
+ raw_inode->i_pad = 0;
+ memset((void *)raw_inode + sizeof(*raw_inode), 0,
+ nilfs->ns_inode_size - sizeof(*raw_inode));
+
+ nilfs_bmap_write(NILFS_I(inode)->i_bmap, raw_inode);
+}
+
static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs)
{
@@ -960,23 +946,29 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
unsigned int isz, srsz;
bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
+
+ lock_buffer(bh_sr);
raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
isz = nilfs->ns_inode_size;
srsz = NILFS_SR_BYTES(isz);
+ raw_sr->sr_sum = 0; /* Ensure initialization within this update */
raw_sr->sr_bytes = cpu_to_le16(srsz);
raw_sr->sr_nongc_ctime
= cpu_to_le64(nilfs_doing_gc() ?
nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
raw_sr->sr_flags = 0;
- nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
- NILFS_SR_DAT_OFFSET(isz), 1);
- nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
- NILFS_SR_CPFILE_OFFSET(isz), 1);
- nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
- NILFS_SR_SUFILE_OFFSET(isz), 1);
+ nilfs_write_root_mdt_inode(nilfs->ns_dat, (void *)raw_sr +
+ NILFS_SR_DAT_OFFSET(isz));
+ nilfs_write_root_mdt_inode(nilfs->ns_cpfile, (void *)raw_sr +
+ NILFS_SR_CPFILE_OFFSET(isz));
+ nilfs_write_root_mdt_inode(nilfs->ns_sufile, (void *)raw_sr +
+ NILFS_SR_SUFILE_OFFSET(isz));
+
memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
+ set_buffer_uptodate(bh_sr);
+ unlock_buffer(bh_sr);
}
static void nilfs_redirty_inodes(struct list_head *head)
@@ -1114,12 +1106,65 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
return err;
}
+/**
+ * nilfs_free_segments - free the segments given by an array of segment numbers
+ * @nilfs: nilfs object
+ * @segnumv: array of segment numbers to be freed
+ * @nsegs: number of segments to be freed in @segnumv
+ *
+ * nilfs_free_segments() wraps nilfs_sufile_freev() and
+ * nilfs_sufile_cancel_freev(), and edits the segment usage metadata file
+ * (sufile) to free all segments given by @segnumv and @nsegs at once. If
+ * it fails midway, it cancels the changes so that none of the segments are
+ * freed. If @nsegs is 0, this function does nothing.
+ *
+ * The freeing of segments is not finalized until the writing of a log with
+ * a super root block containing this sufile change is complete, and it can
+ * be canceled with nilfs_sufile_cancel_freev() until then.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid segment number.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ */
+static int nilfs_free_segments(struct the_nilfs *nilfs, __u64 *segnumv,
+ size_t nsegs)
+{
+ size_t ndone;
+ int ret;
+
+ if (!nsegs)
+ return 0;
+
+ ret = nilfs_sufile_freev(nilfs->ns_sufile, segnumv, nsegs, &ndone);
+ if (unlikely(ret)) {
+ nilfs_sufile_cancel_freev(nilfs->ns_sufile, segnumv, ndone,
+ NULL);
+ /*
+ * If a segment usage of the segments to be freed is in a
+ * hole block, nilfs_sufile_freev() will return -ENOENT.
+ * In this case, -EINVAL should be returned to the caller
+ * since there is something wrong with the given segment
+ * number array. This error can only occur during GC, so
+ * there is no need to worry about it propagating to other
+ * callers (such as fsync).
+ */
+ if (ret == -ENOENT) {
+ nilfs_err(nilfs->ns_sb,
+ "The segment usage entry %llu to be freed is invalid (in a hole)",
+ (unsigned long long)segnumv[ndone]);
+ ret = -EINVAL;
+ }
+ }
+ return ret;
+}
+
static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
{
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
struct list_head *head;
struct nilfs_inode_info *ii;
- size_t ndone;
int err = 0;
switch (nilfs_sc_cstage_get(sci)) {
@@ -1200,7 +1245,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
break;
nilfs_sc_cstage_inc(sci);
/* Creating a checkpoint */
- err = nilfs_segctor_create_checkpoint(sci);
+ err = nilfs_cpfile_create_checkpoint(nilfs->ns_cpfile,
+ nilfs->ns_cno);
if (unlikely(err))
break;
fallthrough;
@@ -1212,14 +1258,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
nilfs_sc_cstage_inc(sci);
fallthrough;
case NILFS_ST_SUFILE:
- err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
- sci->sc_nfreesegs, &ndone);
- if (unlikely(err)) {
- nilfs_sufile_cancel_freev(nilfs->ns_sufile,
- sci->sc_freesegs, ndone,
- NULL);
+ err = nilfs_free_segments(nilfs, sci->sc_freesegs,
+ sci->sc_nfreesegs);
+ if (unlikely(err))
break;
- }
sci->sc_stage.flags |= NILFS_CF_SUFREED;
err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
@@ -1278,6 +1320,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
* nilfs_segctor_begin_construction - setup segment buffer to make a new log
* @sci: nilfs_sc_info
* @nilfs: nilfs object
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs)
@@ -1531,6 +1575,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
+ nilfs_segctor_zeropad_segsum(sci);
nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
return 0;
@@ -1581,7 +1626,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
nblocks = le32_to_cpu(finfo->fi_nblocks);
ndatablk = le32_to_cpu(finfo->fi_ndatablk);
- inode = bh->b_page->mapping->host;
+ inode = bh->b_folio->mapping->host;
if (mode == SC_LSEG_DSYNC)
sc_op = &nilfs_sc_dsync_ops;
@@ -1634,68 +1679,95 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
return 0;
}
-static void nilfs_begin_page_io(struct page *page)
+static void nilfs_begin_folio_io(struct folio *folio)
{
- if (!page || PageWriteback(page))
+ if (!folio || folio_test_writeback(folio))
/*
* For split b-tree node pages, this function may be called
* twice. We ignore the 2nd or later calls by this check.
*/
return;
- lock_page(page);
- clear_page_dirty_for_io(page);
- set_page_writeback(page);
- unlock_page(page);
+ folio_lock(folio);
+ folio_clear_dirty_for_io(folio);
+ folio_start_writeback(folio);
+ folio_unlock(folio);
}
-static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
+/**
+ * nilfs_prepare_write_logs - prepare to write logs
+ * @logs: logs to prepare for writing
+ * @seed: checksum seed value
+ *
+ * nilfs_prepare_write_logs() adds checksums and prepares the block
+ * buffers/folios for writing logs. In order to stabilize folios of
+ * memory-mapped file blocks by putting them in writeback state before
+ * calculating the checksums, first prepare to write payload blocks other
+ * than segment summary and super root blocks in which the checksums will
+ * be embedded.
+ */
+static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed)
{
struct nilfs_segment_buffer *segbuf;
- struct page *bd_page = NULL, *fs_page = NULL;
-
- list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
- struct buffer_head *bh;
+ struct folio *bd_folio = NULL, *fs_folio = NULL;
+ struct buffer_head *bh;
- list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
+ /* Prepare to write payload blocks */
+ list_for_each_entry(segbuf, logs, sb_list) {
+ list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- if (bh->b_page != bd_page) {
- if (bd_page) {
- lock_page(bd_page);
- clear_page_dirty_for_io(bd_page);
- set_page_writeback(bd_page);
- unlock_page(bd_page);
- }
- bd_page = bh->b_page;
+ if (bh == segbuf->sb_super_root)
+ break;
+ set_buffer_async_write(bh);
+ if (bh->b_folio != fs_folio) {
+ nilfs_begin_folio_io(fs_folio);
+ fs_folio = bh->b_folio;
}
}
+ }
+ nilfs_begin_folio_io(fs_folio);
- list_for_each_entry(bh, &segbuf->sb_payload_buffers,
+ nilfs_add_checksums_on_logs(logs, seed);
+
+ /* Prepare to write segment summary blocks */
+ list_for_each_entry(segbuf, logs, sb_list) {
+ list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
- set_buffer_async_write(bh);
- if (bh == segbuf->sb_super_root) {
- if (bh->b_page != bd_page) {
- lock_page(bd_page);
- clear_page_dirty_for_io(bd_page);
- set_page_writeback(bd_page);
- unlock_page(bd_page);
- bd_page = bh->b_page;
- }
- break;
- }
- if (bh->b_page != fs_page) {
- nilfs_begin_page_io(fs_page);
- fs_page = bh->b_page;
+ mark_buffer_dirty(bh);
+ if (bh->b_folio == bd_folio)
+ continue;
+ if (bd_folio) {
+ folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
}
+ bd_folio = bh->b_folio;
}
}
- if (bd_page) {
- lock_page(bd_page);
- clear_page_dirty_for_io(bd_page);
- set_page_writeback(bd_page);
- unlock_page(bd_page);
+
+ /* Prepare to write super root block */
+ bh = NILFS_LAST_SEGBUF(logs)->sb_super_root;
+ if (bh) {
+ mark_buffer_dirty(bh);
+ if (bh->b_folio != bd_folio) {
+ folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
+ bd_folio = bh->b_folio;
+ }
+ }
+
+ if (bd_folio) {
+ folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
}
- nilfs_begin_page_io(fs_page);
}
static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -1708,17 +1780,18 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
return ret;
}
-static void nilfs_end_page_io(struct page *page, int err)
+static void nilfs_end_folio_io(struct folio *folio, int err)
{
- if (!page)
+ if (!folio)
return;
- if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
+ if (buffer_nilfs_node(folio_buffers(folio)) &&
+ !folio_test_writeback(folio)) {
/*
* For b-tree node pages, this function may be called twice
* or more because they might be split in a segment.
*/
- if (PageDirty(page)) {
+ if (folio_test_dirty(folio)) {
/*
* For pages holding split b-tree node buffers, dirty
* flag on the buffers may be cleared discretely.
@@ -1726,30 +1799,24 @@ static void nilfs_end_page_io(struct page *page, int err)
* remaining buffers, and it must be cancelled if
* all the buffers get cleaned later.
*/
- lock_page(page);
- if (nilfs_page_buffers_clean(page))
- __nilfs_clear_page_dirty(page);
- unlock_page(page);
+ folio_lock(folio);
+ if (nilfs_folio_buffers_clean(folio))
+ __nilfs_clear_folio_dirty(folio);
+ folio_unlock(folio);
}
return;
}
- if (!err) {
- if (!nilfs_page_buffers_clean(page))
- __set_page_dirty_nobuffers(page);
- ClearPageError(page);
- } else {
- __set_page_dirty_nobuffers(page);
- SetPageError(page);
- }
+ if (err || !nilfs_folio_buffers_clean(folio))
+ filemap_dirty_folio(folio->mapping, folio);
- end_page_writeback(page);
+ folio_end_writeback(folio);
}
static void nilfs_abort_logs(struct list_head *logs, int err)
{
struct nilfs_segment_buffer *segbuf;
- struct page *bd_page = NULL, *fs_page = NULL;
+ struct folio *bd_folio = NULL, *fs_folio = NULL;
struct buffer_head *bh;
if (list_empty(logs))
@@ -1758,33 +1825,35 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
- if (bh->b_page != bd_page) {
- if (bd_page)
- end_page_writeback(bd_page);
- bd_page = bh->b_page;
+ clear_buffer_uptodate(bh);
+ if (bh->b_folio != bd_folio) {
+ if (bd_folio)
+ folio_end_writeback(bd_folio);
+ bd_folio = bh->b_folio;
}
}
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
- if (bh->b_page != bd_page) {
- end_page_writeback(bd_page);
- bd_page = bh->b_page;
+ clear_buffer_uptodate(bh);
+ if (bh->b_folio != bd_folio) {
+ folio_end_writeback(bd_folio);
+ bd_folio = bh->b_folio;
}
break;
}
- if (bh->b_page != fs_page) {
- nilfs_end_page_io(fs_page, err);
- fs_page = bh->b_page;
+ clear_buffer_async_write(bh);
+ if (bh->b_folio != fs_folio) {
+ nilfs_end_folio_io(fs_folio, err);
+ fs_folio = bh->b_folio;
}
}
}
- if (bd_page)
- end_page_writeback(bd_page);
+ if (bd_folio)
+ folio_end_writeback(bd_folio);
- nilfs_end_page_io(fs_page, err);
+ nilfs_end_folio_io(fs_folio, err);
}
static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
@@ -1798,6 +1867,9 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
nilfs_abort_logs(&logs, ret ? : err);
list_splice_tail_init(&sci->sc_segbufs, &logs);
+ if (list_empty(&logs))
+ return; /* if the first segment buffer preparation failed */
+
nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
nilfs_free_incomplete_logs(&logs, nilfs);
@@ -1826,7 +1898,7 @@ static void nilfs_set_next_segment(struct the_nilfs *nilfs,
static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
{
struct nilfs_segment_buffer *segbuf;
- struct page *bd_page = NULL, *fs_page = NULL;
+ struct folio *bd_folio = NULL, *fs_folio = NULL;
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
int update_sr = false;
@@ -1837,21 +1909,21 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
- if (bh->b_page != bd_page) {
- if (bd_page)
- end_page_writeback(bd_page);
- bd_page = bh->b_page;
+ if (bh->b_folio != bd_folio) {
+ if (bd_folio)
+ folio_end_writeback(bd_folio);
+ bd_folio = bh->b_folio;
}
}
/*
- * We assume that the buffers which belong to the same page
+ * We assume that the buffers which belong to the same folio
* continue over the buffer list.
- * Under this assumption, the last BHs of pages is
- * identifiable by the discontinuity of bh->b_page
- * (page != fs_page).
+ * Under this assumption, the last BHs of folios is
+ * identifiable by the discontinuity of bh->b_folio
+ * (folio != fs_folio).
*
* For B-tree node blocks, however, this assumption is not
- * guaranteed. The cleanup code of B-tree node pages needs
+ * guaranteed. The cleanup code of B-tree node folios needs
* special care.
*/
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
@@ -1862,18 +1934,20 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Redirected));
- set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh == segbuf->sb_super_root) {
- if (bh->b_page != bd_page) {
- end_page_writeback(bd_page);
- bd_page = bh->b_page;
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
+ if (bh->b_folio != bd_folio) {
+ folio_end_writeback(bd_folio);
+ bd_folio = bh->b_folio;
}
update_sr = true;
break;
}
- if (bh->b_page != fs_page) {
- nilfs_end_page_io(fs_page, 0);
- fs_page = bh->b_page;
+ set_mask_bits(&bh->b_state, clear_bits, set_bits);
+ if (bh->b_folio != fs_folio) {
+ nilfs_end_folio_io(fs_folio, 0);
+ fs_folio = bh->b_folio;
}
}
@@ -1887,13 +1961,13 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
}
}
/*
- * Since pages may continue over multiple segment buffers,
- * end of the last page must be checked outside of the loop.
+ * Since folios may continue over multiple segment buffers,
+ * end of the last folio must be checked outside of the loop.
*/
- if (bd_page)
- end_page_writeback(bd_page);
+ if (bd_folio)
+ folio_end_writeback(bd_folio);
- nilfs_end_page_io(fs_page, 0);
+ nilfs_end_folio_io(fs_folio, 0);
nilfs_drop_collected_inodes(&sci->sc_dirty_files);
@@ -2019,6 +2093,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
int err;
+ if (sb_rdonly(sci->sc_super))
+ return -EROFS;
+
nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
sci->sc_cno = nilfs->ns_cno;
@@ -2037,7 +2114,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
err = nilfs_segctor_begin_construction(sci, nilfs);
if (unlikely(err))
- goto out;
+ goto failed;
/* Update time stamp */
sci->sc_seg_ctime = ktime_get_real_seconds();
@@ -2062,7 +2139,11 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
if (mode == SC_LSEG_SR &&
nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) {
- err = nilfs_segctor_fill_in_checkpoint(sci);
+ err = nilfs_cpfile_finalize_checkpoint(
+ nilfs->ns_cpfile, nilfs->ns_cno, sci->sc_root,
+ sci->sc_nblk_inc + sci->sc_nblk_this_inc,
+ sci->sc_seg_ctime,
+ !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags));
if (unlikely(err))
goto failed_to_write;
@@ -2071,10 +2152,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
/* Write partial segments */
- nilfs_segctor_prepare_write(sci);
-
- nilfs_add_checksums_on_logs(&sci->sc_segbufs,
- nilfs->ns_crc_seed);
+ nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed);
err = nilfs_segctor_write(sci, nilfs);
if (unlikely(err))
@@ -2100,10 +2178,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
return err;
failed_to_write:
- if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
- nilfs_redirty_inodes(&sci->sc_dirty_files);
-
failed:
+ if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE)
+ nilfs_redirty_inodes(&sci->sc_dirty_files);
if (nilfs_doing_gc())
nilfs_redirty_inodes(&sci->sc_gc_inodes);
nilfs_segctor_abort_construction(sci, nilfs, err);
@@ -2122,8 +2199,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
{
spin_lock(&sci->sc_state_lock);
if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
- sci->sc_timer.expires = jiffies + sci->sc_interval;
- add_timer(&sci->sc_timer);
+ if (sci->sc_task) {
+ sci->sc_timer.expires = jiffies + sci->sc_interval;
+ add_timer(&sci->sc_timer);
+ }
sci->sc_state |= NILFS_SEGCTOR_COMMIT;
}
spin_unlock(&sci->sc_state_lock);
@@ -2142,22 +2221,6 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
spin_unlock(&sci->sc_state_lock);
}
-/**
- * nilfs_flush_segment - trigger a segment construction for resource control
- * @sb: super block
- * @ino: inode number of the file to be flushed out.
- */
-void nilfs_flush_segment(struct super_block *sb, ino_t ino)
-{
- struct the_nilfs *nilfs = sb->s_fs_info;
- struct nilfs_sc_info *sci = nilfs->ns_writer;
-
- if (!sci || nilfs_doing_construction())
- return;
- nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
- /* assign bit 0 to data files */
-}
-
struct nilfs_segctor_wait_request {
wait_queue_entry_t wq;
__u32 seq;
@@ -2170,19 +2233,36 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
struct nilfs_segctor_wait_request wait_req;
int err = 0;
- spin_lock(&sci->sc_state_lock);
init_wait(&wait_req.wq);
wait_req.err = 0;
atomic_set(&wait_req.done, 0);
+ init_waitqueue_entry(&wait_req.wq, current);
+
+ /*
+ * To prevent a race issue where completion notifications from the
+ * log writer thread are missed, increment the request sequence count
+ * "sc_seq_request" and insert a wait queue entry using the current
+ * sequence number into the "sc_wait_request" queue at the same time
+ * within the lock section of "sc_state_lock".
+ */
+ spin_lock(&sci->sc_state_lock);
wait_req.seq = ++sci->sc_seq_request;
+ add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
spin_unlock(&sci->sc_state_lock);
- init_waitqueue_entry(&wait_req.wq, current);
- add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
- set_current_state(TASK_INTERRUPTIBLE);
wake_up(&sci->sc_wait_daemon);
for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /*
+ * Synchronize only while the log writer thread is alive.
+ * Leave flushing out after the log writer thread exits to
+ * the cleanup work in nilfs_segctor_destroy().
+ */
+ if (!sci->sc_task)
+ break;
+
if (atomic_read(&wait_req.done)) {
err = wait_req.err;
break;
@@ -2198,7 +2278,7 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
return err;
}
-static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
+static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force)
{
struct nilfs_segctor_wait_request *wrq, *n;
unsigned long flags;
@@ -2206,7 +2286,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
if (!atomic_read(&wrq->done) &&
- nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
+ (force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) {
wrq->err = err;
atomic_set(&wrq->done, 1);
}
@@ -2223,18 +2303,13 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
* nilfs_construct_segment - construct a logical segment
* @sb: super block
*
- * Return Value: On success, 0 is returned. On errors, one of the following
- * negative error code is returned.
- *
- * %-EROFS - Read only filesystem.
- *
- * %-EIO - I/O error
- *
- * %-ENOSPC - No space left on device (only in a panic state).
- *
- * %-ERESTARTSYS - Interrupted.
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No space left on device (only in a panic state).
+ * * %-ERESTARTSYS - Interrupted.
+ * * %-EROFS - Read only filesystem.
*/
int nilfs_construct_segment(struct super_block *sb)
{
@@ -2258,18 +2333,13 @@ int nilfs_construct_segment(struct super_block *sb)
* @start: start byte offset
* @end: end byte offset (inclusive)
*
- * Return Value: On success, 0 is returned. On errors, one of the following
- * negative error code is returned.
- *
- * %-EROFS - Read only filesystem.
- *
- * %-EIO - I/O error
- *
- * %-ENOSPC - No space left on device (only in a panic state).
- *
- * %-ERESTARTSYS - Interrupted.
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No space left on device (only in a panic state).
+ * * %-ERESTARTSYS - Interrupted.
+ * * %-EROFS - Read only filesystem.
*/
int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
loff_t start, loff_t end)
@@ -2324,10 +2394,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
*/
static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
{
+ bool thread_is_alive;
+
spin_lock(&sci->sc_state_lock);
sci->sc_seq_accepted = sci->sc_seq_request;
+ thread_is_alive = (bool)sci->sc_task;
spin_unlock(&sci->sc_state_lock);
- del_timer_sync(&sci->sc_timer);
+
+ /*
+ * This function does not race with the log writer thread's
+ * termination. Therefore, deleting sc_timer, which should not be
+ * done after the log writer thread exits, can be done safely outside
+ * the area protected by sc_state_lock.
+ */
+ if (thread_is_alive)
+ timer_delete_sync(&sci->sc_timer);
}
/**
@@ -2344,7 +2425,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
if (mode == SC_LSEG_SR) {
sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
sci->sc_seq_done = sci->sc_seq_accepted;
- nilfs_segctor_wakeup(sci, err);
+ nilfs_segctor_wakeup(sci, err, false);
sci->sc_flush_request = 0;
} else {
if (mode == SC_FLUSH_FILE)
@@ -2353,7 +2434,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
sci->sc_flush_request &= ~FLUSH_DAT_BIT;
/* re-enable timer if checkpoint creation was not done */
- if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
+ if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task &&
time_before(jiffies, sci->sc_timer.expires))
add_timer(&sci->sc_timer);
}
@@ -2364,6 +2445,8 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
* nilfs_segctor_construct - form logs and write them to disk
* @sci: segment constructor object
* @mode: mode of log forming
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
{
@@ -2402,9 +2485,9 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
static void nilfs_construction_timeout(struct timer_list *t)
{
- struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer);
+ struct nilfs_sc_info *sci = timer_container_of(sci, t, sc_timer);
- wake_up_process(sci->sc_timer_task);
+ wake_up_process(sci->sc_task);
}
static void
@@ -2530,121 +2613,85 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
}
/**
- * nilfs_segctor_thread - main loop of the segment constructor thread.
+ * nilfs_log_write_required - determine whether log writing is required
+ * @sci: nilfs_sc_info struct
+ * @modep: location for storing log writing mode
+ *
+ * Return: true if log writing is required, false otherwise. If log writing
+ * is required, the mode is stored in the location pointed to by @modep.
+ */
+static bool nilfs_log_write_required(struct nilfs_sc_info *sci, int *modep)
+{
+ bool timedout, ret = true;
+
+ spin_lock(&sci->sc_state_lock);
+ timedout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
+ time_after_eq(jiffies, sci->sc_timer.expires));
+ if (timedout || sci->sc_seq_request != sci->sc_seq_done)
+ *modep = SC_LSEG_SR;
+ else if (sci->sc_flush_request)
+ *modep = nilfs_segctor_flush_mode(sci);
+ else
+ ret = false;
+
+ spin_unlock(&sci->sc_state_lock);
+ return ret;
+}
+
+/**
+ * nilfs_segctor_thread - main loop of the log writer thread
* @arg: pointer to a struct nilfs_sc_info.
*
- * nilfs_segctor_thread() initializes a timer and serves as a daemon
- * to execute segment constructions.
+ * nilfs_segctor_thread() is the main loop function of the log writer kernel
+ * thread, which determines whether log writing is necessary, and if so,
+ * performs the log write in the background, or waits if not. It is also
+ * used to decide the background writeback of the superblock.
+ *
+ * Return: Always 0.
*/
static int nilfs_segctor_thread(void *arg)
{
struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
- int timeout = 0;
- sci->sc_timer_task = current;
-
- /* start sync. */
- sci->sc_task = current;
- wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
nilfs_info(sci->sc_super,
"segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds",
sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);
- spin_lock(&sci->sc_state_lock);
- loop:
- for (;;) {
- int mode;
+ set_freezable();
- if (sci->sc_state & NILFS_SEGCTOR_QUIT)
- goto end_thread;
-
- if (timeout || sci->sc_seq_request != sci->sc_seq_done)
- mode = SC_LSEG_SR;
- else if (sci->sc_flush_request)
- mode = nilfs_segctor_flush_mode(sci);
- else
- break;
-
- spin_unlock(&sci->sc_state_lock);
- nilfs_segctor_thread_construct(sci, mode);
- spin_lock(&sci->sc_state_lock);
- timeout = 0;
- }
-
-
- if (freezing(current)) {
- spin_unlock(&sci->sc_state_lock);
- try_to_freeze();
- spin_lock(&sci->sc_state_lock);
- } else {
+ while (!kthread_should_stop()) {
DEFINE_WAIT(wait);
- int should_sleep = 1;
+ bool should_write;
+ int mode;
+
+ if (freezing(current)) {
+ try_to_freeze();
+ continue;
+ }
prepare_to_wait(&sci->sc_wait_daemon, &wait,
TASK_INTERRUPTIBLE);
-
- if (sci->sc_seq_request != sci->sc_seq_done)
- should_sleep = 0;
- else if (sci->sc_flush_request)
- should_sleep = 0;
- else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
- should_sleep = time_before(jiffies,
- sci->sc_timer.expires);
-
- if (should_sleep) {
- spin_unlock(&sci->sc_state_lock);
+ should_write = nilfs_log_write_required(sci, &mode);
+ if (!should_write)
schedule();
- spin_lock(&sci->sc_state_lock);
- }
finish_wait(&sci->sc_wait_daemon, &wait);
- timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
- time_after_eq(jiffies, sci->sc_timer.expires));
if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs))
set_nilfs_discontinued(nilfs);
- }
- goto loop;
- end_thread:
- spin_unlock(&sci->sc_state_lock);
+ if (should_write)
+ nilfs_segctor_thread_construct(sci, mode);
+ }
/* end sync. */
+ spin_lock(&sci->sc_state_lock);
sci->sc_task = NULL;
- wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
- return 0;
-}
-
-static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
-{
- struct task_struct *t;
-
- t = kthread_run(nilfs_segctor_thread, sci, "segctord");
- if (IS_ERR(t)) {
- int err = PTR_ERR(t);
-
- nilfs_err(sci->sc_super, "error %d creating segctord thread",
- err);
- return err;
- }
- wait_event(sci->sc_wait_task, sci->sc_task != NULL);
+ timer_shutdown_sync(&sci->sc_timer);
+ spin_unlock(&sci->sc_state_lock);
return 0;
}
-static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
- __acquires(&sci->sc_state_lock)
- __releases(&sci->sc_state_lock)
-{
- sci->sc_state |= NILFS_SEGCTOR_QUIT;
-
- while (sci->sc_task) {
- wake_up(&sci->sc_wait_daemon);
- spin_unlock(&sci->sc_state_lock);
- wait_event(sci->sc_wait_task, sci->sc_task == NULL);
- spin_lock(&sci->sc_state_lock);
- }
-}
-
/*
* Setup & clean-up functions
*/
@@ -2665,7 +2712,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
init_waitqueue_head(&sci->sc_wait_request);
init_waitqueue_head(&sci->sc_wait_daemon);
- init_waitqueue_head(&sci->sc_wait_task);
spin_lock_init(&sci->sc_state_lock);
INIT_LIST_HEAD(&sci->sc_dirty_files);
INIT_LIST_HEAD(&sci->sc_segbufs);
@@ -2673,7 +2719,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
INIT_LIST_HEAD(&sci->sc_gc_inodes);
INIT_LIST_HEAD(&sci->sc_iput_queue);
INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
- timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
@@ -2703,7 +2748,7 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
flush_work(&sci->sc_iput_work);
- } while (ret && retrycount-- > 0);
+ } while (ret && ret != -EROFS && retrycount-- > 0);
}
/**
@@ -2721,12 +2766,28 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
up_write(&nilfs->ns_segctor_sem);
+ if (sci->sc_task) {
+ wake_up(&sci->sc_wait_daemon);
+ if (kthread_stop(sci->sc_task)) {
+ spin_lock(&sci->sc_state_lock);
+ sci->sc_task = NULL;
+ timer_shutdown_sync(&sci->sc_timer);
+ spin_unlock(&sci->sc_state_lock);
+ }
+ }
+
spin_lock(&sci->sc_state_lock);
- nilfs_segctor_kill_thread(sci);
flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
|| sci->sc_seq_request != sci->sc_seq_done);
spin_unlock(&sci->sc_state_lock);
+ /*
+ * Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can
+ * be called from delayed iput() via nilfs_evict_inode() and can race
+ * with the above log writer thread termination.
+ */
+ nilfs_segctor_wakeup(sci, 0, true);
+
if (flush_work(&sci->sc_iput_work))
flag = true;
@@ -2752,7 +2813,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
down_write(&nilfs->ns_segctor_sem);
- timer_shutdown_sync(&sci->sc_timer);
kfree(sci);
}
@@ -2764,14 +2824,16 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
* This allocates a log writer object, initializes it, and starts the
* log writer.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINTR - Log writer thread creation failed due to interruption.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
{
struct the_nilfs *nilfs = sb->s_fs_info;
+ struct nilfs_sc_info *sci;
+ struct task_struct *t;
int err;
if (nilfs->ns_writer) {
@@ -2784,17 +2846,23 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
return 0;
}
- nilfs->ns_writer = nilfs_segctor_new(sb, root);
- if (!nilfs->ns_writer)
+ sci = nilfs_segctor_new(sb, root);
+ if (unlikely(!sci))
return -ENOMEM;
- inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
-
- err = nilfs_segctor_start_thread(nilfs->ns_writer);
- if (unlikely(err))
+ nilfs->ns_writer = sci;
+ t = kthread_create(nilfs_segctor_thread, sci, "segctord");
+ if (IS_ERR(t)) {
+ err = PTR_ERR(t);
+ nilfs_err(sb, "error %d creating segctord thread", err);
nilfs_detach_log_writer(sb);
+ return err;
+ }
+ sci->sc_task = t;
+ timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
- return err;
+ wake_up_process(sci->sc_task);
+ return 0;
}
/**
@@ -2814,6 +2882,7 @@ void nilfs_detach_log_writer(struct super_block *sb)
nilfs_segctor_destroy(nilfs->ns_writer);
nilfs->ns_writer = NULL;
}
+ set_nilfs_purging(nilfs);
/* Force to free the list of dirty files */
spin_lock(&nilfs->ns_inode_lock);
@@ -2826,4 +2895,5 @@ void nilfs_detach_log_writer(struct super_block *sb)
up_write(&nilfs->ns_segctor_sem);
nilfs_dispose_list(nilfs, &garbage_list, 1);
+ clear_nilfs_purging(nilfs);
}
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 1060f72ebf5a..4b39ed43ae72 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -22,10 +22,10 @@ struct nilfs_root;
* struct nilfs_recovery_info - Recovery information
* @ri_need_recovery: Recovery status
* @ri_super_root: Block number of the last super root
- * @ri_ri_cno: Number of the last checkpoint
+ * @ri_cno: Number of the last checkpoint
* @ri_lsegs_start: Region for roll-forwarding (start block number)
* @ri_lsegs_end: Region for roll-forwarding (end block number)
- * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start
+ * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start
* @ri_used_segments: List of segments to be mark active
* @ri_pseg_start: Block number of the last partial segment
* @ri_seq: Sequence number on the last partial segment
@@ -105,9 +105,8 @@ struct nilfs_segsum_pointer {
* @sc_flush_request: inode bitmap of metadata files to be flushed
* @sc_wait_request: Client request queue
* @sc_wait_daemon: Daemon wait queue
- * @sc_wait_task: Start/end wait queue to control segctord task
* @sc_seq_request: Request counter
- * @sc_seq_accept: Accepted request count
+ * @sc_seq_accepted: Accepted request count
* @sc_seq_done: Completion counter
* @sc_sync: Request of explicit sync operation
* @sc_interval: Timeout value of background construction
@@ -158,7 +157,6 @@ struct nilfs_sc_info {
wait_queue_head_t sc_wait_request;
wait_queue_head_t sc_wait_daemon;
- wait_queue_head_t sc_wait_task;
__u32 sc_seq_request;
__u32 sc_seq_accepted;
@@ -171,7 +169,6 @@ struct nilfs_sc_info {
unsigned long sc_watermark;
struct timer_list sc_timer;
- struct task_struct *sc_timer_task;
struct task_struct *sc_task;
};
@@ -192,7 +189,6 @@ enum {
};
/* sc_state */
-#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */
#define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */
/*
@@ -230,7 +226,6 @@ extern void nilfs_relax_pressure_in_lock(struct super_block *);
extern int nilfs_construct_segment(struct super_block *);
extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *,
loff_t, loff_t);
-extern void nilfs_flush_segment(struct super_block *, ino_t);
extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
void **);
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index dc359b56fdfa..83f93337c01b 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -48,7 +48,7 @@ nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum)
{
__u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset;
- do_div(t, nilfs_sufile_segment_usages_per_block(sufile));
+ t = div64_ul(t, nilfs_sufile_segment_usages_per_block(sufile));
return (unsigned long)t;
}
@@ -70,19 +70,35 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr,
max - curr + 1);
}
-static struct nilfs_segment_usage *
-nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum,
- struct buffer_head *bh, void *kaddr)
+/**
+ * nilfs_sufile_segment_usage_offset - calculate the byte offset of a segment
+ * usage entry in the folio containing it
+ * @sufile: segment usage file inode
+ * @segnum: number of segment usage
+ * @bh: buffer head of block containing segment usage indexed by @segnum
+ *
+ * Return: Byte offset in the folio of the segment usage entry.
+ */
+static size_t nilfs_sufile_segment_usage_offset(const struct inode *sufile,
+ __u64 segnum,
+ struct buffer_head *bh)
{
- return kaddr + bh_offset(bh) +
+ return offset_in_folio(bh->b_folio, bh->b_data) +
nilfs_sufile_get_offset(sufile, segnum) *
NILFS_MDT(sufile)->mi_entry_size;
}
-static inline int nilfs_sufile_get_header_block(struct inode *sufile,
- struct buffer_head **bhp)
+static int nilfs_sufile_get_header_block(struct inode *sufile,
+ struct buffer_head **bhp)
{
- return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp);
+ int err = nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp);
+
+ if (unlikely(err == -ENOENT)) {
+ nilfs_error(sufile->i_sb,
+ "missing header block in segment usage metadata");
+ err = -EIO;
+ }
+ return err;
}
static inline int
@@ -105,13 +121,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
u64 ncleanadd, u64 ndirtyadd)
{
struct nilfs_sufile_header *header;
- void *kaddr;
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->sh_ncleansegs, ncleanadd);
le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
mark_buffer_dirty(header_bh);
}
@@ -119,6 +133,8 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
/**
* nilfs_sufile_get_ncleansegs - return the number of clean segments
* @sufile: inode of segment usage file
+ *
+ * Return: Number of clean segments.
*/
unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile)
{
@@ -141,17 +157,13 @@ unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile)
* of successfully modified segments from the head is stored in the
* place @ndone points to.
*
- * Return Value: On success, zero is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - Given segment usage is in hole block (may be returned if
- * @create is zero)
- *
- * %-EINVAL - Invalid segment usage number
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid segment usage number
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Given segment usage is in hole block (may be returned if
+ * @create is zero)
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs,
int create, size_t *ndone,
@@ -258,10 +270,7 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
* @start: minimum segment number of allocatable region (inclusive)
* @end: maximum segment number of allocatable region (inclusive)
*
- * Return Value: On success, 0 is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-ERANGE - invalid segment region
+ * Return: 0 on success, or %-ERANGE if segment range is invalid.
*/
int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
{
@@ -286,17 +295,14 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
* @sufile: inode of segment usage file
* @segnump: pointer to segment number
*
- * Description: nilfs_sufile_alloc() allocates a clean segment.
+ * Description: nilfs_sufile_alloc() allocates a clean segment, and stores
+ * its segment number in the place pointed to by @segnump.
*
- * Return Value: On success, 0 is returned and the segment number of the
- * allocated segment is stored in the place pointed by @segnump. On error, one
- * of the following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOSPC - No clean segment left.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No clean segment left.
*/
int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
{
@@ -306,6 +312,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
size_t susz = NILFS_MDT(sufile)->mi_entry_size;
__u64 segnum, maxsegnum, last_alloc;
+ size_t offset;
void *kaddr;
unsigned long nsegments, nsus, cnt;
int ret, j;
@@ -315,10 +322,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
ret = nilfs_sufile_get_header_block(sufile, &header_bh);
if (ret < 0)
goto out_sem;
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
last_alloc = le64_to_cpu(header->sh_last_alloc);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
nsegments = nilfs_sufile_get_nsegments(sufile);
maxsegnum = sui->allocmax;
@@ -352,9 +358,10 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
&su_bh);
if (ret < 0)
goto out_header;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(
- sufile, segnum, su_bh, kaddr);
+
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum,
+ su_bh);
+ su = kaddr = kmap_local_folio(su_bh->b_folio, offset);
nsus = nilfs_sufile_segment_usages_in_block(
sufile, segnum, maxsegnum);
@@ -363,14 +370,13 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
continue;
/* found a clean segment */
nilfs_segment_usage_set_dirty(su);
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->sh_ncleansegs, -1);
le64_add_cpu(&header->sh_ndirtysegs, 1);
header->sh_last_alloc = cpu_to_le64(segnum);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
sui->ncleansegs--;
mark_buffer_dirty(header_bh);
@@ -384,7 +390,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
goto out_header;
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
brelse(su_bh);
}
@@ -404,18 +410,18 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
struct buffer_head *su_bh)
{
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh);
+ su = kmap_local_folio(su_bh->b_folio, offset);
if (unlikely(!nilfs_segment_usage_clean(su))) {
nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean",
__func__, (unsigned long long)segnum);
- kunmap_atomic(kaddr);
+ kunmap_local(su);
return;
}
nilfs_segment_usage_set_dirty(su);
- kunmap_atomic(kaddr);
+ kunmap_local(su);
nilfs_sufile_mod_counter(header_bh, -1, 1);
NILFS_SUI(sufile)->ncleansegs--;
@@ -429,14 +435,14 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
struct buffer_head *su_bh)
{
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
int clean, dirty;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh);
+ su = kmap_local_folio(su_bh->b_folio, offset);
if (su->su_flags == cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)) &&
su->su_nblocks == cpu_to_le32(0)) {
- kunmap_atomic(kaddr);
+ kunmap_local(su);
return;
}
clean = nilfs_segment_usage_clean(su);
@@ -446,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
su->su_lastmod = cpu_to_le64(0);
su->su_nblocks = cpu_to_le32(0);
su->su_flags = cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY));
- kunmap_atomic(kaddr);
+ kunmap_local(su);
nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
NILFS_SUI(sufile)->ncleansegs -= clean;
@@ -460,23 +466,28 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
struct buffer_head *su_bh)
{
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
int sudirty;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh);
+ su = kmap_local_folio(su_bh->b_folio, offset);
if (nilfs_segment_usage_clean(su)) {
nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean",
__func__, (unsigned long long)segnum);
- kunmap_atomic(kaddr);
+ kunmap_local(su);
return;
}
- WARN_ON(nilfs_segment_usage_error(su));
- WARN_ON(!nilfs_segment_usage_dirty(su));
+ if (unlikely(nilfs_segment_usage_error(su)))
+ nilfs_warn(sufile->i_sb, "free segment %llu marked in error",
+ (unsigned long long)segnum);
sudirty = nilfs_segment_usage_dirty(su);
+ if (unlikely(!sudirty))
+ nilfs_warn(sufile->i_sb, "free unallocated segment %llu",
+ (unsigned long long)segnum);
+
nilfs_segment_usage_set_clean(su);
- kunmap_atomic(kaddr);
+ kunmap_local(su);
mark_buffer_dirty(su_bh);
nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
@@ -491,25 +502,57 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
* nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty
* @sufile: inode of segment usage file
* @segnum: segment number
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
{
struct buffer_head *bh;
- void *kaddr;
+ size_t offset;
struct nilfs_segment_usage *su;
int ret;
down_write(&NILFS_MDT(sufile)->mi_sem);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
- if (!ret) {
+ if (unlikely(ret)) {
+ if (ret == -ENOENT) {
+ nilfs_error(sufile->i_sb,
+ "segment usage for segment %llu is unreadable due to a hole block",
+ (unsigned long long)segnum);
+ ret = -EIO;
+ }
+ goto out_sem;
+ }
+
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh);
+ su = kmap_local_folio(bh->b_folio, offset);
+ if (unlikely(nilfs_segment_usage_error(su))) {
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+
+ kunmap_local(su);
+ brelse(bh);
+ if (nilfs_segment_is_active(nilfs, segnum)) {
+ nilfs_error(sufile->i_sb,
+ "active segment %llu is erroneous",
+ (unsigned long long)segnum);
+ } else {
+ /*
+ * Segments marked erroneous are never allocated by
+ * nilfs_sufile_alloc(); only active segments, ie,
+ * the segments indexed by ns_segnum or ns_nextnum,
+ * can be erroneous here.
+ */
+ WARN_ON_ONCE(1);
+ }
+ ret = -EIO;
+ } else {
+ nilfs_segment_usage_set_dirty(su);
+ kunmap_local(su);
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
- kaddr = kmap_atomic(bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
- nilfs_segment_usage_set_dirty(su);
- kunmap_atomic(kaddr);
brelse(bh);
}
+out_sem:
up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
@@ -520,13 +563,15 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
* @segnum: segment number
* @nblocks: number of live blocks in the segment
* @modtime: modification time (option)
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
unsigned long nblocks, time64_t modtime)
{
struct buffer_head *bh;
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
int ret;
down_write(&NILFS_MDT(sufile)->mi_sem);
@@ -534,13 +579,18 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
if (ret < 0)
goto out_sem;
- kaddr = kmap_atomic(bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
- WARN_ON(nilfs_segment_usage_error(su));
- if (modtime)
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh);
+ su = kmap_local_folio(bh->b_folio, offset);
+ if (modtime) {
+ /*
+ * Check segusage error and set su_lastmod only when updating
+ * this entry with a valid timestamp, not for cancellation.
+ */
+ WARN_ON_ONCE(nilfs_segment_usage_error(su));
su->su_lastmod = cpu_to_le64(modtime);
+ }
su->su_nblocks = cpu_to_le32(nblocks);
- kunmap_atomic(kaddr);
+ kunmap_local(su);
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
@@ -556,23 +606,19 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
* @sufile: inode of segment usage file
* @sustat: pointer to a structure of segment usage statistics
*
- * Description: nilfs_sufile_get_stat() returns information about segment
- * usage.
- *
- * Return Value: On success, 0 is returned, and segment usage information is
- * stored in the place pointed by @sustat. On error, one of the following
- * negative error codes is returned.
+ * Description: nilfs_sufile_get_stat() retrieves segment usage statistics
+ * and stores them in the location pointed to by @sustat.
*
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
{
struct buffer_head *header_bh;
struct nilfs_sufile_header *header;
struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
- void *kaddr;
int ret;
down_read(&NILFS_MDT(sufile)->mi_sem);
@@ -581,8 +627,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
if (ret < 0)
goto out_sem;
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs);
@@ -591,7 +636,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
spin_lock(&nilfs->ns_last_segment_lock);
sustat->ss_prot_seq = nilfs->ns_prot_seq;
spin_unlock(&nilfs->ns_last_segment_lock);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
brelse(header_bh);
out_sem:
@@ -604,18 +649,18 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
struct buffer_head *su_bh)
{
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
int suclean;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh);
+ su = kmap_local_folio(su_bh->b_folio, offset);
if (nilfs_segment_usage_error(su)) {
- kunmap_atomic(kaddr);
+ kunmap_local(su);
return;
}
suclean = nilfs_segment_usage_clean(su);
nilfs_segment_usage_set_error(su);
- kunmap_atomic(kaddr);
+ kunmap_local(su);
if (suclean) {
nilfs_sufile_mod_counter(header_bh, -1, 0);
@@ -631,16 +676,12 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
* @start: start segment number (inclusive)
* @end: end segment number (inclusive)
*
- * Return Value: On success, 0 is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - Invalid number of segments specified
- *
- * %-EBUSY - Dirty or active segments are present in the range
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EBUSY - Dirty or active segments are present in the range.
+ * * %-EINVAL - Invalid number of segments specified.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_sufile_truncate_range(struct inode *sufile,
__u64 start, __u64 end)
@@ -653,7 +694,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
unsigned long segusages_per_block;
unsigned long nsegs, ncleaned;
__u64 segnum;
- void *kaddr;
+ size_t offset;
ssize_t n, nc;
int ret;
int j;
@@ -684,16 +725,16 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
/* hole */
continue;
}
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(
- sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum,
+ su_bh);
+ su = kmap_local_folio(su_bh->b_folio, offset);
su2 = su;
for (j = 0; j < n; j++, su = (void *)su + susz) {
if ((le32_to_cpu(su->su_flags) &
~BIT(NILFS_SEGMENT_USAGE_ERROR)) ||
nilfs_segment_is_active(nilfs, segnum + j)) {
ret = -EBUSY;
- kunmap_atomic(kaddr);
+ kunmap_local(su2);
brelse(su_bh);
goto out_header;
}
@@ -705,7 +746,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
nc++;
}
}
- kunmap_atomic(kaddr);
+ kunmap_local(su2);
if (nc > 0) {
mark_buffer_dirty(su_bh);
ncleaned += nc;
@@ -735,16 +776,12 @@ out:
* @sufile: inode of segment usage file
* @newnsegs: new number of segments
*
- * Return Value: On success, 0 is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOSPC - Enough free space is not left for shrinking
- *
- * %-EBUSY - Dirty or active segments exist in the region to be truncated
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EBUSY - Dirty or active segments exist in the region to be truncated.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - Enough free space is not left for shrinking.
*/
int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
{
@@ -752,7 +789,6 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
struct buffer_head *header_bh;
struct nilfs_sufile_header *header;
struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
- void *kaddr;
unsigned long nsegs, nrsvsegs;
int ret = 0;
@@ -779,12 +815,20 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
goto out_header;
sui->ncleansegs -= nsegs - newnsegs;
+
+ /*
+ * If the sufile is successfully truncated, immediately adjust
+ * the segment allocation space while locking the semaphore
+ * "mi_sem" so that nilfs_sufile_alloc() never allocates
+ * segments in the truncated space.
+ */
+ sui->allocmax = newnsegs - 1;
+ sui->allocmin = 0;
}
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(sufile);
@@ -798,21 +842,17 @@ out:
}
/**
- * nilfs_sufile_get_suinfo -
+ * nilfs_sufile_get_suinfo - get segment usage information
* @sufile: inode of segment usage file
* @segnum: segment number to start looking
- * @buf: array of suinfo
- * @sisz: byte size of suinfo
- * @nsi: size of suinfo array
+ * @buf: array of suinfo
+ * @sisz: byte size of suinfo
+ * @nsi: size of suinfo array
*
- * Description:
- *
- * Return Value: On success, 0 is returned and .... On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: Count of segment usage info items stored in the output buffer on
+ * success, or one of the following negative error codes on failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
unsigned int sisz, size_t nsi)
@@ -822,6 +862,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
struct nilfs_suinfo *si = buf;
size_t susz = NILFS_MDT(sufile)->mi_entry_size;
struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+ size_t offset;
void *kaddr;
unsigned long nsegs, segusages_per_block;
ssize_t n;
@@ -849,9 +890,9 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
continue;
}
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(
- sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum,
+ su_bh);
+ su = kaddr = kmap_local_folio(su_bh->b_folio, offset);
for (j = 0; j < n;
j++, su = (void *)su + susz, si = (void *)si + sisz) {
si->sui_lastmod = le64_to_cpu(su->su_lastmod);
@@ -862,7 +903,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
si->sui_flags |=
BIT(NILFS_SEGMENT_USAGE_ACTIVE);
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
brelse(su_bh);
}
ret = nsegs;
@@ -883,14 +924,11 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
* segment usage accordingly. Only the fields indicated by the sup_flags
* are updated.
*
- * Return Value: On success, 0 is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - Invalid values in input (segment number, flags or nblocks)
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid values in input (segment number, flags or nblocks).
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
unsigned int supsz, size_t nsup)
@@ -899,7 +937,7 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
struct buffer_head *header_bh, *bh;
struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup;
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
unsigned long blkoff, prev_blkoff;
int cleansi, cleansu, dirtysi, dirtysu;
long ncleaned = 0, ndirtied = 0;
@@ -931,9 +969,9 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
goto out_header;
for (;;) {
- kaddr = kmap_atomic(bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(
- sufile, sup->sup_segnum, bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(
+ sufile, sup->sup_segnum, bh);
+ su = kmap_local_folio(bh->b_folio, offset);
if (nilfs_suinfo_update_lastmod(sup))
su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod);
@@ -968,7 +1006,7 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags);
}
- kunmap_atomic(kaddr);
+ kunmap_local(su);
sup = (void *)sup + supsz;
if (sup >= supend)
@@ -1017,13 +1055,14 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
* and start+len is rounded down. For each clean segment blkdev_issue_discard
* function is invoked.
*
- * Return Value: On success, 0 is returned or negative error code, otherwise.
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
{
struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
struct buffer_head *su_bh;
struct nilfs_segment_usage *su;
+ size_t offset;
void *kaddr;
size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size;
sector_t seg_start, seg_end, start_block, end_block;
@@ -1073,9 +1112,9 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
continue;
}
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum,
- su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum,
+ su_bh);
+ su = kaddr = kmap_local_folio(su_bh->b_folio, offset);
for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) {
if (!nilfs_segment_usage_clean(su))
continue;
@@ -1103,7 +1142,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
}
if (nblocks >= minlen) {
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
ret = blkdev_issue_discard(nilfs->ns_bdev,
start * sects_per_block,
@@ -1115,16 +1154,17 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
}
ndiscarded += nblocks;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(
- sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(
+ sufile, segnum, su_bh);
+ su = kaddr = kmap_local_folio(su_bh->b_folio,
+ offset);
}
/* start new extent */
start = seg_start;
nblocks = seg_end - seg_start + 1;
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
put_bh(su_bh);
}
@@ -1161,6 +1201,8 @@ out_sem:
* @susize: size of a segment usage entry
* @raw_inode: on-disk sufile inode
* @inodep: buffer to store the inode
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_sufile_read(struct super_block *sb, size_t susize,
struct nilfs_inode *raw_inode, struct inode **inodep)
@@ -1169,7 +1211,6 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
struct nilfs_sufile_info *sui;
struct buffer_head *header_bh;
struct nilfs_sufile_header *header;
- void *kaddr;
int err;
if (susize > sb->s_blocksize) {
@@ -1185,7 +1226,7 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO);
if (unlikely(!sufile))
return -ENOMEM;
- if (!(sufile->i_state & I_NEW))
+ if (!(inode_state_read_once(sufile) & I_NEW))
goto out;
err = nilfs_mdt_init(sufile, NILFS_MDT_GFP, sizeof(*sui));
@@ -1199,15 +1240,20 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
if (err)
goto failed;
- err = nilfs_sufile_get_header_block(sufile, &header_bh);
- if (err)
+ err = nilfs_mdt_get_block(sufile, 0, 0, NULL, &header_bh);
+ if (unlikely(err)) {
+ if (err == -ENOENT) {
+ nilfs_err(sb,
+ "missing header block in segment usage metadata");
+ err = -EINVAL;
+ }
goto failed;
+ }
sui = NILFS_SUI(sufile);
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
brelse(header_bh);
sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index 8e8a1a5a0402..cd6f28ab3521 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -58,6 +58,8 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range);
* nilfs_sufile_scrap - make a segment garbage
* @sufile: inode of segment usage file
* @segnum: segment number to be freed
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum)
{
@@ -68,6 +70,8 @@ static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum)
* nilfs_sufile_free - free segment
* @sufile: inode of segment usage file
* @segnum: segment number to be freed
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
{
@@ -80,6 +84,8 @@ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
* @segnumv: array of segment numbers
* @nsegs: size of @segnumv array
* @ndone: place to store the number of freed segments
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv,
size_t nsegs, size_t *ndone)
@@ -95,8 +101,7 @@ static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv,
* @nsegs: size of @segnumv array
* @ndone: place to store the number of cancelled segments
*
- * Return Value: On success, 0 is returned. On error, a negative error codes
- * is returned.
+ * Return: 0 on success, or a negative error code on failure.
*/
static inline int nilfs_sufile_cancel_freev(struct inode *sufile,
__u64 *segnumv, size_t nsegs,
@@ -114,14 +119,11 @@ static inline int nilfs_sufile_cancel_freev(struct inode *sufile,
* Description: nilfs_sufile_set_error() marks the segment specified by
* @segnum as erroneous. The error segment will never be used again.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - Invalid segment usage number.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid segment usage number.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum)
{
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6edb6e0dd61f..badc2cbc895e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -29,12 +29,13 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
-#include <linux/parser.h>
#include <linux/crc32.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include "nilfs.h"
#include "export.h"
#include "mdt.h"
@@ -60,7 +61,6 @@ struct kmem_cache *nilfs_segbuf_cachep;
struct kmem_cache *nilfs_btree_path_cache;
static int nilfs_setup_super(struct super_block *sb, int is_mount);
-static int nilfs_remount(struct super_block *sb, int *flags, char *data);
void __nilfs_msg(struct super_block *sb, const char *fmt, ...)
{
@@ -105,6 +105,10 @@ static void nilfs_set_error(struct super_block *sb)
/**
* __nilfs_error() - report failure condition on a filesystem
+ * @sb: super block instance
+ * @function: name of calling function
+ * @fmt: format string for message to be output
+ * @...: optional arguments to @fmt
*
* __nilfs_error() sets an ERROR_FS flag on the superblock as well as
* reporting an error message. This function should be called when
@@ -156,6 +160,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
return NULL;
ii->i_bh = NULL;
ii->i_state = 0;
+ ii->i_type = 0;
ii->i_cno = 0;
ii->i_assoc_inode = NULL;
ii->i_bmap = &ii->i_bmap_data;
@@ -304,6 +309,8 @@ int nilfs_commit_super(struct super_block *sb, int flag)
* This function restores state flags in the on-disk super block.
* This will set "clean" flag (i.e. NILFS_VALID_FS) unless the
* filesystem was not clean previously.
+ *
+ * Return: 0 on success, %-EIO if I/O error or superblock is corrupted.
*/
int nilfs_cleanup_super(struct super_block *sb)
{
@@ -334,6 +341,8 @@ int nilfs_cleanup_super(struct super_block *sb)
* nilfs_move_2nd_super - relocate secondary super block
* @sb: super block instance
* @sb2off: new offset of the secondary super block (in bytes)
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
{
@@ -372,10 +381,31 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
goto out;
}
nsbp = (void *)nsbh->b_data + offset;
- memset(nsbp, 0, nilfs->ns_blocksize);
+ lock_buffer(nsbh);
if (sb2i >= 0) {
+ /*
+ * The position of the second superblock only changes by 4KiB,
+ * which is larger than the maximum superblock data size
+ * (= 1KiB), so there is no need to use memmove() to allow
+ * overlap between source and destination.
+ */
memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
+
+ /*
+ * Zero fill after copy to avoid overwriting in case of move
+ * within the same block.
+ */
+ memset(nsbh->b_data, 0, offset);
+ memset((void *)nsbp + nilfs->ns_sbsize, 0,
+ nsbh->b_size - offset - nilfs->ns_sbsize);
+ } else {
+ memset(nsbh->b_data, 0, nsbh->b_size);
+ }
+ set_buffer_uptodate(nsbh);
+ unlock_buffer(nsbh);
+
+ if (sb2i >= 0) {
brelse(nilfs->ns_sbh[sb2i]);
nilfs->ns_sbh[sb2i] = nsbh;
nilfs->ns_sbp[sb2i] = nsbp;
@@ -394,6 +424,8 @@ out:
* nilfs_resize_fs - resize the filesystem
* @sb: super block instance
* @newsize: new size of the filesystem (in bytes)
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
{
@@ -409,6 +441,15 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
goto out;
/*
+ * Prevent underflow in second superblock position calculation.
+ * The exact minimum size check is done in nilfs_sufile_resize().
+ */
+ if (newsize < 4096) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ /*
* Write lock is required to protect some functions depending
* on the number of segments, the number of reserved segments,
* and so forth.
@@ -417,7 +458,7 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
newnsegs = sb2off >> nilfs->ns_blocksize_bits;
- do_div(newnsegs, nilfs->ns_blocks_per_segment);
+ newnsegs = div64_ul(newnsegs, nilfs->ns_blocks_per_segment);
ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
up_write(&nilfs->ns_segctor_sem);
@@ -473,6 +514,7 @@ static void nilfs_put_super(struct super_block *sb)
up_write(&nilfs->ns_sem);
}
+ nilfs_sysfs_delete_device_group(nilfs);
iput(nilfs->ns_sufile);
iput(nilfs->ns_cpfile);
iput(nilfs->ns_dat);
@@ -512,8 +554,6 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
{
struct the_nilfs *nilfs = sb->s_fs_info;
struct nilfs_root *root;
- struct nilfs_checkpoint *raw_cp;
- struct buffer_head *bh_cp;
int err = -ENOMEM;
root = nilfs_find_or_create_root(
@@ -525,38 +565,19 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
goto reuse; /* already attached checkpoint */
down_read(&nilfs->ns_segctor_sem);
- err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
- &bh_cp);
+ err = nilfs_ifile_read(sb, root, cno, nilfs->ns_inode_size);
up_read(&nilfs->ns_segctor_sem);
- if (unlikely(err)) {
- if (err == -ENOENT || err == -EINVAL) {
- nilfs_err(sb,
- "Invalid checkpoint (checkpoint number=%llu)",
- (unsigned long long)cno);
- err = -EINVAL;
- }
+ if (unlikely(err))
goto failed;
- }
-
- err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size,
- &raw_cp->cp_ifile_inode, &root->ifile);
- if (err)
- goto failed_bh;
-
- atomic64_set(&root->inodes_count,
- le64_to_cpu(raw_cp->cp_inodes_count));
- atomic64_set(&root->blocks_count,
- le64_to_cpu(raw_cp->cp_blocks_count));
-
- nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
reuse:
*rootp = root;
return 0;
- failed_bh:
- nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
failed:
+ if (err == -EINVAL)
+ nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)",
+ (unsigned long long)cno);
nilfs_put_root(root);
return err;
@@ -691,105 +712,98 @@ static const struct super_operations nilfs_sops = {
.freeze_fs = nilfs_freeze,
.unfreeze_fs = nilfs_unfreeze,
.statfs = nilfs_statfs,
- .remount_fs = nilfs_remount,
.show_options = nilfs_show_options
};
enum {
- Opt_err_cont, Opt_err_panic, Opt_err_ro,
- Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
- Opt_discard, Opt_nodiscard, Opt_err,
+ Opt_err, Opt_barrier, Opt_snapshot, Opt_order, Opt_norecovery,
+ Opt_discard,
};
-static match_table_t tokens = {
- {Opt_err_cont, "errors=continue"},
- {Opt_err_panic, "errors=panic"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_snapshot, "cp=%u"},
- {Opt_order, "order=%s"},
- {Opt_norecovery, "norecovery"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_err, NULL}
+static const struct constant_table nilfs_param_err[] = {
+ {"continue", NILFS_MOUNT_ERRORS_CONT},
+ {"panic", NILFS_MOUNT_ERRORS_PANIC},
+ {"remount-ro", NILFS_MOUNT_ERRORS_RO},
+ {}
};
-static int parse_options(char *options, struct super_block *sb, int is_remount)
-{
- struct the_nilfs *nilfs = sb->s_fs_info;
- char *p;
- substring_t args[MAX_OPT_ARGS];
-
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
+static const struct fs_parameter_spec nilfs_param_spec[] = {
+ fsparam_enum ("errors", Opt_err, nilfs_param_err),
+ fsparam_flag_no ("barrier", Opt_barrier),
+ fsparam_u64 ("cp", Opt_snapshot),
+ fsparam_string ("order", Opt_order),
+ fsparam_flag ("norecovery", Opt_norecovery),
+ fsparam_flag_no ("discard", Opt_discard),
+ {}
+};
- if (!*p)
- continue;
+struct nilfs_fs_context {
+ unsigned long ns_mount_opt;
+ __u64 cno;
+};
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_barrier:
- nilfs_set_opt(nilfs, BARRIER);
- break;
- case Opt_nobarrier:
+static int nilfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct nilfs_fs_context *nilfs = fc->fs_private;
+ int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, nilfs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_barrier:
+ if (result.negated)
nilfs_clear_opt(nilfs, BARRIER);
- break;
- case Opt_order:
- if (strcmp(args[0].from, "relaxed") == 0)
- /* Ordered data semantics */
- nilfs_clear_opt(nilfs, STRICT_ORDER);
- else if (strcmp(args[0].from, "strict") == 0)
- /* Strict in-order semantics */
- nilfs_set_opt(nilfs, STRICT_ORDER);
- else
- return 0;
- break;
- case Opt_err_panic:
- nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC);
- break;
- case Opt_err_ro:
- nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO);
- break;
- case Opt_err_cont:
- nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT);
- break;
- case Opt_snapshot:
- if (is_remount) {
- nilfs_err(sb,
- "\"%s\" option is invalid for remount",
- p);
- return 0;
- }
- break;
- case Opt_norecovery:
- nilfs_set_opt(nilfs, NORECOVERY);
- break;
- case Opt_discard:
- nilfs_set_opt(nilfs, DISCARD);
- break;
- case Opt_nodiscard:
- nilfs_clear_opt(nilfs, DISCARD);
- break;
- default:
- nilfs_err(sb, "unrecognized mount option \"%s\"", p);
- return 0;
+ else
+ nilfs_set_opt(nilfs, BARRIER);
+ break;
+ case Opt_order:
+ if (strcmp(param->string, "relaxed") == 0)
+ /* Ordered data semantics */
+ nilfs_clear_opt(nilfs, STRICT_ORDER);
+ else if (strcmp(param->string, "strict") == 0)
+ /* Strict in-order semantics */
+ nilfs_set_opt(nilfs, STRICT_ORDER);
+ else
+ return -EINVAL;
+ break;
+ case Opt_err:
+ nilfs->ns_mount_opt &= ~NILFS_MOUNT_ERROR_MODE;
+ nilfs->ns_mount_opt |= result.uint_32;
+ break;
+ case Opt_snapshot:
+ if (is_remount) {
+ struct super_block *sb = fc->root->d_sb;
+
+ nilfs_err(sb,
+ "\"%s\" option is invalid for remount",
+ param->key);
+ return -EINVAL;
}
+ if (result.uint_64 == 0) {
+ nilfs_err(NULL,
+ "invalid option \"cp=0\": invalid checkpoint number 0");
+ return -EINVAL;
+ }
+ nilfs->cno = result.uint_64;
+ break;
+ case Opt_norecovery:
+ nilfs_set_opt(nilfs, NORECOVERY);
+ break;
+ case Opt_discard:
+ if (result.negated)
+ nilfs_clear_opt(nilfs, DISCARD);
+ else
+ nilfs_set_opt(nilfs, DISCARD);
+ break;
+ default:
+ return -EINVAL;
}
- return 1;
-}
-static inline void
-nilfs_set_default_options(struct super_block *sb,
- struct nilfs_super_block *sbp)
-{
- struct the_nilfs *nilfs = sb->s_fs_info;
-
- nilfs->ns_mount_opt =
- NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
+ return 0;
}
static int nilfs_setup_super(struct super_block *sb, int is_mount)
@@ -846,9 +860,8 @@ struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset);
}
-int nilfs_store_magic_and_option(struct super_block *sb,
- struct nilfs_super_block *sbp,
- char *data)
+int nilfs_store_magic(struct super_block *sb,
+ struct nilfs_super_block *sbp)
{
struct the_nilfs *nilfs = sb->s_fs_info;
@@ -859,14 +872,12 @@ int nilfs_store_magic_and_option(struct super_block *sb,
sb->s_flags |= SB_NOATIME;
#endif
- nilfs_set_default_options(sb, sbp);
-
nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid);
nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid);
nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval);
nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max);
- return !parse_options(data, sb, 0) ? -EINVAL : 0;
+ return 0;
}
int nilfs_check_feature_compatibility(struct super_block *sb,
@@ -982,7 +993,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
* nilfs_tree_is_busy() - try to shrink dentries of a checkpoint
* @root_dentry: root dentry of the tree to be shrunk
*
- * This function returns true if the tree was in-use.
+ * Return: true if the tree was in-use, false otherwise.
*/
static bool nilfs_tree_is_busy(struct dentry *root_dentry)
{
@@ -1024,17 +1035,19 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
/**
* nilfs_fill_super() - initialize a super block instance
* @sb: super_block
- * @data: mount options
- * @silent: silent mode flag
+ * @fc: filesystem context
*
* This function is called exclusively by nilfs->ns_mount_mutex.
* So, the recovery process is protected from other simultaneous mounts.
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int
-nilfs_fill_super(struct super_block *sb, void *data, int silent)
+nilfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct the_nilfs *nilfs;
struct nilfs_root *fsroot;
+ struct nilfs_fs_context *ctx = fc->fs_private;
__u64 cno;
int err;
@@ -1044,10 +1057,13 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_fs_info = nilfs;
- err = init_nilfs(nilfs, sb, (char *)data);
+ err = init_nilfs(nilfs, sb);
if (err)
goto failed_nilfs;
+ /* Copy in parsed mount options */
+ nilfs->ns_mount_opt = ctx->ns_mount_opt;
+
sb->s_op = &nilfs_sops;
sb->s_export_op = &nilfs_export_ops;
sb->s_root = NULL;
@@ -1060,6 +1076,10 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto failed_nilfs;
+ super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid,
+ sizeof(nilfs->ns_sbp[0]->s_uuid));
+ super_set_sysfs_name_bdev(sb);
+
cno = nilfs_last_cno(nilfs);
err = nilfs_attach_checkpoint(sb, cno, true, &fsroot);
if (err) {
@@ -1096,6 +1116,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
nilfs_put_root(fsroot);
failed_unload:
+ nilfs_sysfs_delete_device_group(nilfs);
iput(nilfs->ns_sufile);
iput(nilfs->ns_cpfile);
iput(nilfs->ns_dat);
@@ -1105,34 +1126,25 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
return err;
}
-static int nilfs_remount(struct super_block *sb, int *flags, char *data)
+static int nilfs_reconfigure(struct fs_context *fc)
{
+ struct nilfs_fs_context *ctx = fc->fs_private;
+ struct super_block *sb = fc->root->d_sb;
struct the_nilfs *nilfs = sb->s_fs_info;
- unsigned long old_sb_flags;
- unsigned long old_mount_opt;
int err;
sync_filesystem(sb);
- old_sb_flags = sb->s_flags;
- old_mount_opt = nilfs->ns_mount_opt;
-
- if (!parse_options(data, sb, 1)) {
- err = -EINVAL;
- goto restore_opts;
- }
- sb->s_flags = (sb->s_flags & ~SB_POSIXACL);
err = -EINVAL;
if (!nilfs_valid_fs(nilfs)) {
nilfs_warn(sb,
"couldn't remount because the filesystem is in an incomplete recovery state");
- goto restore_opts;
+ goto ignore_opts;
}
-
- if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
+ if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb))
goto out;
- if (*flags & SB_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
sb->s_flags |= SB_RDONLY;
/*
@@ -1160,166 +1172,67 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
"couldn't remount RDWR because of unsupported optional features (%llx)",
(unsigned long long)features);
err = -EROFS;
- goto restore_opts;
+ goto ignore_opts;
}
sb->s_flags &= ~SB_RDONLY;
root = NILFS_I(d_inode(sb->s_root))->i_root;
err = nilfs_attach_log_writer(sb, root);
- if (err)
- goto restore_opts;
+ if (err) {
+ sb->s_flags |= SB_RDONLY;
+ goto ignore_opts;
+ }
down_write(&nilfs->ns_sem);
nilfs_setup_super(sb, true);
up_write(&nilfs->ns_sem);
}
out:
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL);
+ /* Copy over parsed remount options */
+ nilfs->ns_mount_opt = ctx->ns_mount_opt;
+
return 0;
- restore_opts:
- sb->s_flags = old_sb_flags;
- nilfs->ns_mount_opt = old_mount_opt;
+ ignore_opts:
return err;
}
-struct nilfs_super_data {
- struct block_device *bdev;
- __u64 cno;
- int flags;
-};
-
-static int nilfs_parse_snapshot_option(const char *option,
- const substring_t *arg,
- struct nilfs_super_data *sd)
+static int
+nilfs_get_tree(struct fs_context *fc)
{
- unsigned long long val;
- const char *msg = NULL;
+ struct nilfs_fs_context *ctx = fc->fs_private;
+ struct super_block *s;
+ dev_t dev;
int err;
- if (!(sd->flags & SB_RDONLY)) {
- msg = "read-only option is not specified";
- goto parse_error;
- }
-
- err = kstrtoull(arg->from, 0, &val);
- if (err) {
- if (err == -ERANGE)
- msg = "too large checkpoint number";
- else
- msg = "malformed argument";
- goto parse_error;
- } else if (val == 0) {
- msg = "invalid checkpoint number 0";
- goto parse_error;
+ if (ctx->cno && !(fc->sb_flags & SB_RDONLY)) {
+ nilfs_err(NULL,
+ "invalid option \"cp=%llu\": read-only option is not specified",
+ ctx->cno);
+ return -EINVAL;
}
- sd->cno = val;
- return 0;
-
-parse_error:
- nilfs_err(NULL, "invalid option \"%s\": %s", option, msg);
- return 1;
-}
-
-/**
- * nilfs_identify - pre-read mount options needed to identify mount instance
- * @data: mount options
- * @sd: nilfs_super_data
- */
-static int nilfs_identify(char *data, struct nilfs_super_data *sd)
-{
- char *p, *options = data;
- substring_t args[MAX_OPT_ARGS];
- int token;
- int ret = 0;
-
- do {
- p = strsep(&options, ",");
- if (p != NULL && *p) {
- token = match_token(p, tokens, args);
- if (token == Opt_snapshot)
- ret = nilfs_parse_snapshot_option(p, &args[0],
- sd);
- }
- if (!options)
- break;
- BUG_ON(options == data);
- *(options - 1) = ',';
- } while (!ret);
- return ret;
-}
-
-static int nilfs_set_bdev_super(struct super_block *s, void *data)
-{
- s->s_bdev = data;
- s->s_dev = s->s_bdev->bd_dev;
- return 0;
-}
-
-static int nilfs_test_bdev_super(struct super_block *s, void *data)
-{
- return (void *)s->s_bdev == data;
-}
-
-static struct dentry *
-nilfs_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
-{
- struct nilfs_super_data sd;
- struct super_block *s;
- fmode_t mode = FMODE_READ | FMODE_EXCL;
- struct dentry *root_dentry;
- int err, s_new = false;
-
- if (!(flags & SB_RDONLY))
- mode |= FMODE_WRITE;
- sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
- if (IS_ERR(sd.bdev))
- return ERR_CAST(sd.bdev);
+ err = lookup_bdev(fc->source, &dev);
+ if (err)
+ return err;
- sd.cno = 0;
- sd.flags = flags;
- if (nilfs_identify((char *)data, &sd)) {
- err = -EINVAL;
- goto failed;
- }
-
- /*
- * once the super is inserted into the list by sget, s_umount
- * will protect the lockfs code from trying to start a snapshot
- * while we are mounting
- */
- mutex_lock(&sd.bdev->bd_fsfreeze_mutex);
- if (sd.bdev->bd_fsfreeze_count > 0) {
- mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
- err = -EBUSY;
- goto failed;
- }
- s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
- sd.bdev);
- mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
- if (IS_ERR(s)) {
- err = PTR_ERR(s);
- goto failed;
- }
+ s = sget_dev(fc, dev);
+ if (IS_ERR(s))
+ return PTR_ERR(s);
if (!s->s_root) {
- s_new = true;
-
- /* New superblock instance created */
- s->s_mode = mode;
- snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev);
- sb_set_blocksize(s, block_size(sd.bdev));
-
- err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+ err = setup_bdev_super(s, fc->sb_flags, fc);
+ if (!err)
+ err = nilfs_fill_super(s, fc);
if (err)
goto failed_super;
s->s_flags |= SB_ACTIVE;
- } else if (!sd.cno) {
+ } else if (!ctx->cno) {
if (nilfs_tree_is_busy(s->s_root)) {
- if ((flags ^ s->s_flags) & SB_RDONLY) {
+ if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
nilfs_err(s,
"the device already has a %s mount.",
sb_rdonly(s) ? "read-only" : "read/write");
@@ -1328,43 +1241,75 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
}
} else {
/*
- * Try remount to setup mount states if the current
+ * Try reconfigure to setup mount states if the current
* tree is not mounted and only snapshots use this sb.
+ *
+ * Since nilfs_reconfigure() requires fc->root to be
+ * set, set it first and release it on failure.
*/
- err = nilfs_remount(s, &flags, data);
- if (err)
+ fc->root = dget(s->s_root);
+ err = nilfs_reconfigure(fc);
+ if (err) {
+ dput(fc->root);
+ fc->root = NULL; /* prevent double release */
goto failed_super;
+ }
+ return 0;
}
}
- if (sd.cno) {
- err = nilfs_attach_snapshot(s, sd.cno, &root_dentry);
+ if (ctx->cno) {
+ struct dentry *root_dentry;
+
+ err = nilfs_attach_snapshot(s, ctx->cno, &root_dentry);
if (err)
goto failed_super;
- } else {
- root_dentry = dget(s->s_root);
+ fc->root = root_dentry;
+ return 0;
}
- if (!s_new)
- blkdev_put(sd.bdev, mode);
-
- return root_dentry;
+ fc->root = dget(s->s_root);
+ return 0;
failed_super:
deactivate_locked_super(s);
+ return err;
+}
- failed:
- if (!s_new)
- blkdev_put(sd.bdev, mode);
- return ERR_PTR(err);
+static void nilfs_free_fc(struct fs_context *fc)
+{
+ kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations nilfs_context_ops = {
+ .parse_param = nilfs_parse_param,
+ .get_tree = nilfs_get_tree,
+ .reconfigure = nilfs_reconfigure,
+ .free = nilfs_free_fc,
+};
+
+static int nilfs_init_fs_context(struct fs_context *fc)
+{
+ struct nilfs_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
+ fc->fs_private = ctx;
+ fc->ops = &nilfs_context_ops;
+
+ return 0;
}
struct file_system_type nilfs_fs_type = {
.owner = THIS_MODULE,
.name = "nilfs2",
- .mount = nilfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = nilfs_init_fs_context,
+ .parameters = nilfs_param_spec,
};
MODULE_ALIAS_FS("nilfs2");
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 379d22e28ed6..bc52afbfc5c7 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -56,7 +56,7 @@ static void nilfs_##name##_attr_release(struct kobject *kobj) \
sg_##name##_kobj); \
complete(&subgroups->sg_##name##_kobj_unregister); \
} \
-static struct kobj_type nilfs_##name##_ktype = { \
+static const struct kobj_type nilfs_##name##_ktype = { \
.default_groups = nilfs_##name##_groups, \
.sysfs_ops = &nilfs_##name##_attr_ops, \
.release = nilfs_##name##_attr_release, \
@@ -166,7 +166,7 @@ static const struct sysfs_ops nilfs_snapshot_attr_ops = {
.store = nilfs_snapshot_attr_store,
};
-static struct kobj_type nilfs_snapshot_ktype = {
+static const struct kobj_type nilfs_snapshot_ktype = {
.default_groups = nilfs_snapshot_groups,
.sysfs_ops = &nilfs_snapshot_attr_ops,
.release = nilfs_snapshot_attr_release,
@@ -836,9 +836,15 @@ ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
- u32 major = le32_to_cpu(sbp[0]->s_rev_level);
- u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
+ struct nilfs_super_block *raw_sb;
+ u32 major;
+ u16 minor;
+
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ major = le32_to_cpu(raw_sb->s_rev_level);
+ minor = le16_to_cpu(raw_sb->s_minor_rev_level);
+ up_read(&nilfs->ns_sem);
return sysfs_emit(buf, "%d.%d\n", major, minor);
}
@@ -856,8 +862,13 @@ ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
- u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
+ struct nilfs_super_block *raw_sb;
+ u64 dev_size;
+
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ dev_size = le64_to_cpu(raw_sb->s_dev_size);
+ up_read(&nilfs->ns_sem);
return sysfs_emit(buf, "%llu\n", dev_size);
}
@@ -879,9 +890,15 @@ ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ struct nilfs_super_block *raw_sb;
+ ssize_t len;
- return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid);
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid);
+ up_read(&nilfs->ns_sem);
+
+ return len;
}
static
@@ -889,10 +906,16 @@ ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ struct nilfs_super_block *raw_sb;
+ ssize_t len;
+
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n",
+ raw_sb->s_volume_name);
+ up_read(&nilfs->ns_sem);
- return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
- sbp[0]->s_volume_name);
+ return len;
}
static const char dev_readme_str[] =
@@ -967,7 +990,7 @@ static const struct sysfs_ops nilfs_dev_attr_ops = {
.store = nilfs_dev_attr_store,
};
-static struct kobj_type nilfs_dev_ktype = {
+static const struct kobj_type nilfs_dev_ktype = {
.default_groups = nilfs_dev_groups,
.sysfs_ops = &nilfs_dev_attr_ops,
.release = nilfs_dev_attr_release,
@@ -1052,7 +1075,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
************************************************************************/
static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
+ struct kobj_attribute *attr, char *buf)
{
return sysfs_emit(buf, "%d.%d\n",
NILFS_CURRENT_REV, NILFS_MINOR_REV);
@@ -1064,7 +1087,7 @@ static const char features_readme_str[] =
"(1) revision\n\tshow current revision of NILFS file system driver.\n";
static ssize_t nilfs_feature_README_show(struct kobject *kobj,
- struct attribute *attr,
+ struct kobj_attribute *attr,
char *buf)
{
return sysfs_emit(buf, features_readme_str);
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
index 78a87a016928..d370cd5cce3f 100644
--- a/fs/nilfs2/sysfs.h
+++ b/fs/nilfs2/sysfs.h
@@ -50,16 +50,16 @@ struct nilfs_sysfs_dev_subgroups {
struct completion sg_segments_kobj_unregister;
};
-#define NILFS_COMMON_ATTR_STRUCT(name) \
+#define NILFS_KOBJ_ATTR_STRUCT(name) \
struct nilfs_##name##_attr { \
struct attribute attr; \
- ssize_t (*show)(struct kobject *, struct attribute *, \
+ ssize_t (*show)(struct kobject *, struct kobj_attribute *, \
char *); \
- ssize_t (*store)(struct kobject *, struct attribute *, \
+ ssize_t (*store)(struct kobject *, struct kobj_attribute *, \
const char *, size_t); \
}
-NILFS_COMMON_ATTR_STRUCT(feature);
+NILFS_KOBJ_ATTR_STRUCT(feature);
#define NILFS_DEV_ATTR_STRUCT(name) \
struct nilfs_##name##_attr { \
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 2064e6473d30..d0bcf744c553 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -12,7 +12,6 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
-#include <linux/random.h>
#include <linux/log2.h>
#include <linux/crc32.h>
#include "nilfs.h"
@@ -50,8 +49,8 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs,
* alloc_nilfs - allocate a nilfs object
* @sb: super block instance
*
- * Return Value: On success, pointer to the_nilfs is returned.
- * On error, NULL is returned.
+ * Return: a pointer to the allocated nilfs object on success, or NULL on
+ * failure.
*/
struct the_nilfs *alloc_nilfs(struct super_block *sb)
{
@@ -69,7 +68,6 @@ struct the_nilfs *alloc_nilfs(struct super_block *sb)
INIT_LIST_HEAD(&nilfs->ns_dirty_files);
INIT_LIST_HEAD(&nilfs->ns_gc_inodes);
spin_lock_init(&nilfs->ns_inode_lock);
- spin_lock_init(&nilfs->ns_next_gen_lock);
spin_lock_init(&nilfs->ns_last_segment_lock);
nilfs->ns_cptree = RB_ROOT;
spin_lock_init(&nilfs->ns_cptree_lock);
@@ -87,7 +85,6 @@ void destroy_nilfs(struct the_nilfs *nilfs)
{
might_sleep();
if (nilfs_init(nilfs)) {
- nilfs_sysfs_delete_device_group(nilfs);
brelse(nilfs->ns_sbh[0]);
brelse(nilfs->ns_sbh[1]);
}
@@ -168,6 +165,9 @@ static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri)
* containing a super root from a given super block, and initializes
* relevant information on the nilfs object preparatory for log
* scanning and recovery.
+ *
+ * Return: 0 on success, or %-EINVAL if current segment number is out
+ * of range.
*/
static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
struct nilfs_super_block *sbp)
@@ -203,8 +203,7 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
* exponent information written in @sbp and stores it in @blocksize,
* or aborts with an error message if it's too large.
*
- * Return Value: On success, 0 is returned. If the block size is too
- * large, -EINVAL is returned.
+ * Return: 0 on success, or %-EINVAL if the block size is too large.
*/
static int nilfs_get_blocksize(struct super_block *sb,
struct nilfs_super_block *sbp, int *blocksize)
@@ -229,6 +228,13 @@ static int nilfs_get_blocksize(struct super_block *sb,
* load_nilfs() searches and load the latest super root,
* attaches the last segment, and does recovery if needed.
* The caller must call this exclusively for simultaneous mounts.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - No valid segment found.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EROFS - Read only device or RO compat mode (if recovery is required)
*/
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
{
@@ -305,6 +311,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
goto failed;
}
+ err = nilfs_sysfs_create_device_group(sb);
+ if (unlikely(err))
+ goto sysfs_error;
+
if (valid_fs)
goto skip_recovery;
@@ -366,6 +376,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
goto failed;
failed_unload:
+ nilfs_sysfs_delete_device_group(nilfs);
+
+ sysfs_error:
iput(nilfs->ns_cpfile);
iput(nilfs->ns_sufile);
iput(nilfs->ns_dat);
@@ -391,6 +404,8 @@ static unsigned long long nilfs_max_size(unsigned int blkbits)
* nilfs_nrsvsegs - calculate the number of reserved segments
* @nilfs: nilfs object
* @nsegs: total number of segments
+ *
+ * Return: Number of reserved segments.
*/
unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
{
@@ -399,6 +414,20 @@ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
100));
}
+/**
+ * nilfs_max_segment_count - calculate the maximum number of segments
+ * @nilfs: nilfs object
+ *
+ * Return: Maximum number of segments
+ */
+static u64 nilfs_max_segment_count(struct the_nilfs *nilfs)
+{
+ u64 max_count = U64_MAX;
+
+ max_count = div64_ul(max_count, nilfs->ns_blocks_per_segment);
+ return min_t(u64, max_count, ULONG_MAX);
+}
+
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
{
nilfs->ns_nsegments = nsegs;
@@ -408,6 +437,8 @@ void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
struct nilfs_super_block *sbp)
{
+ u64 nsegments, nblocks;
+
if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) {
nilfs_err(nilfs->ns_sb,
"unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). Please check the version of mkfs.nilfs(2).",
@@ -432,6 +463,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
}
nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino);
+ if (nilfs->ns_first_ino < NILFS_USER_INO) {
+ nilfs_err(nilfs->ns_sb,
+ "too small lower limit for non-reserved inode numbers: %u",
+ nilfs->ns_first_ino);
+ return -EINVAL;
+ }
nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) {
@@ -451,7 +488,34 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
return -EINVAL;
}
- nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
+ nsegments = le64_to_cpu(sbp->s_nsegments);
+ if (nsegments > nilfs_max_segment_count(nilfs)) {
+ nilfs_err(nilfs->ns_sb,
+ "segment count %llu exceeds upper limit (%llu segments)",
+ (unsigned long long)nsegments,
+ (unsigned long long)nilfs_max_segment_count(nilfs));
+ return -EINVAL;
+ }
+
+ nblocks = sb_bdev_nr_blocks(nilfs->ns_sb);
+ if (nblocks) {
+ u64 min_block_count = nsegments * nilfs->ns_blocks_per_segment;
+ /*
+ * To avoid failing to mount early device images without a
+ * second superblock, exclude that block count from the
+ * "min_block_count" calculation.
+ */
+
+ if (nblocks < min_block_count) {
+ nilfs_err(nilfs->ns_sb,
+ "total number of segment blocks %llu exceeds device size (%llu blocks)",
+ (unsigned long long)min_block_count,
+ (unsigned long long)nblocks);
+ return -EINVAL;
+ }
+ }
+
+ nilfs_set_nsegments(nilfs, nsegments);
nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
return 0;
}
@@ -487,7 +551,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
* area, or if the parameters themselves are not normal, it is
* determined to be invalid.
*
- * Return Value: true if invalid, false if valid.
+ * Return: true if invalid, false if valid.
*/
static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
{
@@ -544,8 +608,14 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
{
struct nilfs_super_block **sbp = nilfs->ns_sbp;
struct buffer_head **sbh = nilfs->ns_sbh;
- u64 sb2off = NILFS_SB2_OFFSET_BYTES(bdev_nr_bytes(nilfs->ns_bdev));
- int valid[2], swp = 0;
+ u64 sb2off, devsize = bdev_nr_bytes(nilfs->ns_bdev);
+ int valid[2], swp = 0, older;
+
+ if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) {
+ nilfs_err(sb, "device size too small");
+ return -EINVAL;
+ }
+ sb2off = NILFS_SB2_OFFSET_BYTES(devsize);
sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize,
&sbh[0]);
@@ -595,9 +665,25 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
if (swp)
nilfs_swap_super_block(nilfs);
+ /*
+ * Calculate the array index of the older superblock data.
+ * If one has been dropped, set index 0 pointing to the remaining one,
+ * otherwise set index 1 pointing to the old one (including if both
+ * are the same).
+ *
+ * Divided case valid[0] valid[1] swp -> older
+ * -------------------------------------------------------------
+ * Both SBs are invalid 0 0 N/A (Error)
+ * SB1 is invalid 0 1 1 0
+ * SB2 is invalid 1 0 0 0
+ * SB2 is newer 1 1 1 0
+ * SB2 is older or the same 1 1 0 1
+ */
+ older = valid[1] ^ swp;
+
nilfs->ns_sbwcount = 0;
nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
- nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq);
+ nilfs->ns_prot_seq = le64_to_cpu(sbp[older]->s_last_seq);
*sbpp = sbp[0];
return 0;
}
@@ -606,23 +692,19 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
* init_nilfs - initialize a NILFS instance.
* @nilfs: the_nilfs structure
* @sb: super block
- * @data: mount options
*
* init_nilfs() performs common initialization per block device (e.g.
* reading the super block, getting disk layout information, initializing
* shared fields in the_nilfs).
*
- * Return Value: On success, 0 is returned. On error, a negative error
- * code is returned.
+ * Return: 0 on success, or a negative error code on failure.
*/
-int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
+int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
{
struct nilfs_super_block *sbp;
int blocksize;
int err;
- down_write(&nilfs->ns_sem);
-
blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
if (!blocksize) {
nilfs_err(sb, "unable to set blocksize");
@@ -633,7 +715,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto out;
- err = nilfs_store_magic_and_option(sb, sbp, data);
+ err = nilfs_store_magic(sb, sbp);
if (err)
goto failed_sbh;
@@ -663,7 +745,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
goto failed_sbh;
}
nilfs_release_super_block(nilfs);
- sb_set_blocksize(sb, blocksize);
+ if (!sb_set_blocksize(sb, blocksize)) {
+ nilfs_err(sb, "bad blocksize %d", blocksize);
+ err = -EINVAL;
+ goto out;
+ }
err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
if (err)
@@ -676,9 +762,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
nilfs->ns_blocksize_bits = sb->s_blocksize_bits;
nilfs->ns_blocksize = blocksize;
- get_random_bytes(&nilfs->ns_next_generation,
- sizeof(nilfs->ns_next_generation));
-
err = nilfs_store_disk_layout(nilfs, sbp);
if (err)
goto failed_sbh;
@@ -691,14 +774,9 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
- err = nilfs_sysfs_create_device_group(sb);
- if (err)
- goto failed_sbh;
-
set_nilfs_init(nilfs);
err = 0;
out:
- up_write(&nilfs->ns_sem);
return err;
failed_sbh:
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 47c7dfbb7ea5..4776a70f01ae 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -29,6 +29,7 @@ enum {
THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
THE_NILFS_GC_RUNNING, /* gc process is running */
THE_NILFS_SB_DIRTY, /* super block is dirty */
+ THE_NILFS_PURGING, /* disposing dirty files for cleanup */
};
/**
@@ -70,8 +71,6 @@ enum {
* @ns_dirty_files: list of dirty files
* @ns_inode_lock: lock protecting @ns_dirty_files
* @ns_gc_inodes: dummy inodes to keep live blocks
- * @ns_next_generation: next generation number for inodes
- * @ns_next_gen_lock: lock protecting @ns_next_generation
* @ns_mount_opt: mount options
* @ns_resuid: uid for reserved blocks
* @ns_resgid: gid for reserved blocks
@@ -160,10 +159,6 @@ struct the_nilfs {
/* GC inode list */
struct list_head ns_gc_inodes;
- /* Inode allocator */
- u32 ns_next_generation;
- spinlock_t ns_next_gen_lock;
-
/* Mount options */
unsigned long ns_mount_opt;
@@ -181,7 +176,7 @@ struct the_nilfs {
unsigned long ns_nrsvsegs;
unsigned long ns_first_data_block;
int ns_inode_size;
- int ns_first_ino;
+ unsigned int ns_first_ino;
u32 ns_crc_seed;
/* /sys/fs/<nilfs>/<device> */
@@ -208,6 +203,7 @@ THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(DISCONTINUED, discontinued)
THE_NILFS_FNS(GC_RUNNING, gc_running)
THE_NILFS_FNS(SB_DIRTY, sb_dirty)
+THE_NILFS_FNS(PURGING, purging)
/*
* Mount option operations
@@ -217,10 +213,6 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
#define nilfs_set_opt(nilfs, opt) \
((nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt)
#define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt)
-#define nilfs_write_opt(nilfs, mask, opt) \
- ((nilfs)->ns_mount_opt = \
- (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \
- NILFS_MOUNT_##opt)) \
/**
* struct nilfs_root - nilfs root object
@@ -274,7 +266,7 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
struct the_nilfs *alloc_nilfs(struct super_block *sb);
void destroy_nilfs(struct the_nilfs *nilfs);
-int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
+int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);