summary | refs | log | tree | commit | diff
path: root/fs/nilfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nilfs2')
-rw-r--r-- fs/nilfs2/Kconfig 3
-rw-r--r-- fs/nilfs2/Makefile 3
-rw-r--r-- fs/nilfs2/alloc.c 608
-rw-r--r-- fs/nilfs2/alloc.h 37
-rw-r--r-- fs/nilfs2/bmap.c 214
-rw-r--r-- fs/nilfs2/bmap.h 68
-rw-r--r-- fs/nilfs2/btnode.c 255
-rw-r--r-- fs/nilfs2/btnode.h 26
-rw-r--r-- fs/nilfs2/btree.c 327
-rw-r--r-- fs/nilfs2/btree.h 22
-rw-r--r-- fs/nilfs2/cpfile.c 815
-rw-r--r-- fs/nilfs2/cpfile.h 36
-rw-r--r-- fs/nilfs2/dat.c 251
-rw-r--r-- fs/nilfs2/dat.h 22
-rw-r--r-- fs/nilfs2/dir.c 505
-rw-r--r-- fs/nilfs2/direct.c 72
-rw-r--r-- fs/nilfs2/direct.h 29
-rw-r--r-- fs/nilfs2/export.h 3
-rw-r--r-- fs/nilfs2/file.c 100
-rw-r--r-- fs/nilfs2/gcinode.c 84
-rw-r--r-- fs/nilfs2/ifile.c 97
-rw-r--r-- fs/nilfs2/ifile.h 35
-rw-r--r-- fs/nilfs2/inode.c 678
-rw-r--r-- fs/nilfs2/ioctl.c 758
-rw-r--r-- fs/nilfs2/mdt.c 382
-rw-r--r-- fs/nilfs2/mdt.h 52
-rw-r--r-- fs/nilfs2/namei.c 232
-rw-r--r-- fs/nilfs2/nilfs.h 216
-rw-r--r-- fs/nilfs2/page.c 522
-rw-r--r-- fs/nilfs2/page.h 50
-rw-r--r-- fs/nilfs2/recovery.c 260
-rw-r--r-- fs/nilfs2/segbuf.c 154
-rw-r--r-- fs/nilfs2/segbuf.h 26
-rw-r--r-- fs/nilfs2/segment.c 1213
-rw-r--r-- fs/nilfs2/segment.h 78
-rw-r--r-- fs/nilfs2/sufile.c 714
-rw-r--r-- fs/nilfs2/sufile.h 48
-rw-r--r-- fs/nilfs2/super.c 805
-rw-r--r-- fs/nilfs2/sysfs.c 1140
-rw-r--r-- fs/nilfs2/sysfs.h 167
-rw-r--r-- fs/nilfs2/the_nilfs.c 398
-rw-r--r-- fs/nilfs2/the_nilfs.h 116
42 files changed, 7379 insertions, 4242 deletions
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 80da8eb27393..7dae168e346e 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -1,6 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
config NILFS2_FS
tristate "NILFS2 file system support"
+ select BUFFER_HEAD
select CRC32
+ select LEGACY_DIRECT_IO
help
NILFS2 is a log-structured file system (LFS) supporting continuous
snapshotting. In addition to versioning capability of the entire
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile
index 85c98737a146..43b60b8a4d07 100644
--- a/fs/nilfs2/Makefile
+++ b/fs/nilfs2/Makefile
@@ -1,5 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_NILFS2_FS) += nilfs2.o
nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \
btnode.o bmap.o btree.o direct.o dat.o recovery.o \
the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \
- ifile.o alloc.o gcinode.o ioctl.o
+ ifile.o alloc.o gcinode.o ioctl.o sysfs.o
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 741fd02e0444..6b506995818d 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -1,25 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * alloc.c - NILFS dat/inode allocator
+ * NILFS dat/inode allocator
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Original code was written by Koji Sato <koji@osrg.net>.
- * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>,
- * Amagai Yoshiji <amagai@osrg.net>.
+ * Originally written by Koji Sato.
+ * Two allocators were unified by Ryusuke Konishi and Amagai Yoshiji.
*/
#include <linux/types.h>
@@ -35,17 +21,21 @@
* nilfs_palloc_groups_per_desc_block - get the number of groups that a group
* descriptor block can maintain
* @inode: inode of metadata file using this allocator
+ *
+ * Return: Number of groups that a group descriptor block can maintain.
*/
static inline unsigned long
nilfs_palloc_groups_per_desc_block(const struct inode *inode)
{
- return (1UL << inode->i_blkbits) /
+ return i_blocksize(inode) /
sizeof(struct nilfs_palloc_group_desc);
}
/**
* nilfs_palloc_groups_count - get maximum number of groups
* @inode: inode of metadata file using this allocator
+ *
+ * Return: Maximum number of groups.
*/
static inline unsigned long
nilfs_palloc_groups_count(const struct inode *inode)
@@ -57,8 +47,10 @@ nilfs_palloc_groups_count(const struct inode *inode)
* nilfs_palloc_init_blockgroup - initialize private variables for allocator
* @inode: inode of metadata file using this allocator
* @entry_size: size of the persistent object
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
-int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
+int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned int entry_size)
{
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
@@ -73,13 +65,17 @@ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
mi->mi_blocks_per_group =
DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode),
mi->mi_entries_per_block) + 1;
- /* Number of blocks in a group including entry blocks and
- a bitmap block */
+ /*
+ * Number of blocks in a group including entry blocks
+ * and a bitmap block
+ */
mi->mi_blocks_per_desc_block =
nilfs_palloc_groups_per_desc_block(inode) *
mi->mi_blocks_per_group + 1;
- /* Number of blocks per descriptor including the
- descriptor block */
+ /*
+ * Number of blocks per descriptor including the
+ * descriptor block
+ */
return 0;
}
@@ -88,6 +84,9 @@ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
* @inode: inode of metadata file using this allocator
* @nr: serial number of the entry (e.g. inode number)
* @offset: pointer to store offset number in the group
+ *
+ * Return: Number of the group that contains the entry with the index
+ * specified by @nr.
*/
static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
unsigned long *offset)
@@ -103,8 +102,8 @@ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
* @inode: inode of metadata file using this allocator
* @group: group number
*
- * nilfs_palloc_desc_blkoff() returns block offset of the descriptor
- * block which contains a descriptor of the specified group.
+ * Return: Index number in the metadata file of the descriptor block of
+ * the group specified by @group.
*/
static unsigned long
nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
@@ -121,6 +120,9 @@ nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
*
* nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap
* block used to allocate/deallocate entries in the specified group.
+ *
+ * Return: Index number in the metadata file of the bitmap block of
+ * the group specified by @group.
*/
static unsigned long
nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
@@ -133,44 +135,52 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
/**
* nilfs_palloc_group_desc_nfrees - get the number of free entries in a group
- * @inode: inode of metadata file using this allocator
- * @group: group number
* @desc: pointer to descriptor structure for the group
+ * @lock: spin lock protecting @desc
+ *
+ * Return: Number of free entries written in the group descriptor @desc.
*/
static unsigned long
-nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group,
- const struct nilfs_palloc_group_desc *desc)
+nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc,
+ spinlock_t *lock)
{
unsigned long nfree;
- spin_lock(nilfs_mdt_bgl_lock(inode, group));
+ spin_lock(lock);
nfree = le32_to_cpu(desc->pg_nfrees);
- spin_unlock(nilfs_mdt_bgl_lock(inode, group));
+ spin_unlock(lock);
return nfree;
}
/**
* nilfs_palloc_group_desc_add_entries - adjust count of free entries
- * @inode: inode of metadata file using this allocator
- * @group: group number
* @desc: pointer to descriptor structure for the group
+ * @lock: spin lock protecting @desc
* @n: delta to be added
+ *
+ * Return: Number of free entries after adjusting the group descriptor
+ * @desc.
*/
-static void
-nilfs_palloc_group_desc_add_entries(struct inode *inode,
- unsigned long group,
- struct nilfs_palloc_group_desc *desc,
- u32 n)
+static u32
+nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc,
+ spinlock_t *lock, u32 n)
{
- spin_lock(nilfs_mdt_bgl_lock(inode, group));
+ u32 nfree;
+
+ spin_lock(lock);
le32_add_cpu(&desc->pg_nfrees, n);
- spin_unlock(nilfs_mdt_bgl_lock(inode, group));
+ nfree = le32_to_cpu(desc->pg_nfrees);
+ spin_unlock(lock);
+ return nfree;
}
/**
* nilfs_palloc_entry_blkoff - get block offset of an entry block
* @inode: inode of metadata file using this allocator
* @nr: serial number of the entry (e.g. inode number)
+ *
+ * Return: Index number in the metadata file of the block containing
+ * the entry specified by @nr.
*/
static unsigned long
nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
@@ -187,12 +197,14 @@ nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
* nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block
* @inode: inode of metadata file
* @bh: buffer head of the buffer to be initialized
- * @kaddr: kernel address mapped for the page including the buffer
+ * @from: kernel address mapped for a chunk of the block
+ *
+ * This function does not yet support the case where block size > PAGE_SIZE.
*/
static void nilfs_palloc_desc_block_init(struct inode *inode,
- struct buffer_head *bh, void *kaddr)
+ struct buffer_head *bh, void *from)
{
- struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh);
+ struct nilfs_palloc_group_desc *desc = from;
unsigned long n = nilfs_palloc_groups_per_desc_block(inode);
__le32 nfrees;
@@ -215,7 +227,8 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
int ret;
spin_lock(lock);
- if (prev->bh && blkoff == prev->blkoff) {
+ if (prev->bh && blkoff == prev->blkoff &&
+ likely(buffer_uptodate(prev->bh))) {
get_bh(prev->bh);
*bhp = prev->bh;
spin_unlock(lock);
@@ -240,11 +253,39 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
}
/**
+ * nilfs_palloc_delete_block - delete a block on the persistent allocator file
+ * @inode: inode of metadata file using this allocator
+ * @blkoff: block offset
+ * @prev: nilfs_bh_assoc struct of the last used buffer
+ * @lock: spin lock protecting @prev
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Non-existent block.
+ * * %-ENOMEM - Insufficient memory available.
+ */
+static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff,
+ struct nilfs_bh_assoc *prev,
+ spinlock_t *lock)
+{
+ spin_lock(lock);
+ if (prev->bh && blkoff == prev->blkoff) {
+ brelse(prev->bh);
+ prev->bh = NULL;
+ }
+ spin_unlock(lock);
+ return nilfs_mdt_delete_block(inode, blkoff);
+}
+
+/**
* nilfs_palloc_get_desc_block - get buffer head of a group descriptor block
* @inode: inode of metadata file using this allocator
* @group: group number
* @create: create flag
* @bhp: pointer to store the resultant buffer head
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_palloc_get_desc_block(struct inode *inode,
unsigned long group,
@@ -264,6 +305,8 @@ static int nilfs_palloc_get_desc_block(struct inode *inode,
* @group: group number
* @create: create flag
* @bhp: pointer to store the resultant buffer head
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_palloc_get_bitmap_block(struct inode *inode,
unsigned long group,
@@ -278,11 +321,31 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode,
}
/**
+ * nilfs_palloc_delete_bitmap_block - delete a bitmap block
+ * @inode: inode of metadata file using this allocator
+ * @group: group number
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+static int nilfs_palloc_delete_bitmap_block(struct inode *inode,
+ unsigned long group)
+{
+ struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
+
+ return nilfs_palloc_delete_block(inode,
+ nilfs_palloc_bitmap_blkoff(inode,
+ group),
+ &cache->prev_bitmap, &cache->lock);
+}
+
+/**
* nilfs_palloc_get_entry_block - get buffer head of an entry block
* @inode: inode of metadata file using this allocator
* @nr: serial number of the entry (e.g. inode number)
* @create: create flag
* @bhp: pointer to store the resultant buffer head
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
int create, struct buffer_head **bhp)
@@ -296,87 +359,115 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
}
/**
- * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor
+ * nilfs_palloc_delete_entry_block - delete an entry block
+ * @inode: inode of metadata file using this allocator
+ * @nr: serial number of the entry
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr)
+{
+ struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
+
+ return nilfs_palloc_delete_block(inode,
+ nilfs_palloc_entry_blkoff(inode, nr),
+ &cache->prev_entry, &cache->lock);
+}
+
+/**
+ * nilfs_palloc_group_desc_offset - calculate the byte offset of a group
+ * descriptor in the folio containing it
* @inode: inode of metadata file using this allocator
* @group: group number
- * @bh: buffer head of the buffer storing the group descriptor block
- * @kaddr: kernel address mapped for the page including the buffer
+ * @bh: buffer head of the group descriptor block
+ *
+ * Return: Byte offset in the folio of the group descriptor for @group.
*/
-static struct nilfs_palloc_group_desc *
-nilfs_palloc_block_get_group_desc(const struct inode *inode,
- unsigned long group,
- const struct buffer_head *bh, void *kaddr)
+static size_t nilfs_palloc_group_desc_offset(const struct inode *inode,
+ unsigned long group,
+ const struct buffer_head *bh)
{
- return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) +
- group % nilfs_palloc_groups_per_desc_block(inode);
+ return offset_in_folio(bh->b_folio, bh->b_data) +
+ sizeof(struct nilfs_palloc_group_desc) *
+ (group % nilfs_palloc_groups_per_desc_block(inode));
}
/**
- * nilfs_palloc_block_get_entry - get kernel address of an entry
+ * nilfs_palloc_bitmap_offset - calculate the byte offset of a bitmap block
+ * in the folio containing it
+ * @bh: buffer head of the bitmap block
+ *
+ * Return: Byte offset in the folio of the bitmap block for @bh.
+ */
+static size_t nilfs_palloc_bitmap_offset(const struct buffer_head *bh)
+{
+ return offset_in_folio(bh->b_folio, bh->b_data);
+}
+
+/**
+ * nilfs_palloc_entry_offset - calculate the byte offset of an entry in the
+ * folio containing it
* @inode: inode of metadata file using this allocator
- * @nr: serial number of the entry (e.g. inode number)
- * @bh: buffer head of the buffer storing the entry block
- * @kaddr: kernel address mapped for the page including the buffer
+ * @nr: serial number of the entry (e.g. inode number)
+ * @bh: buffer head of the entry block
+ *
+ * Return: Byte offset in the folio of the entry @nr.
*/
-void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
- const struct buffer_head *bh, void *kaddr)
+size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr,
+ const struct buffer_head *bh)
{
- unsigned long entry_offset, group_offset;
+ unsigned long entry_index_in_group, entry_index_in_block;
- nilfs_palloc_group(inode, nr, &group_offset);
- entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block;
+ nilfs_palloc_group(inode, nr, &entry_index_in_group);
+ entry_index_in_block = entry_index_in_group %
+ NILFS_MDT(inode)->mi_entries_per_block;
- return kaddr + bh_offset(bh) +
- entry_offset * NILFS_MDT(inode)->mi_entry_size;
+ return offset_in_folio(bh->b_folio, bh->b_data) +
+ entry_index_in_block * NILFS_MDT(inode)->mi_entry_size;
}
/**
* nilfs_palloc_find_available_slot - find available slot in a group
- * @inode: inode of metadata file using this allocator
- * @group: group number
- * @target: offset number of an entry in the group (start point)
* @bitmap: bitmap of the group
+ * @target: offset number of an entry in the group (start point)
* @bsize: size in bits
+ * @lock: spin lock protecting @bitmap
+ * @wrap: whether to wrap around
+ *
+ * Return: Offset number within the group of the found free entry, or
+ * %-ENOSPC if not found.
*/
-static int nilfs_palloc_find_available_slot(struct inode *inode,
- unsigned long group,
+static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
unsigned long target,
- unsigned char *bitmap,
- int bsize)
-{
- int curr, pos, end, i;
-
- if (target > 0) {
- end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1);
- if (end > bsize)
- end = bsize;
- pos = nilfs_find_next_zero_bit(bitmap, end, target);
- if (pos < end &&
- !nilfs_set_bit_atomic(
- nilfs_mdt_bgl_lock(inode, group), pos, bitmap))
- return pos;
- } else
- end = 0;
-
- for (i = 0, curr = end;
- i < bsize;
- i += BITS_PER_LONG, curr += BITS_PER_LONG) {
- /* wrap around */
- if (curr >= bsize)
- curr = 0;
- while (*((unsigned long *)bitmap + curr / BITS_PER_LONG)
- != ~0UL) {
- end = curr + BITS_PER_LONG;
- if (end > bsize)
- end = bsize;
- pos = nilfs_find_next_zero_bit(bitmap, end, curr);
- if ((pos < end) &&
- !nilfs_set_bit_atomic(
- nilfs_mdt_bgl_lock(inode, group), pos,
- bitmap))
+ unsigned int bsize,
+ spinlock_t *lock, bool wrap)
+{
+ int pos, end = bsize;
+
+ if (likely(target < bsize)) {
+ pos = target;
+ do {
+ pos = nilfs_find_next_zero_bit(bitmap, end, pos);
+ if (pos >= end)
+ break;
+ if (!nilfs_set_bit_atomic(lock, pos, bitmap))
return pos;
- }
+ } while (++pos < end);
+
+ end = target;
+ }
+ if (!wrap)
+ return -ENOSPC;
+
+ /* wrap around */
+ for (pos = 0; pos < end; pos++) {
+ pos = nilfs_find_next_zero_bit(bitmap, end, pos);
+ if (pos >= end)
+ break;
+ if (!nilfs_set_bit_atomic(lock, pos, bitmap))
+ return pos;
}
+
return -ENOSPC;
}
@@ -386,6 +477,9 @@ static int nilfs_palloc_find_available_slot(struct inode *inode,
* @inode: inode of metadata file using this allocator
* @curr: current group number
* @max: maximum number of groups
+ *
+ * Return: Number of remaining descriptors (= groups) managed by the descriptor
+ * block.
*/
static unsigned long
nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
@@ -401,17 +495,20 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
* nilfs_palloc_count_desc_blocks - count descriptor blocks number
* @inode: inode of metadata file using this allocator
* @desc_blocks: descriptor blocks number [out]
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_palloc_count_desc_blocks(struct inode *inode,
unsigned long *desc_blocks)
{
- unsigned long blknum;
+ __u64 blknum;
int ret;
ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum);
if (likely(!ret))
*desc_blocks = DIV_ROUND_UP(
- blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block);
+ (unsigned long)blknum,
+ NILFS_MDT(inode)->mi_blocks_per_desc_block);
return ret;
}
@@ -420,6 +517,8 @@ static int nilfs_palloc_count_desc_blocks(struct inode *inode,
* MDT file growing
* @inode: inode of metadata file using this allocator
* @desc_blocks: known current descriptor blocks count
+ *
+ * Return: true if a group can be added in the metadata file, false if not.
*/
static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode,
unsigned long desc_blocks)
@@ -434,6 +533,12 @@ static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode,
* @inode: inode of metadata file using this allocator
* @nused: current number of used entries
* @nmaxp: max number of entries [out]
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ERANGE - Number of entries in use is out of range.
*/
int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp)
{
@@ -464,28 +569,36 @@ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp)
* nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
* @inode: inode of metadata file using this allocator
* @req: nilfs_palloc_req structure exchanged for the allocation
+ * @wrap: whether to wrap around
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - Entries exhausted (No entries available for allocation).
+ * * %-EROFS - Read only filesystem
*/
int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
- struct nilfs_palloc_req *req)
+ struct nilfs_palloc_req *req, bool wrap)
{
struct buffer_head *desc_bh, *bitmap_bh;
struct nilfs_palloc_group_desc *desc;
unsigned char *bitmap;
- void *desc_kaddr, *bitmap_kaddr;
+ size_t doff, boff;
unsigned long group, maxgroup, ngroups;
unsigned long group_offset, maxgroup_offset;
- unsigned long n, entries_per_group, groups_per_desc_block;
+ unsigned long n, entries_per_group;
unsigned long i, j;
+ spinlock_t *lock;
int pos, ret;
ngroups = nilfs_palloc_groups_count(inode);
maxgroup = ngroups - 1;
group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
entries_per_group = nilfs_palloc_entries_per_group(inode);
- groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode);
for (i = 0; i < ngroups; i += n) {
- if (group >= ngroups) {
+ if (group >= ngroups && wrap) {
/* wrap around */
group = 0;
maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr,
@@ -494,54 +607,64 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
if (ret < 0)
return ret;
- desc_kaddr = kmap(desc_bh->b_page);
- desc = nilfs_palloc_block_get_group_desc(
- inode, group, desc_bh, desc_kaddr);
+
+ doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh);
+ desc = kmap_local_folio(desc_bh->b_folio, doff);
n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
maxgroup);
- for (j = 0; j < n; j++, desc++, group++) {
- if (nilfs_palloc_group_desc_nfrees(inode, group, desc)
- > 0) {
- ret = nilfs_palloc_get_bitmap_block(
- inode, group, 1, &bitmap_bh);
- if (ret < 0)
- goto out_desc;
- bitmap_kaddr = kmap(bitmap_bh->b_page);
- bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
- pos = nilfs_palloc_find_available_slot(
- inode, group, group_offset, bitmap,
- entries_per_group);
- if (pos >= 0) {
- /* found a free entry */
- nilfs_palloc_group_desc_add_entries(
- inode, group, desc, -1);
- req->pr_entry_nr =
- entries_per_group * group + pos;
- kunmap(desc_bh->b_page);
- kunmap(bitmap_bh->b_page);
-
- req->pr_desc_bh = desc_bh;
- req->pr_bitmap_bh = bitmap_bh;
- return 0;
- }
- kunmap(bitmap_bh->b_page);
- brelse(bitmap_bh);
+ for (j = 0; j < n; j++, group++, group_offset = 0) {
+ lock = nilfs_mdt_bgl_lock(inode, group);
+ if (nilfs_palloc_group_desc_nfrees(&desc[j], lock) == 0)
+ continue;
+
+ kunmap_local(desc);
+ ret = nilfs_palloc_get_bitmap_block(inode, group, 1,
+ &bitmap_bh);
+ if (unlikely(ret < 0)) {
+ brelse(desc_bh);
+ return ret;
}
- group_offset = 0;
+ /*
+ * Re-kmap the folio containing the first (and
+ * subsequent) group descriptors.
+ */
+ desc = kmap_local_folio(desc_bh->b_folio, doff);
+
+ boff = nilfs_palloc_bitmap_offset(bitmap_bh);
+ bitmap = kmap_local_folio(bitmap_bh->b_folio, boff);
+ pos = nilfs_palloc_find_available_slot(
+ bitmap, group_offset, entries_per_group, lock,
+ wrap);
+ /*
+ * Since the search for a free slot in the second and
+ * subsequent bitmap blocks always starts from the
+ * beginning, the wrap flag only has an effect on the
+ * first search.
+ */
+ kunmap_local(bitmap);
+ if (pos >= 0)
+ goto found;
+
+ brelse(bitmap_bh);
}
- kunmap(desc_bh->b_page);
+ kunmap_local(desc);
brelse(desc_bh);
}
/* no entries left */
return -ENOSPC;
- out_desc:
- kunmap(desc_bh->b_page);
- brelse(desc_bh);
- return ret;
+found:
+ /* found a free entry */
+ nilfs_palloc_group_desc_add_entries(&desc[j], lock, -1);
+ req->pr_entry_nr = entries_per_group * group + pos;
+ kunmap_local(desc);
+
+ req->pr_desc_bh = desc_bh;
+ req->pr_bitmap_bh = bitmap_bh;
+ return 0;
}
/**
@@ -568,27 +691,30 @@ void nilfs_palloc_commit_alloc_entry(struct inode *inode,
void nilfs_palloc_commit_free_entry(struct inode *inode,
struct nilfs_palloc_req *req)
{
- struct nilfs_palloc_group_desc *desc;
unsigned long group, group_offset;
+ size_t doff, boff;
+ struct nilfs_palloc_group_desc *desc;
unsigned char *bitmap;
- void *desc_kaddr, *bitmap_kaddr;
+ spinlock_t *lock;
group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
- desc_kaddr = kmap(req->pr_desc_bh->b_page);
- desc = nilfs_palloc_block_get_group_desc(inode, group,
- req->pr_desc_bh, desc_kaddr);
- bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
- bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
-
- if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
- group_offset, bitmap))
- printk(KERN_WARNING "%s: entry number %llu already freed\n",
- __func__, (unsigned long long)req->pr_entry_nr);
+ doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh);
+ desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff);
+
+ boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh);
+ bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff);
+ lock = nilfs_mdt_bgl_lock(inode, group);
+
+ if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
+ nilfs_warn(inode->i_sb,
+ "%s (ino=%lu): entry number %llu already freed",
+ __func__, inode->i_ino,
+ (unsigned long long)req->pr_entry_nr);
else
- nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
+ nilfs_palloc_group_desc_add_entries(desc, lock, 1);
- kunmap(req->pr_bitmap_bh->b_page);
- kunmap(req->pr_desc_bh->b_page);
+ kunmap_local(bitmap);
+ kunmap_local(desc);
mark_buffer_dirty(req->pr_desc_bh);
mark_buffer_dirty(req->pr_bitmap_bh);
@@ -607,25 +733,29 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
struct nilfs_palloc_req *req)
{
struct nilfs_palloc_group_desc *desc;
- void *desc_kaddr, *bitmap_kaddr;
+ size_t doff, boff;
unsigned char *bitmap;
unsigned long group, group_offset;
+ spinlock_t *lock;
group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
- desc_kaddr = kmap(req->pr_desc_bh->b_page);
- desc = nilfs_palloc_block_get_group_desc(inode, group,
- req->pr_desc_bh, desc_kaddr);
- bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
- bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
- if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
- group_offset, bitmap))
- printk(KERN_WARNING "%s: entry number %llu already freed\n",
- __func__, (unsigned long long)req->pr_entry_nr);
+ doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh);
+ desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff);
+
+ boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh);
+ bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff);
+ lock = nilfs_mdt_bgl_lock(inode, group);
+
+ if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
+ nilfs_warn(inode->i_sb,
+ "%s (ino=%lu): entry number %llu already freed",
+ __func__, inode->i_ino,
+ (unsigned long long)req->pr_entry_nr);
else
- nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
+ nilfs_palloc_group_desc_add_entries(desc, lock, 1);
- kunmap(req->pr_bitmap_bh->b_page);
- kunmap(req->pr_desc_bh->b_page);
+ kunmap_local(bitmap);
+ kunmap_local(desc);
brelse(req->pr_bitmap_bh);
brelse(req->pr_desc_bh);
@@ -639,6 +769,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
* nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object
* @inode: inode of metadata file using this allocator
* @req: nilfs_palloc_req structure exchanged for the removal
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_palloc_prepare_free_entry(struct inode *inode,
struct nilfs_palloc_req *req)
@@ -679,37 +811,32 @@ void nilfs_palloc_abort_free_entry(struct inode *inode,
}
/**
- * nilfs_palloc_group_is_in - judge if an entry is in a group
- * @inode: inode of metadata file using this allocator
- * @group: group number
- * @nr: serial number of the entry (e.g. inode number)
- */
-static int
-nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
-{
- __u64 first, last;
-
- first = group * nilfs_palloc_entries_per_group(inode);
- last = first + nilfs_palloc_entries_per_group(inode) - 1;
- return (nr >= first) && (nr <= last);
-}
-
-/**
* nilfs_palloc_freev - deallocate a set of persistent objects
* @inode: inode of metadata file using this allocator
* @entry_nrs: array of entry numbers to be deallocated
* @nitems: number of entries stored in @entry_nrs
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
{
struct buffer_head *desc_bh, *bitmap_bh;
struct nilfs_palloc_group_desc *desc;
unsigned char *bitmap;
- void *desc_kaddr, *bitmap_kaddr;
+ size_t doff, boff;
unsigned long group, group_offset;
- int i, j, n, ret;
+ __u64 group_min_nr, last_nrs[8];
+ const unsigned long epg = nilfs_palloc_entries_per_group(inode);
+ const unsigned int epb = NILFS_MDT(inode)->mi_entries_per_block;
+ unsigned int entry_start, end, pos;
+ spinlock_t *lock;
+ int i, j, k, ret;
+ u32 nfree;
for (i = 0; i < nitems; i = j) {
+ int change_group = false;
+ int nempties = 0, n = 0;
+
group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
if (ret < 0)
@@ -720,38 +847,85 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
brelse(desc_bh);
return ret;
}
- desc_kaddr = kmap(desc_bh->b_page);
- desc = nilfs_palloc_block_get_group_desc(
- inode, group, desc_bh, desc_kaddr);
- bitmap_kaddr = kmap(bitmap_bh->b_page);
- bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
- for (j = i, n = 0;
- (j < nitems) && nilfs_palloc_group_is_in(inode, group,
- entry_nrs[j]);
- j++) {
- nilfs_palloc_group(inode, entry_nrs[j], &group_offset);
- if (!nilfs_clear_bit_atomic(
- nilfs_mdt_bgl_lock(inode, group),
- group_offset, bitmap)) {
- printk(KERN_WARNING
- "%s: entry number %llu already freed\n",
- __func__,
- (unsigned long long)entry_nrs[j]);
+
+ /* Get the first entry number of the group */
+ group_min_nr = (__u64)group * epg;
+
+ boff = nilfs_palloc_bitmap_offset(bitmap_bh);
+ bitmap = kmap_local_folio(bitmap_bh->b_folio, boff);
+ lock = nilfs_mdt_bgl_lock(inode, group);
+
+ j = i;
+ entry_start = rounddown(group_offset, epb);
+ do {
+ if (!nilfs_clear_bit_atomic(lock, group_offset,
+ bitmap)) {
+ nilfs_warn(inode->i_sb,
+ "%s (ino=%lu): entry number %llu already freed",
+ __func__, inode->i_ino,
+ (unsigned long long)entry_nrs[j]);
} else {
n++;
}
- }
- nilfs_palloc_group_desc_add_entries(inode, group, desc, n);
- kunmap(bitmap_bh->b_page);
- kunmap(desc_bh->b_page);
+ j++;
+ if (j >= nitems || entry_nrs[j] < group_min_nr ||
+ entry_nrs[j] >= group_min_nr + epg) {
+ change_group = true;
+ } else {
+ group_offset = entry_nrs[j] - group_min_nr;
+ if (group_offset >= entry_start &&
+ group_offset < entry_start + epb) {
+ /* This entry is in the same block */
+ continue;
+ }
+ }
+
+ /* Test if the entry block is empty or not */
+ end = entry_start + epb;
+ pos = nilfs_find_next_bit(bitmap, end, entry_start);
+ if (pos >= end) {
+ last_nrs[nempties++] = entry_nrs[j - 1];
+ if (nempties >= ARRAY_SIZE(last_nrs))
+ break;
+ }
+
+ if (change_group)
+ break;
- mark_buffer_dirty(desc_bh);
- mark_buffer_dirty(bitmap_bh);
- nilfs_mdt_mark_dirty(inode);
+ /* Go on to the next entry block */
+ entry_start = rounddown(group_offset, epb);
+ } while (true);
+ kunmap_local(bitmap);
+ mark_buffer_dirty(bitmap_bh);
brelse(bitmap_bh);
+
+ for (k = 0; k < nempties; k++) {
+ ret = nilfs_palloc_delete_entry_block(inode,
+ last_nrs[k]);
+ if (ret && ret != -ENOENT)
+ nilfs_warn(inode->i_sb,
+ "error %d deleting block that object (entry=%llu, ino=%lu) belongs to",
+ ret, (unsigned long long)last_nrs[k],
+ inode->i_ino);
+ }
+
+ doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh);
+ desc = kmap_local_folio(desc_bh->b_folio, doff);
+ nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n);
+ kunmap_local(desc);
+ mark_buffer_dirty(desc_bh);
+ nilfs_mdt_mark_dirty(inode);
brelse(desc_bh);
+
+ if (nfree == nilfs_palloc_entries_per_group(inode)) {
+ ret = nilfs_palloc_delete_bitmap_block(inode, group);
+ if (ret && ret != -ENOENT)
+ nilfs_warn(inode->i_sb,
+ "error %d deleting bitmap block of group=%lu, ino=%lu",
+ ret, group, inode->i_ino);
+ }
}
return 0;
}
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 4bd6451b5703..046d876ea3e0 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -1,25 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * alloc.h - persistent object (dat entry/disk inode) allocator/deallocator
+ * Persistent object (dat entry/disk inode) allocator/deallocator
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Original code was written by Koji Sato <koji@osrg.net>.
- * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>,
- * Amagai Yoshiji <amagai@osrg.net>.
+ * Originally written by Koji Sato.
+ * Two allocators were unified by Ryusuke Konishi and Amagai Yoshiji.
*/
#ifndef _NILFS_ALLOC_H
@@ -35,6 +21,8 @@
*
* The number of entries per group is defined by the number of bits
* that a bitmap block can maintain.
+ *
+ * Return: Number of entries per group.
*/
static inline unsigned long
nilfs_palloc_entries_per_group(const struct inode *inode)
@@ -42,16 +30,16 @@ nilfs_palloc_entries_per_group(const struct inode *inode)
return 1UL << (inode->i_blkbits + 3 /* log2(8 = CHAR_BITS) */);
}
-int nilfs_palloc_init_blockgroup(struct inode *, unsigned);
+int nilfs_palloc_init_blockgroup(struct inode *, unsigned int);
int nilfs_palloc_get_entry_block(struct inode *, __u64, int,
struct buffer_head **);
-void *nilfs_palloc_block_get_entry(const struct inode *, __u64,
- const struct buffer_head *, void *);
+size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr,
+ const struct buffer_head *bh);
int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *);
/**
- * nilfs_palloc_req - persistent allocator request and reply
+ * struct nilfs_palloc_req - persistent allocator request and reply
* @pr_entry_nr: entry number (vblocknr or inode number)
* @pr_desc_bh: buffer head of the buffer containing block group descriptors
* @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap
@@ -64,8 +52,8 @@ struct nilfs_palloc_req {
struct buffer_head *pr_entry_bh;
};
-int nilfs_palloc_prepare_alloc_entry(struct inode *,
- struct nilfs_palloc_req *);
+int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
+ struct nilfs_palloc_req *req, bool wrap);
void nilfs_palloc_commit_alloc_entry(struct inode *,
struct nilfs_palloc_req *);
void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *);
@@ -77,6 +65,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
#define nilfs_set_bit_atomic ext2_set_bit_atomic
#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
#define nilfs_find_next_zero_bit find_next_zero_bit_le
+#define nilfs_find_next_bit find_next_bit_le
/**
* struct nilfs_bh_assoc - block offset and buffer head association
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index aadbd0b5e3e8..ccc1a7aa52d2 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * bmap.c - NILFS block mapping.
+ * NILFS block mapping.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#include <linux/fs.h>
@@ -45,8 +32,8 @@ static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
struct inode *inode = bmap->b_inode;
if (err == -EINVAL) {
- nilfs_error(inode->i_sb, fname,
- "broken bmap (inode number=%lu)\n", inode->i_ino);
+ __nilfs_error(inode->i_sb, fname,
+ "broken bmap (inode number=%lu)", inode->i_ino);
err = -EIO;
}
return err;
@@ -60,17 +47,14 @@ static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
* @ptrp: place to store the value associated to @key
*
* Description: nilfs_bmap_lookup_at_level() finds a record whose key
- * matches @key in the block at @level of the bmap.
- *
- * Return Value: On success, 0 is returned and the record associated with @key
- * is stored in the place pointed by @ptrp. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - A record associated with @key does not exist.
+ * matches @key in the block at @level of the bmap. The record associated
+ * with @key is stored in the place pointed to by @ptrp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - A record associated with @key does not exist.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
__u64 *ptrp)
@@ -80,24 +64,32 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
down_read(&bmap->b_sem);
ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
- if (ret < 0) {
- ret = nilfs_bmap_convert_error(bmap, __func__, ret);
+ if (ret < 0)
goto out;
- }
+
if (NILFS_BMAP_USE_VBN(bmap)) {
ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp,
&blocknr);
if (!ret)
*ptrp = blocknr;
+ else if (ret == -ENOENT) {
+ /*
+ * If there was no valid entry in DAT for the block
+ * address obtained by b_ops->bop_lookup, then pass
+ * internal code -EINVAL to nilfs_bmap_convert_error
+ * to treat it as metadata corruption.
+ */
+ ret = -EINVAL;
+ }
}
out:
up_read(&bmap->b_sem);
- return ret;
+ return nilfs_bmap_convert_error(bmap, __func__, ret);
}
int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
- unsigned maxblocks)
+ unsigned int maxblocks)
{
int ret;
@@ -143,18 +135,13 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
* Description: nilfs_bmap_insert() inserts the new key-record pair specified
* by @key and @rec into @bmap.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EEXIST - A record associated with @key already exist.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EEXIST - A record associated with @key already exists.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
-int nilfs_bmap_insert(struct nilfs_bmap *bmap,
- unsigned long key,
- unsigned long rec)
+int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec)
{
int ret;
@@ -191,19 +178,44 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
return bmap->b_ops->bop_delete(bmap, key);
}
-int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
+/**
+ * nilfs_bmap_seek_key - seek a valid entry and return its key
+ * @bmap: bmap struct
+ * @start: start key number
+ * @keyp: place to store valid key
+ *
+ * Description: nilfs_bmap_seek_key() seeks a valid key on @bmap
+ * starting from @start, and stores it to @keyp if found.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No valid entry was found.
+ * * %-ENOMEM - Insufficient memory available.
+ */
+int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp)
{
- __u64 lastkey;
int ret;
down_read(&bmap->b_sem);
- ret = bmap->b_ops->bop_last_key(bmap, &lastkey);
+ ret = bmap->b_ops->bop_seek_key(bmap, start, keyp);
+ up_read(&bmap->b_sem);
+
+ if (ret < 0)
+ ret = nilfs_bmap_convert_error(bmap, __func__, ret);
+ return ret;
+}
+
+int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp)
+{
+ int ret;
+
+ down_read(&bmap->b_sem);
+ ret = bmap->b_ops->bop_last_key(bmap, keyp);
up_read(&bmap->b_sem);
if (ret < 0)
ret = nilfs_bmap_convert_error(bmap, __func__, ret);
- else
- *key = lastkey;
return ret;
}
@@ -215,16 +227,13 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
* Description: nilfs_bmap_delete() deletes the key-record pair specified by
* @key from @bmap.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - A record associated with @key does not exist.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - A record associated with @key does not exist.
+ * * %-ENOMEM - Insufficient memory available.
*/
-int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
+int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key)
{
int ret;
@@ -235,7 +244,7 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
return nilfs_bmap_convert_error(bmap, __func__, ret);
}
-static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
+static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, __u64 key)
{
__u64 lastkey;
int ret;
@@ -269,14 +278,12 @@ static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
* Description: nilfs_bmap_truncate() removes key-record pairs whose keys are
* greater than or equal to @key from @bmap.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
-int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key)
+int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key)
{
int ret;
@@ -309,12 +316,10 @@ void nilfs_bmap_clear(struct nilfs_bmap *bmap)
* Description: nilfs_bmap_propagate() marks the buffers that directly or
* indirectly refer to the block specified by @bh dirty.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh)
{
@@ -328,7 +333,7 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh)
}
/**
- * nilfs_bmap_lookup_dirty_buffers -
+ * nilfs_bmap_lookup_dirty_buffers - collect dirty block buffers
* @bmap: bmap
* @listp: pointer to buffer head list
*/
@@ -341,22 +346,22 @@ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap,
/**
* nilfs_bmap_assign - assign a new block number to a block
- * @bmap: bmap
- * @bhp: pointer to buffer head
+ * @bmap: bmap
+ * @bh: place to store a pointer to the buffer head to which a block
+ * address is assigned (in/out)
* @blocknr: block number
- * @binfo: block information
+ * @binfo: block information
*
* Description: nilfs_bmap_assign() assigns the block number @blocknr to the
- * buffer specified by @bh.
- *
- * Return Value: On success, 0 is returned and the buffer head of a newly
- * create buffer and the block information associated with the buffer are
- * stored in the place pointed by @bh and @binfo, respectively. On error, one
- * of the following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * buffer specified by @bh. The block information is stored in the memory
+ * pointed to by @binfo, and the buffer head may be replaced as a block
+ * address is assigned, in which case a pointer to the new buffer head is
+ * stored in the memory pointed to by @bh.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_assign(struct nilfs_bmap *bmap,
struct buffer_head **bh,
@@ -381,12 +386,10 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap,
* Description: nilfs_bmap_mark() marks the block specified by @key and @level
* as dirty.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level)
{
@@ -409,7 +412,7 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level)
* Description: nilfs_test_and_clear() is the atomic operation to test and
* clear the dirty state of @bmap.
*
- * Return Value: 1 is returned if @bmap is dirty, or 0 if clear.
+ * Return: 1 if @bmap is dirty, or 0 if clear.
*/
int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
{
@@ -429,15 +432,9 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
const struct buffer_head *bh)
{
- struct buffer_head *pbh;
- __u64 key;
-
- key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
- bmap->b_inode->i_blkbits);
- for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
- key++;
+ loff_t pos = folio_pos(bh->b_folio) + bh_offset(bh);
- return key;
+ return pos >> bmap->b_inode->i_blkbits;
}
__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key)
@@ -475,10 +472,10 @@ static struct lock_class_key nilfs_bmap_mdt_lock_key;
*
* Description: nilfs_bmap_read() initializes the bmap @bmap.
*
- * Return Value: On success, 0 is returned. On error, the following negative
- * error code is returned.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (corrupted bmap).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
{
@@ -506,7 +503,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
break;
case NILFS_IFILE_INO:
lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
- /* Fall through */
+ fallthrough;
default:
bmap->b_ptr_type = NILFS_BMAP_PTR_VM;
bmap->b_last_allocated_key = 0;
@@ -527,13 +524,10 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
*/
void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
{
- down_write(&bmap->b_sem);
memcpy(raw_inode->i_bmap, bmap->b_u.u_data,
NILFS_INODE_BMAP_SIZE * sizeof(__le64));
if (bmap->b_inode->i_ino == NILFS_DAT_INO)
bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
-
- up_write(&bmap->b_sem);
}
void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index b89e68076adc..4656df392722 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * bmap.h - NILFS block mapping.
+ * NILFS block mapping.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#ifndef _NILFS_BMAP_H
@@ -26,7 +13,7 @@
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
-#include <linux/nilfs2_fs.h>
+#include <linux/nilfs2_ondisk.h> /* nilfs_binfo, nilfs_inode, etc */
#include "alloc.h"
#include "dat.h"
@@ -57,11 +44,24 @@ struct nilfs_bmap_stats {
/**
* struct nilfs_bmap_operations - bmap operation table
+ * @bop_lookup: single block search operation
+ * @bop_lookup_contig: consecutive block search operation
+ * @bop_insert: block insertion operation
+ * @bop_delete: block delete operation
+ * @bop_clear: block mapping resource release operation
+ * @bop_propagate: operation to propagate dirty state towards the
+ * mapping root
+ * @bop_lookup_dirty_buffers: operation to collect dirty block buffers
+ * @bop_assign: disk block address assignment operation
+ * @bop_mark: operation to mark in-use blocks as dirty for
+ * relocation by GC
+ * @bop_seek_key: find valid block key operation
+ * @bop_last_key: find last valid block key operation
*/
struct nilfs_bmap_operations {
int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *);
int (*bop_lookup_contig)(const struct nilfs_bmap *, __u64, __u64 *,
- unsigned);
+ unsigned int);
int (*bop_insert)(struct nilfs_bmap *, __u64, __u64);
int (*bop_delete)(struct nilfs_bmap *, __u64);
void (*bop_clear)(struct nilfs_bmap *);
@@ -76,8 +76,10 @@ struct nilfs_bmap_operations {
union nilfs_binfo *);
int (*bop_mark)(struct nilfs_bmap *, __u64, int);
- /* The following functions are internal use only. */
+ int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *);
int (*bop_last_key)(const struct nilfs_bmap *, __u64 *);
+
+ /* private: internal use only */
int (*bop_check_insert)(const struct nilfs_bmap *, __u64);
int (*bop_check_delete)(struct nilfs_bmap *, __u64);
int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int);
@@ -85,9 +87,8 @@ struct nilfs_bmap_operations {
#define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64))
-#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */)
-#define NILFS_BMAP_NEW_PTR_INIT \
- (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1))
+#define NILFS_BMAP_KEY_BIT BITS_PER_LONG
+#define NILFS_BMAP_NEW_PTR_INIT (1UL << (BITS_PER_LONG - 1))
static inline int nilfs_bmap_is_new_ptr(unsigned long ptr)
{
@@ -124,10 +125,14 @@ struct nilfs_bmap {
/* pointer type */
#define NILFS_BMAP_PTR_P 0 /* physical block number (i.e. LBN) */
-#define NILFS_BMAP_PTR_VS 1 /* virtual block number (single
- version) */
-#define NILFS_BMAP_PTR_VM 2 /* virtual block number (has multiple
- versions) */
+#define NILFS_BMAP_PTR_VS 1 /*
+ * virtual block number (single
+ * version)
+ */
+#define NILFS_BMAP_PTR_VM 2 /*
+ * virtual block number (has multiple
+ * versions)
+ */
#define NILFS_BMAP_PTR_U (-1) /* never perform pointer operations */
#define NILFS_BMAP_USE_VBN(bmap) ((bmap)->b_ptr_type > 0)
@@ -152,11 +157,12 @@ struct nilfs_bmap_store {
int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
-int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned);
-int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long);
-int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long);
-int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *);
-int nilfs_bmap_truncate(struct nilfs_bmap *, unsigned long);
+int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned int);
+int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec);
+int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key);
+int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp);
+int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp);
+int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key);
void nilfs_bmap_clear(struct nilfs_bmap *);
int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *);
void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *);
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index a35ae35e6932..568367129092 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -1,25 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * btnode.c - NILFS B-tree node cache
+ * NILFS B-tree node cache
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * This file was originally written by Seiji Kihara <kihara@osrg.net>
- * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for
- * stabilization and simplification.
+ * Originally written by Seiji Kihara.
+ * Fully revised by Ryusuke Konishi for stabilization and simplification.
*
*/
@@ -34,6 +20,24 @@
#include "page.h"
#include "btnode.h"
+
+/**
+ * nilfs_init_btnc_inode - initialize B-tree node cache inode
+ * @btnc_inode: inode to be initialized
+ *
+ * nilfs_init_btnc_inode() sets up an inode for B-tree node cache.
+ */
+void nilfs_init_btnc_inode(struct inode *btnc_inode)
+{
+ struct nilfs_inode_info *ii = NILFS_I(btnc_inode);
+
+ btnc_inode->i_mode = S_IFREG;
+ ii->i_flags = 0;
+ memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap));
+ mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS);
+ btnc_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
+}
+
void nilfs_btnode_cache_clear(struct address_space *btnc)
{
invalidate_mapping_pages(btnc, 0, -1);
@@ -43,44 +47,58 @@ void nilfs_btnode_cache_clear(struct address_space *btnc)
struct buffer_head *
nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
{
- struct inode *inode = NILFS_BTNC_I(btnc);
+ struct inode *inode = btnc->host;
struct buffer_head *bh;
- bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
+ bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
if (unlikely(!bh))
- return NULL;
+ return ERR_PTR(-ENOMEM);
if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
buffer_dirty(bh))) {
- brelse(bh);
- BUG();
+ /*
+ * The block buffer at the specified new address was already
+ * in use. This can happen if it is a virtual block number
+ * and has been reallocated due to corruption of the bitmap
+ * used to manage its allocation state (if not, the buffer
+ * clearing of an abandoned b-tree node is missing somewhere).
+ */
+ nilfs_error(inode->i_sb,
+ "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)",
+ (unsigned long long)blocknr, inode->i_ino);
+ goto failed;
}
- memset(bh->b_data, 0, 1 << inode->i_blkbits);
- bh->b_bdev = inode->i_sb->s_bdev;
+ memset(bh->b_data, 0, i_blocksize(inode));
bh->b_blocknr = blocknr;
set_buffer_mapped(bh);
set_buffer_uptodate(bh);
- unlock_page(bh->b_page);
- page_cache_release(bh->b_page);
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
return bh;
+
+failed:
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
+ brelse(bh);
+ return ERR_PTR(-EIO);
}
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
- sector_t pblocknr, int mode,
+ sector_t pblocknr, blk_opf_t opf,
struct buffer_head **pbh, sector_t *submit_ptr)
{
struct buffer_head *bh;
- struct inode *inode = NILFS_BTNC_I(btnc);
- struct page *page;
+ struct inode *inode = btnc->host;
+ struct folio *folio;
int err;
- bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
+ bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
if (unlikely(!bh))
return -ENOMEM;
err = -EEXIST; /* internal code */
- page = bh->b_page;
+ folio = bh->b_folio;
if (buffer_uptodate(bh) || buffer_dirty(bh))
goto found;
@@ -100,13 +118,13 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
}
}
- if (mode == READA) {
+ if (opf & REQ_RAHEAD) {
if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) {
err = -EBUSY; /* internal code */
brelse(bh);
goto out_locked;
}
- } else { /* mode == READ */
+ } else { /* opf == REQ_OP_READ */
lock_buffer(bh);
}
if (buffer_uptodate(bh)) {
@@ -115,11 +133,10 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
goto found;
}
set_buffer_mapped(bh);
- bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = pblocknr; /* set block address for read */
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
- submit_bh(mode, bh);
+ submit_bh(opf, bh);
bh->b_blocknr = blocknr; /* set back to the given block address */
*submit_ptr = pblocknr;
err = 0;
@@ -127,8 +144,8 @@ found:
*pbh = bh;
out_locked:
- unlock_page(page);
- page_cache_release(page);
+ folio_unlock(folio);
+ folio_put(folio);
return err;
}
@@ -142,36 +159,58 @@ out_locked:
void nilfs_btnode_delete(struct buffer_head *bh)
{
struct address_space *mapping;
- struct page *page = bh->b_page;
- pgoff_t index = page_index(page);
+ struct folio *folio = bh->b_folio;
+ pgoff_t index = folio->index;
int still_dirty;
- page_cache_get(page);
- lock_page(page);
- wait_on_page_writeback(page);
+ folio_get(folio);
+ folio_lock(folio);
+ folio_wait_writeback(folio);
nilfs_forget_buffer(bh);
- still_dirty = PageDirty(page);
- mapping = page->mapping;
- unlock_page(page);
- page_cache_release(page);
+ still_dirty = folio_test_dirty(folio);
+ mapping = folio->mapping;
+ folio_unlock(folio);
+ folio_put(folio);
if (!still_dirty && mapping)
invalidate_inode_pages2_range(mapping, index, index);
}
/**
- * nilfs_btnode_prepare_change_key
- * prepare to move contents of the block for old key to one of new key.
- * the old buffer will not be removed, but might be reused for new buffer.
- * it might return -ENOMEM because of memory allocation errors,
- * and might return -EIO because of disk read errors.
+ * nilfs_btnode_prepare_change_key - prepare to change the search key of a
+ * b-tree node block
+ * @btnc: page cache in which the b-tree node block is buffered
+ * @ctxt: structure for exchanging context information for key change
+ *
+ * nilfs_btnode_prepare_change_key() prepares to move the contents of the
+ * b-tree node block of the old key given in the "oldkey" member of @ctxt to
+ * the position of the new key given in the "newkey" member of @ctxt in the
+ * page cache @btnc. Here, the key of the block is an index in units of
+ * blocks, and if the page and block sizes match, it matches the page index
+ * in the page cache.
+ *
+ * If the page size and block size match, this function attempts to move the
+ * entire folio, and in preparation for this, inserts the original folio into
+ * the new index of the cache. If this insertion fails or if the page size
+ * and block size are different, it falls back to a copy preparation using
+ * nilfs_btnode_create_block(), inserts a new block at the position
+ * corresponding to "newkey", and stores the buffer head pointer in the
+ * "newbh" member of @ctxt.
+ *
+ * Note that the current implementation does not support folio sizes larger
+ * than the page size.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_btnode_prepare_change_key(struct address_space *btnc,
struct nilfs_btnode_chkey_ctxt *ctxt)
{
struct buffer_head *obh, *nbh;
- struct inode *inode = NILFS_BTNC_I(btnc);
+ struct inode *inode = btnc->host;
__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
int err;
@@ -181,89 +220,94 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
obh = ctxt->bh;
ctxt->newbh = NULL;
- if (inode->i_blkbits == PAGE_CACHE_SHIFT) {
- lock_page(obh->b_page);
- /*
- * We cannot call radix_tree_preload for the kernels older
- * than 2.6.23, because it is not exported for modules.
- */
+ if (inode->i_blkbits == PAGE_SHIFT) {
+ struct folio *ofolio = obh->b_folio;
+ folio_lock(ofolio);
retry:
- err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
- if (err)
- goto failed_unlock;
- /* BUG_ON(oldkey != obh->b_page->index); */
- if (unlikely(oldkey != obh->b_page->index))
- NILFS_PAGE_BUG(obh->b_page,
+ /* BUG_ON(oldkey != obh->b_folio->index); */
+ if (unlikely(oldkey != ofolio->index))
+ NILFS_FOLIO_BUG(ofolio,
"invalid oldkey %lld (newkey=%lld)",
(unsigned long long)oldkey,
(unsigned long long)newkey);
- spin_lock_irq(&btnc->tree_lock);
- err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
- spin_unlock_irq(&btnc->tree_lock);
+ xa_lock_irq(&btnc->i_pages);
+ err = __xa_insert(&btnc->i_pages, newkey, ofolio, GFP_NOFS);
+ xa_unlock_irq(&btnc->i_pages);
/*
- * Note: page->index will not change to newkey until
+ * Note: folio->index will not change to newkey until
* nilfs_btnode_commit_change_key() will be called.
- * To protect the page in intermediate state, the page lock
+ * To protect the folio in intermediate state, the folio lock
* is held.
*/
- radix_tree_preload_end();
if (!err)
return 0;
- else if (err != -EEXIST)
+ else if (err != -EBUSY)
goto failed_unlock;
err = invalidate_inode_pages2_range(btnc, newkey, newkey);
if (!err)
goto retry;
/* fallback to copy mode */
- unlock_page(obh->b_page);
+ folio_unlock(ofolio);
}
nbh = nilfs_btnode_create_block(btnc, newkey);
- if (!nbh)
- return -ENOMEM;
+ if (IS_ERR(nbh))
+ return PTR_ERR(nbh);
BUG_ON(nbh == obh);
ctxt->newbh = nbh;
return 0;
failed_unlock:
- unlock_page(obh->b_page);
+ folio_unlock(obh->b_folio);
return err;
}
/**
- * nilfs_btnode_commit_change_key
- * commit the change_key operation prepared by prepare_change_key().
+ * nilfs_btnode_commit_change_key - commit the change of the search key of
+ * a b-tree node block
+ * @btnc: page cache in which the b-tree node block is buffered
+ * @ctxt: structure for exchanging context information for key change
+ *
+ * nilfs_btnode_commit_change_key() executes the key change based on the
+ * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid
+ * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move),
+ * this function removes the folio from the old index and completes the move.
+ * Otherwise, it copies the block data and inherited flag states of "oldbh"
+ * to "newbh" and clears the "oldbh" from the cache. In either case, the
+ * relocated buffer is marked as dirty.
+ *
+ * As with nilfs_btnode_prepare_change_key(), the current implementation does
+ * not support folio sizes larger than the page size.
*/
void nilfs_btnode_commit_change_key(struct address_space *btnc,
struct nilfs_btnode_chkey_ctxt *ctxt)
{
struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh;
__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
- struct page *opage;
+ struct folio *ofolio;
if (oldkey == newkey)
return;
if (nbh == NULL) { /* blocksize == pagesize */
- opage = obh->b_page;
- if (unlikely(oldkey != opage->index))
- NILFS_PAGE_BUG(opage,
+ ofolio = obh->b_folio;
+ if (unlikely(oldkey != ofolio->index))
+ NILFS_FOLIO_BUG(ofolio,
"invalid oldkey %lld (newkey=%lld)",
(unsigned long long)oldkey,
(unsigned long long)newkey);
mark_buffer_dirty(obh);
- spin_lock_irq(&btnc->tree_lock);
- radix_tree_delete(&btnc->page_tree, oldkey);
- radix_tree_tag_set(&btnc->page_tree, newkey,
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irq(&btnc->tree_lock);
+ xa_lock_irq(&btnc->i_pages);
+ __xa_erase(&btnc->i_pages, oldkey);
+ __xa_set_mark(&btnc->i_pages, newkey, PAGECACHE_TAG_DIRTY);
+ xa_unlock_irq(&btnc->i_pages);
- opage->index = obh->b_blocknr = newkey;
- unlock_page(opage);
+ ofolio->index = obh->b_blocknr = newkey;
+ folio_unlock(ofolio);
} else {
nilfs_copy_buffer(nbh, obh);
mark_buffer_dirty(nbh);
@@ -275,8 +319,19 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
}
/**
- * nilfs_btnode_abort_change_key
- * abort the change_key operation prepared by prepare_change_key().
+ * nilfs_btnode_abort_change_key - abort the change of the search key of a
+ * b-tree node block
+ * @btnc: page cache in which the b-tree node block is buffered
+ * @ctxt: structure for exchanging context information for key change
+ *
+ * nilfs_btnode_abort_change_key() cancels the key change associated with the
+ * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs
+ * any necessary cleanup. If no valid block buffer is prepared in "newbh" of
+ * @ctxt, this function removes the folio from the destination index and aborts
+ * the move. Otherwise, it clears "newbh" from the cache.
+ *
+ * As with nilfs_btnode_prepare_change_key(), the current implementation does
+ * not support folio sizes larger than the page size.
*/
void nilfs_btnode_abort_change_key(struct address_space *btnc,
struct nilfs_btnode_chkey_ctxt *ctxt)
@@ -288,10 +343,16 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
return;
if (nbh == NULL) { /* blocksize == pagesize */
- spin_lock_irq(&btnc->tree_lock);
- radix_tree_delete(&btnc->page_tree, newkey);
- spin_unlock_irq(&btnc->tree_lock);
- unlock_page(ctxt->bh->b_page);
- } else
- brelse(nbh);
+ xa_erase_irq(&btnc->i_pages, newkey);
+ folio_unlock(ctxt->bh->b_folio);
+ } else {
+ /*
+ * When canceling a buffer that a prepare operation has
+ * allocated to copy a node block to another location, use
+ * nilfs_btnode_delete() to initialize and release the buffer
+ * so that the buffer flags will not be in an inconsistent
+ * state when it is reallocated.
+ */
+ nilfs_btnode_delete(nbh);
+ }
}
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index d876b565ce64..4bc5612dff94 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -1,24 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * btnode.h - NILFS B-tree node cache
+ * NILFS B-tree node cache
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Seiji Kihara <kihara@osrg.net>
- * Revised by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Seiji Kihara.
+ * Revised by Ryusuke Konishi.
*/
#ifndef _NILFS_BTNODE_H
@@ -43,11 +30,12 @@ struct nilfs_btnode_chkey_ctxt {
struct buffer_head *newbh;
};
+void nilfs_init_btnc_inode(struct inode *btnc_inode);
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
__u64 blocknr);
-int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int,
- struct buffer_head **, sector_t *);
+int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t,
+ blk_opf_t, struct buffer_head **, sector_t *);
void nilfs_btnode_delete(struct buffer_head *);
int nilfs_btnode_prepare_change_key(struct address_space *,
struct nilfs_btnode_chkey_ctxt *);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index b2e3ff347620..dd0c8e560ef6 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * btree.c - NILFS B-tree.
+ * NILFS B-tree.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#include <linux/slab.h>
@@ -31,6 +18,8 @@
#include "alloc.h"
#include "dat.h"
+static void __nilfs_btree_init(struct nilfs_bmap *bmap);
+
static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
{
struct nilfs_btree_path *path;
@@ -69,12 +58,13 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path)
static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
__u64 ptr, struct buffer_head **bhp)
{
- struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
+ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
+ struct address_space *btnc = btnc_inode->i_mapping;
struct buffer_head *bh;
bh = nilfs_btnode_create_block(btnc, ptr);
- if (!bh)
- return -ENOMEM;
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
set_buffer_nilfs_volatile(bh);
*bhp = bh;
@@ -121,7 +111,7 @@ nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
{
- return 1 << btree->b_inode->i_blkbits;
+ return i_blocksize(btree->b_inode);
}
static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
@@ -341,12 +331,14 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
* nilfs_btree_node_broken - verify consistency of btree node
* @node: btree node block to be examined
* @size: node size (in bytes)
+ * @inode: host inode of btree
* @blocknr: block number
*
- * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
+ * Return: 0 if normal, 1 if the node is broken.
*/
static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
- size_t size, sector_t blocknr)
+ size_t size, struct inode *inode,
+ sector_t blocknr)
{
int level, flags, nchildren;
int ret = 0;
@@ -358,11 +350,42 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
level >= NILFS_BTREE_LEVEL_MAX ||
(flags & NILFS_BTREE_NODE_ROOT) ||
- nchildren < 0 ||
+ nchildren <= 0 ||
nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
- printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
- "level = %d, flags = 0x%x, nchildren = %d\n",
- (unsigned long long)blocknr, level, flags, nchildren);
+ nilfs_crit(inode->i_sb,
+ "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
+ inode->i_ino, (unsigned long long)blocknr, level,
+ flags, nchildren);
+ ret = 1;
+ }
+ return ret;
+}
+
+/**
+ * nilfs_btree_root_broken - verify consistency of btree root node
+ * @node: btree root node to be examined
+ * @inode: host inode of btree
+ *
+ * Return: 0 if normal, 1 if the root node is broken.
+ */
+static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
+ struct inode *inode)
+{
+ int level, flags, nchildren;
+ int ret = 0;
+
+ level = nilfs_btree_node_get_level(node);
+ flags = nilfs_btree_node_get_flags(node);
+ nchildren = nilfs_btree_node_get_nchildren(node);
+
+ if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
+ level >= NILFS_BTREE_LEVEL_MAX ||
+ nchildren < 0 ||
+ nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX ||
+ (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) {
+ nilfs_crit(inode->i_sb,
+ "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
+ inode->i_ino, level, flags, nchildren);
ret = 1;
}
return ret;
@@ -370,13 +393,15 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
int nilfs_btree_broken_node_block(struct buffer_head *bh)
{
+ struct inode *inode;
int ret;
if (buffer_nilfs_checked(bh))
return 0;
+ inode = bh->b_folio->mapping->host;
ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
- bh->b_size, bh->b_blocknr);
+ bh->b_size, inode, bh->b_blocknr);
if (likely(!ret))
set_buffer_nilfs_checked(bh);
return ret;
@@ -422,13 +447,15 @@ nilfs_btree_get_node(const struct nilfs_bmap *btree,
return node;
}
-static int
-nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
+static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
+ struct nilfs_btree_node *node, int level)
{
if (unlikely(nilfs_btree_node_get_level(node) != level)) {
dump_stack();
- printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
- nilfs_btree_node_get_level(node), level);
+ nilfs_crit(btree->b_inode->i_sb,
+ "btree level mismatch (ino=%lu): %d != %d",
+ btree->b_inode->i_ino,
+ nilfs_btree_node_get_level(node), level);
return 1;
}
return 0;
@@ -445,16 +472,27 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
struct buffer_head **bhp,
const struct nilfs_btree_readahead_info *ra)
{
- struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
+ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
+ struct address_space *btnc = btnc_inode->i_mapping;
struct buffer_head *bh, *ra_bh;
sector_t submit_ptr = 0;
int ret;
- ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
+ ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
+ &submit_ptr);
if (ret) {
- if (ret != -EEXIST)
- return ret;
- goto out_check;
+ if (likely(ret == -EEXIST))
+ goto out_check;
+ if (ret == -ENOENT) {
+ /*
+ * Block address translation failed due to invalid
+ * value of 'ptr'. In this case, return internal code
+ * -EINVAL (broken bmap) to notify bmap layer of fatal
+ * metadata corruption.
+ */
+ ret = -EINVAL;
+ }
+ return ret;
}
if (ra) {
@@ -466,8 +504,9 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
n > 0 && i < ra->ncmax; n--, i++) {
ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
- ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
- &ra_bh, &submit_ptr);
+ ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
+ REQ_OP_READ | REQ_RAHEAD,
+ &ra_bh, &submit_ptr);
if (likely(!ret || ret == -EEXIST))
brelse(ra_bh);
else if (ret != -EBUSY)
@@ -481,6 +520,9 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
out_no_wait:
if (!buffer_uptodate(bh)) {
+ nilfs_err(btree->b_inode->i_sb,
+ "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
+ btree->b_inode->i_ino, (unsigned long long)ptr);
brelse(bh);
return -EIO;
}
@@ -540,7 +582,7 @@ static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
return ret;
node = nilfs_btree_get_nonroot_node(path, level);
- if (nilfs_btree_bad_node(node, level))
+ if (nilfs_btree_bad_node(btree, node, level))
return -EINVAL;
if (!found)
found = nilfs_btree_node_lookup(node, key, &index);
@@ -588,7 +630,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
if (ret < 0)
return ret;
node = nilfs_btree_get_nonroot_node(path, level);
- if (nilfs_btree_bad_node(node, level))
+ if (nilfs_btree_bad_node(btree, node, level))
return -EINVAL;
index = nilfs_btree_node_get_nchildren(node) - 1;
ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
@@ -603,6 +645,43 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
return 0;
}
+/**
+ * nilfs_btree_get_next_key - get next valid key from btree path array
+ * @btree: bmap struct of btree
+ * @path: array of nilfs_btree_path struct
+ * @minlevel: start level
+ * @nextkey: place to store the next valid key
+ *
+ * Walks the levels from @minlevel up to the root, which is looked up at
+ * level nilfs_btree_height() - 1.  At each level the candidate index is
+ * taken from the bp_index recorded in @path: bp_index itself at @minlevel
+ * and bp_index + 1 at every level above it.  The first level whose node
+ * still has a child at the candidate index supplies the key written to
+ * @nextkey.  @nextkey is not written when %-ENOENT is returned.
+ *
+ * Return: 0 if the next key was found, %-ENOENT if not found.
+ */
+static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
+ const struct nilfs_btree_path *path,
+ int minlevel, __u64 *nextkey)
+{
+ struct nilfs_btree_node *node;
+ int maxlevel = nilfs_btree_height(btree) - 1; /* level served by the root node below */
+ int index, next_adj, level;
+
+ /* Next index is already set to bp_index for leaf nodes. */
+ next_adj = 0;
+ for (level = minlevel; level <= maxlevel; level++) {
+ if (level == maxlevel)
+ node = nilfs_btree_get_root(btree);
+ else
+ node = nilfs_btree_get_nonroot_node(path, level);
+
+ index = path[level].bp_index + next_adj;
+ if (index < nilfs_btree_node_get_nchildren(node)) {
+ /* Next key is in this node */
+ *nextkey = nilfs_btree_node_get_key(node, index);
+ return 0;
+ }
+ /*
+ * For non-leaf nodes, next index is stored at bp_index + 1.
+ * Reaching this point means the candidate index ran past the
+ * last child of this node, so the search moves one level up.
+ */
+ next_adj = 1;
+ }
+ return -ENOENT; /* no level, root included, had a child at its candidate index */
+}
+
static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
__u64 key, int level, __u64 *ptrp)
{
@@ -621,7 +700,8 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
}
static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
- __u64 key, __u64 *ptrp, unsigned maxblocks)
+ __u64 key, __u64 *ptrp,
+ unsigned int maxblocks)
{
struct nilfs_btree_path *path;
struct nilfs_btree_node *node;
@@ -644,7 +724,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
dat = nilfs_bmap_get_dat(btree);
ret = nilfs_dat_translate(dat, ptr, &blocknr);
if (ret < 0)
- goto out;
+ goto dat_error;
ptr = blocknr;
}
cnt = 1;
@@ -663,13 +743,12 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
if (dat) {
ret = nilfs_dat_translate(dat, ptr2, &blocknr);
if (ret < 0)
- goto out;
+ goto dat_error;
ptr2 = blocknr;
}
if (ptr2 != ptr + cnt || ++cnt == maxblocks)
goto end;
index++;
- continue;
}
if (level == maxlevel)
break;
@@ -702,6 +781,11 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
out:
nilfs_btree_free_path(path);
return ret;
+
+ dat_error:
+ if (ret == -ENOENT)
+ ret = -EINVAL; /* Notify bmap layer of metadata corruption */
+ goto out;
}
static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
@@ -851,8 +935,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
int level, __u64 *keyp, __u64 *ptrp)
{
struct nilfs_btree_node *node, *right;
- __u64 newkey;
- __u64 newptr;
int nchildren, n, move, ncblk;
node = nilfs_btree_get_nonroot_node(path, level);
@@ -874,9 +956,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
if (!buffer_dirty(path[level].bp_sib_bh))
mark_buffer_dirty(path[level].bp_sib_bh);
- newkey = nilfs_btree_node_get_key(right, 0);
- newptr = path[level].bp_newreq.bpr_ptr;
-
if (move) {
path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
nilfs_btree_node_insert(right, path[level].bp_index,
@@ -969,12 +1048,12 @@ static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
if (ptr != NILFS_BMAP_INVALID_PTR)
/* sequential access */
return ptr;
- else {
- ptr = nilfs_btree_find_near(btree, path);
- if (ptr != NILFS_BMAP_INVALID_PTR)
- /* near */
- return ptr;
- }
+
+ ptr = nilfs_btree_find_near(btree, path);
+ if (ptr != NILFS_BMAP_INVALID_PTR)
+ /* near */
+ return ptr;
+
/* block group */
return nilfs_bmap_find_target_in_group(btree);
}
@@ -1533,6 +1612,27 @@ out:
return ret;
}
+static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
+ __u64 *keyp)
+{
+ struct nilfs_btree_path *path;
+ const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; /* level at which the lookup and the next-key search start */
+ int ret;
+
+ path = nilfs_btree_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
+ if (!ret)
+ *keyp = start; /* lookup returned 0: @start itself is reported */
+ else if (ret == -ENOENT)
+ ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); /* @start not found: take the next key from the path the lookup filled in */
+
+ nilfs_btree_free_path(path); /* released on every path once allocated */
+ return ret; /* 0, the next-key result, or any other lookup error unchanged */
+}
+
static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
{
struct nilfs_btree_path *path;
@@ -1558,13 +1658,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
int nchildren, ret;
root = nilfs_btree_get_root(btree);
+ nchildren = nilfs_btree_node_get_nchildren(root);
+ if (unlikely(nchildren == 0))
+ return 0;
+
switch (nilfs_btree_height(btree)) {
case 2:
bh = NULL;
node = root;
break;
case 3:
- nchildren = nilfs_btree_node_get_nchildren(root);
if (nchildren > 1)
return 0;
ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
@@ -1573,17 +1676,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
if (ret < 0)
return ret;
node = (struct nilfs_btree_node *)bh->b_data;
+ nchildren = nilfs_btree_node_get_nchildren(node);
break;
default:
return 0;
}
- nchildren = nilfs_btree_node_get_nchildren(node);
maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
nextmaxkey = (nchildren > 1) ?
nilfs_btree_node_get_key(node, nchildren - 2) : 0;
- if (bh != NULL)
- brelse(bh);
+ brelse(bh);
return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
}
@@ -1631,8 +1733,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
ptrs[i] = le64_to_cpu(dptrs[i]);
}
- if (bh != NULL)
- brelse(bh);
+ brelse(bh);
return nitems;
}
@@ -1657,6 +1758,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
dat = nilfs_bmap_get_dat(btree);
}
+ ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
+ if (ret < 0)
+ return ret;
+
ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
if (ret < 0)
return ret;
@@ -1713,7 +1818,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
/* convert and insert */
dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
- nilfs_btree_init(btree);
+ __nilfs_btree_init(btree);
if (nreq != NULL) {
nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
@@ -1755,19 +1860,28 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
}
/**
- * nilfs_btree_convert_and_insert -
- * @bmap:
- * @key:
- * @ptr:
- * @keys:
- * @ptrs:
- * @n:
+ * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree
+ * @btree: NILFS B-tree structure
+ * @key: Key of the new entry to be inserted
+ * @ptr: Pointer (block number) associated with the key to be inserted
+ * @keys: Array of keys to be inserted in addition to @key
+ * @ptrs: Array of pointers associated with @keys
+ * @n: Number of keys and pointers in @keys and @ptrs
+ *
+ * This function is used to insert a new entry specified by @key and @ptr,
+ * along with additional entries specified by @keys and @ptrs arrays, into a
+ * NILFS B-tree.
+ * It prepares the necessary changes by allocating the required blocks and any
+ * necessary intermediate nodes. It converts configurations from other forms of
+ * block mapping (the one that currently exists is direct mapping) to a B-tree.
+ *
+ * Return: 0 on success or a negative error code on failure.
*/
int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
__u64 key, __u64 ptr,
const __u64 *keys, const __u64 *ptrs, int n)
{
- struct buffer_head *bh;
+ struct buffer_head *bh = NULL;
union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
struct nilfs_bmap_stats stats;
int ret;
@@ -1776,7 +1890,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
di = &dreq;
ni = NULL;
} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
- 1 << btree->b_inode->i_blkbits)) {
+ nilfs_btree_node_size(btree))) {
di = &dreq;
ni = &nreq;
} else {
@@ -1829,7 +1943,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
path[level].bp_ctxt.bh = path[level].bp_bh;
ret = nilfs_btnode_prepare_change_key(
- &NILFS_BMAP_I(btree)->i_btnode_cache,
+ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
&path[level].bp_ctxt);
if (ret < 0) {
nilfs_dat_abort_update(dat,
@@ -1855,7 +1969,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
if (buffer_nilfs_node(path[level].bp_bh)) {
nilfs_btnode_commit_change_key(
- &NILFS_BMAP_I(btree)->i_btnode_cache,
+ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
&path[level].bp_ctxt);
path[level].bp_bh = path[level].bp_ctxt.bh;
}
@@ -1874,7 +1988,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
&path[level].bp_newreq.bpr_req);
if (buffer_nilfs_node(path[level].bp_bh))
nilfs_btnode_abort_change_key(
- &NILFS_BMAP_I(btree)->i_btnode_cache,
+ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
&path[level].bp_ctxt);
}
@@ -1988,9 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree,
ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
if (ret < 0) {
- if (unlikely(ret == -ENOENT))
- printk(KERN_CRIT "%s: key = %llu, level == %d\n",
- __func__, (unsigned long long)key, level);
+ if (unlikely(ret == -ENOENT)) {
+ nilfs_crit(btree->b_inode->i_sb,
+ "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
+ btree->b_inode->i_ino,
+ (unsigned long long)key, level);
+ ret = -EINVAL;
+ }
goto out;
}
@@ -2027,12 +2145,11 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
level >= NILFS_BTREE_LEVEL_MAX) {
dump_stack();
- printk(KERN_WARNING
- "%s: invalid btree level: %d (key=%llu, ino=%lu, "
- "blocknr=%llu)\n",
- __func__, level, (unsigned long long)key,
- NILFS_BMAP_I(btree)->vfs_inode.i_ino,
- (unsigned long long)bh->b_blocknr);
+ nilfs_warn(btree->b_inode->i_sb,
+ "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
+ level, (unsigned long long)key,
+ btree->b_inode->i_ino,
+ (unsigned long long)bh->b_blocknr);
return;
}
@@ -2049,9 +2166,10 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
struct list_head *listp)
{
- struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
+ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
+ struct address_space *btcache = btnc_inode->i_mapping;
struct list_head lists[NILFS_BTREE_LEVEL_MAX];
- struct pagevec pvec;
+ struct folio_batch fbatch;
struct buffer_head *bh, *head;
pgoff_t index = 0;
int level, i;
@@ -2061,19 +2179,19 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
level++)
INIT_LIST_HEAD(&lists[level]);
- pagevec_init(&pvec, 0);
+ folio_batch_init(&fbatch);
- while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- bh = head = page_buffers(pvec.pages[i]);
+ while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1,
+ PAGECACHE_TAG_DIRTY, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ bh = head = folio_buffers(fbatch.folios[i]);
do {
if (buffer_dirty(bh))
nilfs_btree_add_dirty_buffer(btree,
lists, bh);
} while ((bh = bh->b_this_page) != head);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
@@ -2103,12 +2221,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
path[level].bp_ctxt.newkey = blocknr;
path[level].bp_ctxt.bh = *bh;
ret = nilfs_btnode_prepare_change_key(
- &NILFS_BMAP_I(btree)->i_btnode_cache,
+ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
&path[level].bp_ctxt);
if (ret < 0)
return ret;
nilfs_btnode_commit_change_key(
- &NILFS_BMAP_I(btree)->i_btnode_cache,
+ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
&path[level].bp_ctxt);
*bh = path[level].bp_ctxt.bh;
}
@@ -2120,6 +2238,7 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
/* on-disk format */
binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
binfo->bi_dat.bi_level = level;
+ memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
return 0;
}
@@ -2268,7 +2387,9 @@ static const struct nilfs_bmap_operations nilfs_btree_ops = {
.bop_assign = nilfs_btree_assign,
.bop_mark = nilfs_btree_mark,
+ .bop_seek_key = nilfs_btree_seek_key,
.bop_last_key = nilfs_btree_last_key,
+
.bop_check_insert = NULL,
.bop_check_delete = nilfs_btree_check_delete,
.bop_gather_data = nilfs_btree_gather_data,
@@ -2288,18 +2409,34 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
.bop_assign = nilfs_btree_assign_gc,
.bop_mark = NULL,
+ .bop_seek_key = NULL,
.bop_last_key = NULL,
+
.bop_check_insert = NULL,
.bop_check_delete = NULL,
.bop_gather_data = NULL,
};
-int nilfs_btree_init(struct nilfs_bmap *bmap)
+static void __nilfs_btree_init(struct nilfs_bmap *bmap)
{
bmap->b_ops = &nilfs_btree_ops;
bmap->b_nchildren_per_block =
NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
- return 0;
+}
+
+int nilfs_btree_init(struct nilfs_bmap *bmap)
+{
+ int ret = 0;
+
+ __nilfs_btree_init(bmap);
+
+ if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
+ ret = -EIO;
+ else
+ ret = nilfs_attach_btree_node_cache(
+ &NILFS_BMAP_I(bmap)->vfs_inode);
+
+ return ret;
}
void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 22c02e35b6ef..2a220f716c91 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * btree.h - NILFS B-tree.
+ * NILFS B-tree.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#ifndef _NILFS_BTREE_H
@@ -26,7 +13,7 @@
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/list.h>
-#include <linux/nilfs2_fs.h>
+#include <linux/nilfs2_ondisk.h> /* nilfs_btree_node */
#include "btnode.h"
#include "bmap.h"
@@ -37,6 +24,7 @@
* @bp_index: index of child node
* @bp_oldreq: ptr end request for old ptr
* @bp_newreq: ptr alloc request for new ptr
+ * @bp_ctxt: context information for changing the key of a b-tree node block
* @bp_op: rebalance operation
*/
struct nilfs_btree_path {
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index deaa3d33a0aa..4bbdc832d7f2 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * cpfile.c - NILFS checkpoint file.
+ * NILFS checkpoint file.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#include <linux/kernel.h>
@@ -25,7 +12,6 @@
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/errno.h>
-#include <linux/nilfs2_fs.h>
#include "mdt.h"
#include "cpfile.h"
@@ -41,7 +27,8 @@ static unsigned long
nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno)
{
__u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1;
- do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
+
+ tcno = div64_ul(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
return (unsigned long)tcno;
}
@@ -50,9 +37,17 @@ static unsigned long
nilfs_cpfile_get_offset(const struct inode *cpfile, __u64 cno)
{
__u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1;
+
return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
}
+static __u64 nilfs_cpfile_first_checkpoint_in_block(const struct inode *cpfile,
+ unsigned long blkoff)
+{
+ return (__u64)nilfs_cpfile_checkpoints_per_block(cpfile) * blkoff /* cast first so the product is computed in 64 bits */
+ + 1 - NILFS_MDT(cpfile)->mi_first_entry_offset; /* undoes the "+ mi_first_entry_offset - 1" bias applied in nilfs_cpfile_get_blkoff() */
+}
+
static unsigned long
nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile,
__u64 curr,
@@ -73,54 +68,41 @@ static inline int nilfs_cpfile_is_in_first(const struct inode *cpfile,
static unsigned int
nilfs_cpfile_block_add_valid_checkpoints(const struct inode *cpfile,
struct buffer_head *bh,
- void *kaddr,
unsigned int n)
{
- struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
+ struct nilfs_checkpoint *cp;
unsigned int count;
+ cp = kmap_local_folio(bh->b_folio,
+ offset_in_folio(bh->b_folio, bh->b_data));
count = le32_to_cpu(cp->cp_checkpoints_count) + n;
cp->cp_checkpoints_count = cpu_to_le32(count);
+ kunmap_local(cp);
return count;
}
static unsigned int
nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile,
struct buffer_head *bh,
- void *kaddr,
unsigned int n)
{
- struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
+ struct nilfs_checkpoint *cp;
unsigned int count;
+ cp = kmap_local_folio(bh->b_folio,
+ offset_in_folio(bh->b_folio, bh->b_data));
WARN_ON(le32_to_cpu(cp->cp_checkpoints_count) < n);
count = le32_to_cpu(cp->cp_checkpoints_count) - n;
cp->cp_checkpoints_count = cpu_to_le32(count);
+ kunmap_local(cp);
return count;
}
-static inline struct nilfs_cpfile_header *
-nilfs_cpfile_block_get_header(const struct inode *cpfile,
- struct buffer_head *bh,
- void *kaddr)
-{
- return kaddr + bh_offset(bh);
-}
-
-static struct nilfs_checkpoint *
-nilfs_cpfile_block_get_checkpoint(const struct inode *cpfile, __u64 cno,
- struct buffer_head *bh,
- void *kaddr)
-{
- return kaddr + bh_offset(bh) + nilfs_cpfile_get_offset(cpfile, cno) *
- NILFS_MDT(cpfile)->mi_entry_size;
-}
-
static void nilfs_cpfile_block_init(struct inode *cpfile,
struct buffer_head *bh,
- void *kaddr)
+ void *from)
{
- struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
+ struct nilfs_checkpoint *cp = from;
size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
int n = nilfs_cpfile_checkpoints_per_block(cpfile);
@@ -130,10 +112,65 @@ static void nilfs_cpfile_block_init(struct inode *cpfile,
}
}
-static inline int nilfs_cpfile_get_header_block(struct inode *cpfile,
- struct buffer_head **bhp)
+/**
+ * nilfs_cpfile_checkpoint_offset - calculate the byte offset of a checkpoint
+ * entry in the folio containing it
+ * @cpfile: checkpoint file inode
+ * @cno: checkpoint number
+ * @bh: buffer head of block containing checkpoint indexed by @cno
+ *
+ * Return: Byte offset in the folio of the checkpoint specified by @cno.
+ */
+static size_t nilfs_cpfile_checkpoint_offset(const struct inode *cpfile,
+ __u64 cno,
+ struct buffer_head *bh)
+{
+ return offset_in_folio(bh->b_folio, bh->b_data) +
+ nilfs_cpfile_get_offset(cpfile, cno) *
+ NILFS_MDT(cpfile)->mi_entry_size;
+}
+
+/**
+ * nilfs_cpfile_cp_snapshot_list_offset - calculate the byte offset of a
+ * checkpoint snapshot list in the folio
+ * containing it
+ * @cpfile: checkpoint file inode
+ * @cno: checkpoint number
+ * @bh: buffer head of block containing checkpoint indexed by @cno
+ *
+ * Return: Byte offset in the folio of the checkpoint snapshot list specified
+ * by @cno.
+ */
+static size_t nilfs_cpfile_cp_snapshot_list_offset(const struct inode *cpfile,
+ __u64 cno,
+ struct buffer_head *bh)
+{
+ return nilfs_cpfile_checkpoint_offset(cpfile, cno, bh) +
+ offsetof(struct nilfs_checkpoint, cp_snapshot_list);
+}
+
+/**
+ * nilfs_cpfile_ch_snapshot_list_offset - calculate the byte offset of the
+ * snapshot list in the header
+ *
+ * Return: Byte offset in the folio of the snapshot list in the cpfile header.
+ */
+static size_t nilfs_cpfile_ch_snapshot_list_offset(void)
+{
+ return offsetof(struct nilfs_cpfile_header, ch_snapshot_list);
+}
+
+static int nilfs_cpfile_get_header_block(struct inode *cpfile,
+ struct buffer_head **bhp)
{
- return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp);
+ int err = nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp);
+
+ if (unlikely(err == -ENOENT)) {
+ nilfs_error(cpfile->i_sb,
+ "missing header block in checkpoint metadata");
+ err = -EIO;
+ }
+ return err;
}
static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
@@ -146,6 +183,41 @@ static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
create, nilfs_cpfile_block_init, bhp);
}
+/**
+ * nilfs_cpfile_find_checkpoint_block - find and get a buffer on cpfile
+ * @cpfile: inode of cpfile
+ * @start_cno: start checkpoint number (inclusive)
+ * @end_cno: end checkpoint number (inclusive)
+ * @cnop: place to store the next checkpoint number
+ * @bhp: place to store a pointer to buffer_head struct
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No block exists in the range.
+ * * %-ENOMEM - Insufficient memory available.
+ */
+static int nilfs_cpfile_find_checkpoint_block(struct inode *cpfile,
+ __u64 start_cno, __u64 end_cno,
+ __u64 *cnop,
+ struct buffer_head **bhp)
+{
+ unsigned long start, end, blkoff;
+ int ret;
+
+ if (unlikely(start_cno > end_cno))
+ return -ENOENT;
+
+ start = nilfs_cpfile_get_blkoff(cpfile, start_cno);
+ end = nilfs_cpfile_get_blkoff(cpfile, end_cno);
+
+ ret = nilfs_mdt_find_block(cpfile, start, end, &blkoff, bhp);
+ if (!ret)
+ *cnop = (blkoff == start) ? start_cno :
+ nilfs_cpfile_first_checkpoint_in_block(cpfile, blkoff);
+ return ret;
+}
+
static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
__u64 cno)
{
@@ -154,126 +226,232 @@ static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
}
/**
- * nilfs_cpfile_get_checkpoint - get a checkpoint
- * @cpfile: inode of checkpoint file
- * @cno: checkpoint number
- * @create: create flag
- * @cpp: pointer to a checkpoint
- * @bhp: pointer to a buffer head
- *
- * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint
- * specified by @cno. A new checkpoint will be created if @cno is the current
- * checkpoint number and @create is nonzero.
+ * nilfs_cpfile_read_checkpoint - read a checkpoint entry in cpfile
+ * @cpfile: checkpoint file inode
+ * @cno: number of checkpoint entry to read
+ * @root: nilfs root object
+ * @ifile: ifile's inode to read and attach to @root
*
- * Return Value: On success, 0 is returned, and the checkpoint and the
- * buffer head of the buffer on which the checkpoint is located are stored in
- * the place pointed by @cpp and @bhp, respectively. On error, one of the
- * following negative error codes is returned.
+ * This function imports checkpoint information from the checkpoint file and
+ * stores it to the inode file given by @ifile and the nilfs root object
+ * given by @root.
*
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid checkpoint.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EIO - I/O error (including metadata corruption).
+ */
+int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno,
+ struct nilfs_root *root, struct inode *ifile)
+{
+ struct buffer_head *cp_bh;
+ struct nilfs_checkpoint *cp;
+ size_t offset;
+ int ret;
+
+ if (cno < 1 || cno > nilfs_mdt_cno(cpfile))
+ return -EINVAL;
+
+ down_read(&NILFS_MDT(cpfile)->mi_sem);
+ ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
+ if (unlikely(ret < 0)) {
+ if (ret == -ENOENT)
+ ret = -EINVAL;
+ goto out_sem;
+ }
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
+ if (nilfs_checkpoint_invalid(cp)) {
+ ret = -EINVAL;
+ goto put_cp;
+ }
+
+ ret = nilfs_read_inode_common(ifile, &cp->cp_ifile_inode);
+ if (unlikely(ret)) {
+ /*
+ * Since this inode is on a checkpoint entry, treat errors
+ * as metadata corruption.
+ */
+ nilfs_err(cpfile->i_sb,
+ "ifile inode (checkpoint number=%llu) corrupted",
+ (unsigned long long)cno);
+ ret = -EIO;
+ goto put_cp;
+ }
+
+ /* Configure the nilfs root object */
+ atomic64_set(&root->inodes_count, le64_to_cpu(cp->cp_inodes_count));
+ atomic64_set(&root->blocks_count, le64_to_cpu(cp->cp_blocks_count));
+ root->ifile = ifile;
+
+put_cp:
+ kunmap_local(cp);
+ brelse(cp_bh);
+out_sem:
+ up_read(&NILFS_MDT(cpfile)->mi_sem);
+ return ret;
+}
+
+/**
+ * nilfs_cpfile_create_checkpoint - create a checkpoint entry on cpfile
+ * @cpfile: checkpoint file inode
+ * @cno: number of checkpoint to set up
*
- * %-ENOENT - No such checkpoint.
+ * This function creates a checkpoint with the number specified by @cno on
+ * cpfile. If the specified checkpoint entry already exists due to a past
+ * failure, it will be reused without returning an error.
+ * In either case, the buffer of the block containing the checkpoint entry
+ * and the cpfile inode are made dirty for inclusion in the write log.
*
- * %-EINVAL - invalid checkpoint.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-EROFS - Read-only filesystem.
*/
-int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
- __u64 cno,
- int create,
- struct nilfs_checkpoint **cpp,
- struct buffer_head **bhp)
+int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno)
{
struct buffer_head *header_bh, *cp_bh;
struct nilfs_cpfile_header *header;
struct nilfs_checkpoint *cp;
- void *kaddr;
+ size_t offset;
int ret;
- if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) ||
- (cno < nilfs_mdt_cno(cpfile) && create)))
- return -EINVAL;
+ if (WARN_ON_ONCE(cno < 1))
+ return -EIO;
down_write(&NILFS_MDT(cpfile)->mi_sem);
-
ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
- if (ret < 0)
+ if (unlikely(ret < 0))
goto out_sem;
- ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create, &cp_bh);
- if (ret < 0)
+
+ ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 1, &cp_bh);
+ if (unlikely(ret < 0))
goto out_header;
- kaddr = kmap(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
if (nilfs_checkpoint_invalid(cp)) {
- if (!create) {
- kunmap(cp_bh->b_page);
- brelse(cp_bh);
- ret = -ENOENT;
- goto out_header;
- }
/* a newly-created checkpoint */
nilfs_checkpoint_clear_invalid(cp);
+ kunmap_local(cp);
if (!nilfs_cpfile_is_in_first(cpfile, cno))
nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
- kaddr, 1);
- mark_buffer_dirty(cp_bh);
+ 1);
- kaddr = kmap_atomic(header_bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, header_bh,
- kaddr);
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->ch_ncheckpoints, 1);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
mark_buffer_dirty(header_bh);
- nilfs_mdt_mark_dirty(cpfile);
+ } else {
+ kunmap_local(cp);
}
- if (cpp != NULL)
- *cpp = cp;
- *bhp = cp_bh;
+ /* Force the buffer and the inode to become dirty */
+ mark_buffer_dirty(cp_bh);
+ brelse(cp_bh);
+ nilfs_mdt_mark_dirty(cpfile);
- out_header:
+out_header:
brelse(header_bh);
- out_sem:
+out_sem:
up_write(&NILFS_MDT(cpfile)->mi_sem);
return ret;
}
/**
- * nilfs_cpfile_put_checkpoint - put a checkpoint
- * @cpfile: inode of checkpoint file
- * @cno: checkpoint number
- * @bh: buffer head
+ * nilfs_cpfile_finalize_checkpoint - fill in a checkpoint entry in cpfile
+ * @cpfile: checkpoint file inode
+ * @cno: checkpoint number
+ * @root: nilfs root object
+ * @blkinc: number of blocks added by this checkpoint
+ * @ctime: checkpoint creation time
+ * @minor: minor checkpoint flag
+ *
+ * This function completes the checkpoint entry numbered by @cno in the
+ * cpfile with the data given by the arguments @root, @blkinc, @ctime, and
+ * @minor.
*
- * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint
- * specified by @cno. @bh must be the buffer head which has been returned by
- * a previous call to nilfs_cpfile_get_checkpoint() with @cno.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EIO - I/O error (including metadata corruption).
*/
-void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno,
- struct buffer_head *bh)
+int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno,
+ struct nilfs_root *root, __u64 blkinc,
+ time64_t ctime, bool minor)
{
- kunmap(bh->b_page);
- brelse(bh);
+ struct buffer_head *cp_bh;
+ struct nilfs_checkpoint *cp;
+ size_t offset;
+ int ret;
+
+ if (WARN_ON_ONCE(cno < 1))
+ return -EIO;
+
+ down_write(&NILFS_MDT(cpfile)->mi_sem);
+ ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
+ if (unlikely(ret < 0)) {
+ if (ret == -ENOENT)
+ goto error;
+ goto out_sem;
+ }
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
+ if (unlikely(nilfs_checkpoint_invalid(cp))) {
+ kunmap_local(cp);
+ brelse(cp_bh);
+ goto error;
+ }
+
+ cp->cp_snapshot_list.ssl_next = 0;
+ cp->cp_snapshot_list.ssl_prev = 0;
+ cp->cp_inodes_count = cpu_to_le64(atomic64_read(&root->inodes_count));
+ cp->cp_blocks_count = cpu_to_le64(atomic64_read(&root->blocks_count));
+ cp->cp_nblk_inc = cpu_to_le64(blkinc);
+ cp->cp_create = cpu_to_le64(ctime);
+ cp->cp_cno = cpu_to_le64(cno);
+
+ if (minor)
+ nilfs_checkpoint_set_minor(cp);
+ else
+ nilfs_checkpoint_clear_minor(cp);
+
+ nilfs_write_inode_common(root->ifile, &cp->cp_ifile_inode);
+ nilfs_bmap_write(NILFS_I(root->ifile)->i_bmap, &cp->cp_ifile_inode);
+
+ kunmap_local(cp);
+ brelse(cp_bh);
+out_sem:
+ up_write(&NILFS_MDT(cpfile)->mi_sem);
+ return ret;
+
+error:
+ nilfs_error(cpfile->i_sb,
+ "checkpoint finalization failed due to metadata corruption.");
+ ret = -EIO;
+ goto out_sem;
}
/**
* nilfs_cpfile_delete_checkpoints - delete checkpoints
* @cpfile: inode of checkpoint file
* @start: start checkpoint number
- * @end: end checkpoint numer
+ * @end: end checkpoint number
*
* Description: nilfs_cpfile_delete_checkpoints() deletes the checkpoints in
* the period from @start to @end, excluding @end itself. The checkpoints
* which have been already deleted are ignored.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - invalid checkpoints.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid checkpoints.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
__u64 start,
@@ -284,14 +462,15 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
struct nilfs_checkpoint *cp;
size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
__u64 cno;
+ size_t offset;
void *kaddr;
unsigned long tnicps;
int ret, ncps, nicps, nss, count, i;
if (unlikely(start == 0 || start > end)) {
- printk(KERN_ERR "%s: invalid range of checkpoint numbers: "
- "[%llu, %llu)\n", __func__,
- (unsigned long long)start, (unsigned long long)end);
+ nilfs_err(cpfile->i_sb,
+ "cannot delete checkpoints: invalid range [%llu, %llu)",
+ (unsigned long long)start, (unsigned long long)end);
return -EINVAL;
}
@@ -314,9 +493,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
continue;
}
- kaddr = kmap_atomic(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(
- cpfile, cno, cp_bh, kaddr);
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kaddr = kmap_local_folio(cp_bh->b_folio, offset);
nicps = 0;
for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) {
if (nilfs_checkpoint_snapshot(cp)) {
@@ -326,43 +504,42 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
nicps++;
}
}
- if (nicps > 0) {
- tnicps += nicps;
- mark_buffer_dirty(cp_bh);
- nilfs_mdt_mark_dirty(cpfile);
- if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
- count =
- nilfs_cpfile_block_sub_valid_checkpoints(
- cpfile, cp_bh, kaddr, nicps);
- if (count == 0) {
- /* make hole */
- kunmap_atomic(kaddr);
- brelse(cp_bh);
- ret =
- nilfs_cpfile_delete_checkpoint_block(
- cpfile, cno);
- if (ret == 0)
- continue;
- printk(KERN_ERR
- "%s: cannot delete block\n",
- __func__);
- break;
- }
- }
+ kunmap_local(kaddr);
+
+ if (nicps <= 0) {
+ brelse(cp_bh);
+ continue;
+ }
+
+ tnicps += nicps;
+ mark_buffer_dirty(cp_bh);
+ nilfs_mdt_mark_dirty(cpfile);
+ if (nilfs_cpfile_is_in_first(cpfile, cno)) {
+ brelse(cp_bh);
+ continue;
}
- kunmap_atomic(kaddr);
+ count = nilfs_cpfile_block_sub_valid_checkpoints(cpfile, cp_bh,
+ nicps);
brelse(cp_bh);
+ if (count)
+ continue;
+
+ /* Delete the block if there are no more valid checkpoints */
+ ret = nilfs_cpfile_delete_checkpoint_block(cpfile, cno);
+ if (unlikely(ret)) {
+ nilfs_err(cpfile->i_sb,
+ "error %d deleting checkpoint block", ret);
+ break;
+ }
}
if (tnicps > 0) {
- kaddr = kmap_atomic(header_bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, header_bh,
- kaddr);
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
}
brelse(header_bh);
@@ -388,13 +565,15 @@ static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile,
}
static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
- void *buf, unsigned cisz, size_t nci)
+ void *buf, unsigned int cisz,
+ size_t nci)
{
struct nilfs_checkpoint *cp;
struct nilfs_cpinfo *ci = buf;
struct buffer_head *bh;
size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
__u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop;
+ size_t offset;
void *kaddr;
int n, ret;
int ncps, i;
@@ -403,17 +582,18 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
return -ENOENT; /* checkpoint number 0 is invalid */
down_read(&NILFS_MDT(cpfile)->mi_sem);
- for (n = 0; cno < cur_cno && n < nci; cno += ncps) {
- ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
- ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
+ for (n = 0; n < nci; cno += ncps) {
+ ret = nilfs_cpfile_find_checkpoint_block(
+ cpfile, cno, cur_cno - 1, &cno, &bh);
if (ret < 0) {
- if (ret != -ENOENT)
- goto out;
- continue; /* skip hole */
+ if (likely(ret == -ENOENT))
+ break;
+ goto out;
}
+ ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
- kaddr = kmap_atomic(bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, bh);
+ cp = kaddr = kmap_local_folio(bh->b_folio, offset);
for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
if (!nilfs_checkpoint_invalid(cp)) {
nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp,
@@ -422,7 +602,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
n++;
}
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
brelse(bh);
}
@@ -438,7 +618,8 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
}
static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
- void *buf, unsigned cisz, size_t nci)
+ void *buf, unsigned int cisz,
+ size_t nci)
{
struct buffer_head *bh;
struct nilfs_cpfile_header *header;
@@ -446,7 +627,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
struct nilfs_cpinfo *ci = buf;
__u64 curr = *cnop, next;
unsigned long curr_blkoff, next_blkoff;
- void *kaddr;
+ size_t offset;
int n = 0, ret;
down_read(&NILFS_MDT(cpfile)->mi_sem);
@@ -455,10 +636,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
ret = nilfs_cpfile_get_header_block(cpfile, &bh);
if (ret < 0)
goto out;
- kaddr = kmap_atomic(bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
+ header = kmap_local_folio(bh->b_folio, 0);
curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
brelse(bh);
if (curr == 0) {
ret = 0;
@@ -476,9 +656,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
ret = 0; /* No snapshots (started from a hole block) */
goto out;
}
- kaddr = kmap_atomic(bh->b_page);
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, curr, bh);
+ cp = kmap_local_folio(bh->b_folio, offset);
while (n < nci) {
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
curr = ~(__u64)0; /* Terminator */
if (unlikely(nilfs_checkpoint_invalid(cp) ||
!nilfs_checkpoint_snapshot(cp)))
@@ -490,9 +670,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
if (next == 0)
break; /* reach end of the snapshot list */
+ kunmap_local(cp);
next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
if (curr_blkoff != next_blkoff) {
- kunmap_atomic(kaddr);
brelse(bh);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
0, &bh);
@@ -500,12 +680,13 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
WARN_ON(ret == -ENOENT);
goto out;
}
- kaddr = kmap_atomic(bh->b_page);
}
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, next, bh);
+ cp = kmap_local_folio(bh->b_folio, offset);
curr = next;
curr_blkoff = next_blkoff;
}
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
brelse(bh);
*cnop = curr;
ret = n;
@@ -516,15 +697,33 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
}
/**
- * nilfs_cpfile_get_cpinfo -
- * @cpfile:
- * @cno:
- * @ci:
- * @nci:
+ * nilfs_cpfile_get_cpinfo - get information on checkpoints
+ * @cpfile: checkpoint file inode
+ * @cnop: place to pass a starting checkpoint number and receive a
+ * checkpoint number to continue the search
+ * @mode: mode of checkpoints that the caller wants to retrieve
+ * @buf: buffer for storing checkpoints' information
+ * @cisz: byte size of one checkpoint info item in array
+ * @nci: number of checkpoint info items to retrieve
+ *
+ * nilfs_cpfile_get_cpinfo() searches for checkpoints in @mode state
+ * starting from the checkpoint number stored in @cnop, and stores
+ * information about found checkpoints in @buf.
+ * The buffer pointed to by @buf must be large enough to store information
+ * for @nci checkpoints. If information for at least one checkpoint is
+ * successfully retrieved, @cnop is updated to point to the checkpoint
+ * number to continue searching.
+ *
+ * Return: Count of checkpoint info items stored in the output buffer on
+ * success, or one of the following negative error codes on failure:
+ * * %-EINVAL - Invalid checkpoint mode.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Invalid checkpoint number specified.
*/
ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode,
- void *buf, unsigned cisz, size_t nci)
+ void *buf, unsigned int cisz, size_t nci)
{
switch (mode) {
case NILFS_CHECKPOINT:
@@ -537,9 +736,16 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode,
}
/**
- * nilfs_cpfile_delete_checkpoint -
- * @cpfile:
- * @cno:
+ * nilfs_cpfile_delete_checkpoint - delete a checkpoint
+ * @cpfile: checkpoint file inode
+ * @cno: checkpoint number to delete
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EBUSY - Checkpoint in use (snapshot specified).
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No valid checkpoint found.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno)
{
@@ -558,26 +764,6 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno)
return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1);
}
-static struct nilfs_snapshot_list *
-nilfs_cpfile_block_get_snapshot_list(const struct inode *cpfile,
- __u64 cno,
- struct buffer_head *bh,
- void *kaddr)
-{
- struct nilfs_cpfile_header *header;
- struct nilfs_checkpoint *cp;
- struct nilfs_snapshot_list *list;
-
- if (cno != 0) {
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
- list = &cp->cp_snapshot_list;
- } else {
- header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
- list = &header->ch_snapshot_list;
- }
- return list;
-}
-
static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
{
struct buffer_head *header_bh, *curr_bh, *prev_bh, *cp_bh;
@@ -586,94 +772,103 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
struct nilfs_snapshot_list *list;
__u64 curr, prev;
unsigned long curr_blkoff, prev_blkoff;
- void *kaddr;
+ size_t offset, curr_list_offset, prev_list_offset;
int ret;
if (cno == 0)
return -ENOENT; /* checkpoint number 0 is invalid */
down_write(&NILFS_MDT(cpfile)->mi_sem);
+ ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
+ if (unlikely(ret < 0))
+ goto out_sem;
+
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
if (ret < 0)
- goto out_sem;
- kaddr = kmap_atomic(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+ goto out_header;
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
if (nilfs_checkpoint_invalid(cp)) {
ret = -ENOENT;
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
goto out_cp;
}
if (nilfs_checkpoint_snapshot(cp)) {
ret = 0;
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
goto out_cp;
}
- kunmap_atomic(kaddr);
-
- ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
- if (ret < 0)
- goto out_cp;
- kaddr = kmap_atomic(header_bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
+ kunmap_local(cp);
+
+ /*
+ * Find the last snapshot before the checkpoint being changed to
+ * snapshot mode by going backwards through the snapshot list.
+ * Set "prev" to its checkpoint number, or 0 if not found.
+ */
+ header = kmap_local_folio(header_bh->b_folio, 0);
list = &header->ch_snapshot_list;
curr_bh = header_bh;
get_bh(curr_bh);
curr = 0;
curr_blkoff = 0;
+ curr_list_offset = nilfs_cpfile_ch_snapshot_list_offset();
prev = le64_to_cpu(list->ssl_prev);
while (prev > cno) {
prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
curr = prev;
+ kunmap_local(list);
if (curr_blkoff != prev_blkoff) {
- kunmap_atomic(kaddr);
brelse(curr_bh);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
0, &curr_bh);
- if (ret < 0)
- goto out_header;
- kaddr = kmap_atomic(curr_bh->b_page);
+ if (unlikely(ret < 0))
+ goto out_cp;
}
+ curr_list_offset = nilfs_cpfile_cp_snapshot_list_offset(
+ cpfile, curr, curr_bh);
+ list = kmap_local_folio(curr_bh->b_folio, curr_list_offset);
curr_blkoff = prev_blkoff;
- cp = nilfs_cpfile_block_get_checkpoint(
- cpfile, curr, curr_bh, kaddr);
- list = &cp->cp_snapshot_list;
prev = le64_to_cpu(list->ssl_prev);
}
- kunmap_atomic(kaddr);
+ kunmap_local(list);
if (prev != 0) {
ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
&prev_bh);
if (ret < 0)
goto out_curr;
+
+ prev_list_offset = nilfs_cpfile_cp_snapshot_list_offset(
+ cpfile, prev, prev_bh);
} else {
prev_bh = header_bh;
get_bh(prev_bh);
+ prev_list_offset = nilfs_cpfile_ch_snapshot_list_offset();
}
- kaddr = kmap_atomic(curr_bh->b_page);
- list = nilfs_cpfile_block_get_snapshot_list(
- cpfile, curr, curr_bh, kaddr);
+ /* Update the list entry for the next snapshot */
+ list = kmap_local_folio(curr_bh->b_folio, curr_list_offset);
list->ssl_prev = cpu_to_le64(cno);
- kunmap_atomic(kaddr);
+ kunmap_local(list);
- kaddr = kmap_atomic(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+ /* Update the checkpoint being changed to a snapshot */
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
nilfs_checkpoint_set_snapshot(cp);
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
- kaddr = kmap_atomic(prev_bh->b_page);
- list = nilfs_cpfile_block_get_snapshot_list(
- cpfile, prev, prev_bh, kaddr);
+ /* Update the list entry for the previous snapshot */
+ list = kmap_local_folio(prev_bh->b_folio, prev_list_offset);
list->ssl_next = cpu_to_le64(cno);
- kunmap_atomic(kaddr);
+ kunmap_local(list);
- kaddr = kmap_atomic(header_bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
+ /* Update the statistics in the header */
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->ch_nsnapshots, 1);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
mark_buffer_dirty(prev_bh);
mark_buffer_dirty(curr_bh);
@@ -686,12 +881,12 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
out_curr:
brelse(curr_bh);
- out_header:
- brelse(header_bh);
-
out_cp:
brelse(cp_bh);
+ out_header:
+ brelse(header_bh);
+
out_sem:
up_write(&NILFS_MDT(cpfile)->mi_sem);
return ret;
@@ -704,79 +899,87 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
struct nilfs_checkpoint *cp;
struct nilfs_snapshot_list *list;
__u64 next, prev;
- void *kaddr;
+ size_t offset, next_list_offset, prev_list_offset;
int ret;
if (cno == 0)
return -ENOENT; /* checkpoint number 0 is invalid */
down_write(&NILFS_MDT(cpfile)->mi_sem);
+ ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
+ if (unlikely(ret < 0))
+ goto out_sem;
+
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
if (ret < 0)
- goto out_sem;
- kaddr = kmap_atomic(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+ goto out_header;
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, cp_bh);
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
if (nilfs_checkpoint_invalid(cp)) {
ret = -ENOENT;
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
goto out_cp;
}
if (!nilfs_checkpoint_snapshot(cp)) {
ret = 0;
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
goto out_cp;
}
list = &cp->cp_snapshot_list;
next = le64_to_cpu(list->ssl_next);
prev = le64_to_cpu(list->ssl_prev);
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
- ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
- if (ret < 0)
- goto out_cp;
if (next != 0) {
ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0,
&next_bh);
if (ret < 0)
- goto out_header;
+ goto out_cp;
+
+ next_list_offset = nilfs_cpfile_cp_snapshot_list_offset(
+ cpfile, next, next_bh);
} else {
next_bh = header_bh;
get_bh(next_bh);
+ next_list_offset = nilfs_cpfile_ch_snapshot_list_offset();
}
if (prev != 0) {
ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
&prev_bh);
if (ret < 0)
goto out_next;
+
+ prev_list_offset = nilfs_cpfile_cp_snapshot_list_offset(
+ cpfile, prev, prev_bh);
} else {
prev_bh = header_bh;
get_bh(prev_bh);
+ prev_list_offset = nilfs_cpfile_ch_snapshot_list_offset();
}
- kaddr = kmap_atomic(next_bh->b_page);
- list = nilfs_cpfile_block_get_snapshot_list(
- cpfile, next, next_bh, kaddr);
+ /* Update the list entry for the next snapshot */
+ list = kmap_local_folio(next_bh->b_folio, next_list_offset);
list->ssl_prev = cpu_to_le64(prev);
- kunmap_atomic(kaddr);
+ kunmap_local(list);
- kaddr = kmap_atomic(prev_bh->b_page);
- list = nilfs_cpfile_block_get_snapshot_list(
- cpfile, prev, prev_bh, kaddr);
+ /* Update the list entry for the previous snapshot */
+ list = kmap_local_folio(prev_bh->b_folio, prev_list_offset);
list->ssl_next = cpu_to_le64(next);
- kunmap_atomic(kaddr);
+ kunmap_local(list);
- kaddr = kmap_atomic(cp_bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+ /* Update the snapshot being changed back to a plain checkpoint */
+ cp = kmap_local_folio(cp_bh->b_folio, offset);
cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
nilfs_checkpoint_clear_snapshot(cp);
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
- kaddr = kmap_atomic(header_bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
+ /* Update the statistics in the header */
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->ch_nsnapshots, -1);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
mark_buffer_dirty(next_bh);
mark_buffer_dirty(prev_bh);
@@ -789,43 +992,39 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
out_next:
brelse(next_bh);
- out_header:
- brelse(header_bh);
-
out_cp:
brelse(cp_bh);
+ out_header:
+ brelse(header_bh);
+
out_sem:
up_write(&NILFS_MDT(cpfile)->mi_sem);
return ret;
}
/**
- * nilfs_cpfile_is_snapshot -
+ * nilfs_cpfile_is_snapshot - determine if checkpoint is a snapshot
* @cpfile: inode of checkpoint file
- * @cno: checkpoint number
- *
- * Description:
- *
- * Return Value: On success, 1 is returned if the checkpoint specified by
- * @cno is a snapshot, or 0 if not. On error, one of the following negative
- * error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * @cno: checkpoint number
*
- * %-ENOENT - No such checkpoint.
+ * Return: 1 if the checkpoint specified by @cno is a snapshot, 0 if not, or
+ * one of the following negative error codes on failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No such checkpoint.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
{
struct buffer_head *bh;
struct nilfs_checkpoint *cp;
- void *kaddr;
+ size_t offset;
int ret;
- /* CP number is invalid if it's zero or larger than the
- largest exist one.*/
+ /*
+ * CP number is invalid if it's zero or larger than the
+ * largest existing one.
+ */
if (cno == 0 || cno >= nilfs_mdt_cno(cpfile))
return -ENOENT;
down_read(&NILFS_MDT(cpfile)->mi_sem);
@@ -833,13 +1032,14 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
if (ret < 0)
goto out;
- kaddr = kmap_atomic(bh->b_page);
- cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
+
+ offset = nilfs_cpfile_checkpoint_offset(cpfile, cno, bh);
+ cp = kmap_local_folio(bh->b_folio, offset);
if (nilfs_checkpoint_invalid(cp))
ret = -ENOENT;
else
ret = nilfs_checkpoint_snapshot(cp);
- kunmap_atomic(kaddr);
+ kunmap_local(cp);
brelse(bh);
out:
@@ -851,19 +1051,16 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
* nilfs_cpfile_change_cpmode - change checkpoint mode
* @cpfile: inode of checkpoint file
* @cno: checkpoint number
- * @status: mode of checkpoint
+ * @mode: mode of checkpoint
*
* Description: nilfs_cpfile_change_cpmode() changes the mode of the checkpoint
* specified by @cno. The mode @mode is NILFS_CHECKPOINT or NILFS_SNAPSHOT.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - No such checkpoint.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No such checkpoint.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
{
@@ -892,23 +1089,20 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
/**
* nilfs_cpfile_get_stat - get checkpoint statistics
* @cpfile: inode of checkpoint file
- * @stat: pointer to a structure of checkpoint statistics
+ * @cpstat: pointer to a structure of checkpoint statistics
*
* Description: nilfs_cpfile_get_stat() returns information about checkpoints.
+ * The checkpoint statistics are stored in the location pointed to by @cpstat.
*
- * Return Value: On success, 0 is returned, and checkpoints information is
- * stored in the place pointed by @stat. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
{
struct buffer_head *bh;
struct nilfs_cpfile_header *header;
- void *kaddr;
int ret;
down_read(&NILFS_MDT(cpfile)->mi_sem);
@@ -916,12 +1110,11 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
ret = nilfs_cpfile_get_header_block(cpfile, &bh);
if (ret < 0)
goto out_sem;
- kaddr = kmap_atomic(bh->b_page);
- header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
+ header = kmap_local_folio(bh->b_folio, 0);
cpstat->cs_cno = nilfs_mdt_cno(cpfile);
cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints);
cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
brelse(bh);
out_sem:
@@ -935,6 +1128,8 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
* @cpsize: size of a checkpoint entry
* @raw_inode: on-disk cpfile inode
* @inodep: buffer to store the inode
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_cpfile_read(struct super_block *sb, size_t cpsize,
struct nilfs_inode *raw_inode, struct inode **inodep)
@@ -942,10 +1137,18 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize,
struct inode *cpfile;
int err;
+ if (cpsize > sb->s_blocksize) {
+ nilfs_err(sb, "too large checkpoint size: %zu bytes", cpsize);
+ return -EINVAL;
+ } else if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) {
+ nilfs_err(sb, "too small checkpoint size: %zu bytes", cpsize);
+ return -EINVAL;
+ }
+
cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO);
if (unlikely(!cpfile))
return -ENOMEM;
- if (!(cpfile->i_state & I_NEW))
+ if (!(inode_state_read_once(cpfile) & I_NEW))
goto out;
err = nilfs_mdt_init(cpfile, NILFS_MDT_GFP, 0);
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index a242b9a314f9..f5b1d59289eb 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * cpfile.h - NILFS checkpoint file.
+ * NILFS checkpoint file.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#ifndef _NILFS_CPFILE_H
@@ -25,20 +12,23 @@
#include <linux/fs.h>
#include <linux/buffer_head.h>
-#include <linux/nilfs2_fs.h>
+#include <linux/nilfs2_api.h> /* nilfs_cpstat */
+#include <linux/nilfs2_ondisk.h> /* nilfs_inode, nilfs_checkpoint */
-int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int,
- struct nilfs_checkpoint **,
- struct buffer_head **);
-void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *);
+int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno,
+ struct nilfs_root *root, struct inode *ifile);
+int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno);
+int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno,
+ struct nilfs_root *root, __u64 blkinc,
+ time64_t ctime, bool minor);
int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64);
int nilfs_cpfile_delete_checkpoint(struct inode *, __u64);
int nilfs_cpfile_change_cpmode(struct inode *, __u64, int);
int nilfs_cpfile_is_snapshot(struct inode *, __u64);
int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *);
-ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned,
- size_t);
+ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *,
+ unsigned int, size_t);
int nilfs_cpfile_read(struct super_block *sb, size_t cpsize,
struct nilfs_inode *raw_inode, struct inode **inodep);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index fa0f80308c2d..674380837ab9 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * dat.c - NILFS disk address translation.
+ * NILFS disk address translation.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#include <linux/types.h>
@@ -53,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
static int nilfs_dat_prepare_entry(struct inode *dat,
struct nilfs_palloc_req *req, int create)
{
- return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
- create, &req->pr_entry_bh);
+ int ret;
+
+ ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
+ create, &req->pr_entry_bh);
+ if (unlikely(ret == -ENOENT)) {
+ nilfs_err(dat->i_sb,
+ "DAT doesn't have a block to manage vblocknr = %llu",
+ (unsigned long long)req->pr_entry_nr);
+ /*
+ * Return internal code -EINVAL to notify bmap layer of
+ * metadata corruption.
+ */
+ ret = -EINVAL;
+ }
+ return ret;
}
static void nilfs_dat_commit_entry(struct inode *dat,
@@ -75,7 +75,7 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
int ret;
- ret = nilfs_palloc_prepare_alloc_entry(dat, req);
+ ret = nilfs_palloc_prepare_alloc_entry(dat, req, true);
if (ret < 0)
return ret;
@@ -89,15 +89,15 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
entry->de_blocknr = cpu_to_le64(0);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
nilfs_palloc_commit_alloc_entry(dat, req);
nilfs_dat_commit_entry(dat, req);
@@ -113,41 +113,44 @@ static void nilfs_dat_commit_free(struct inode *dat,
struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
entry->de_blocknr = cpu_to_le64(0);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
nilfs_dat_commit_entry(dat, req);
+
+ if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) {
+ nilfs_error(dat->i_sb,
+ "state inconsistency probably due to duplicate use of vblocknr = %llu",
+ (unsigned long long)req->pr_entry_nr);
+ return;
+ }
nilfs_palloc_commit_free_entry(dat, req);
}
int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
{
- int ret;
-
- ret = nilfs_dat_prepare_entry(dat, req, 0);
- WARN_ON(ret == -ENOENT);
- return ret;
+ return nilfs_dat_prepare_entry(dat, req, 0);
}
void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
sector_t blocknr)
{
struct nilfs_dat_entry *entry;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
entry->de_blocknr = cpu_to_le64(blocknr);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
nilfs_dat_commit_entry(dat, req);
}
@@ -157,21 +160,19 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
struct nilfs_dat_entry *entry;
__u64 start;
sector_t blocknr;
- void *kaddr;
+ size_t offset;
int ret;
ret = nilfs_dat_prepare_entry(dat, req, 0);
- if (ret < 0) {
- WARN_ON(ret == -ENOENT);
+ if (ret < 0)
return ret;
- }
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
start = le64_to_cpu(entry->de_start);
blocknr = le64_to_cpu(entry->de_blocknr);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
if (blocknr == 0) {
ret = nilfs_palloc_prepare_free_entry(dat, req);
@@ -180,6 +181,15 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
return ret;
}
}
+ if (unlikely(start > nilfs_mdt_cno(dat))) {
+ nilfs_err(dat->i_sb,
+ "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)",
+ (unsigned long long)req->pr_entry_nr,
+ (unsigned long long)start,
+ (unsigned long long)nilfs_mdt_cno(dat));
+ nilfs_dat_abort_entry(dat, req);
+ return -EINVAL;
+ }
return 0;
}
@@ -190,11 +200,11 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
struct nilfs_dat_entry *entry;
__u64 start, end;
sector_t blocknr;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
end = start = le64_to_cpu(entry->de_start);
if (!dead) {
end = nilfs_mdt_cno(dat);
@@ -202,7 +212,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
}
entry->de_end = cpu_to_le64(end);
blocknr = le64_to_cpu(entry->de_blocknr);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
if (blocknr == 0)
nilfs_dat_commit_free(dat, req);
@@ -215,14 +225,14 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
struct nilfs_dat_entry *entry;
__u64 start;
sector_t blocknr;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(req->pr_entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
- req->pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
+ req->pr_entry_bh);
+ entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
start = le64_to_cpu(entry->de_start);
blocknr = le64_to_cpu(entry->de_blocknr);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
if (start == nilfs_mdt_cno(dat) && blocknr == 0)
nilfs_palloc_abort_free_entry(dat, req);
@@ -261,18 +271,16 @@ void nilfs_dat_abort_update(struct inode *dat,
}
/**
- * nilfs_dat_mark_dirty -
- * @dat: DAT file inode
+ * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified
+ * virtual block address entry as dirty
+ * @dat: DAT file inode
* @vblocknr: virtual block number
*
- * Description:
- *
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid DAT entry (internal code).
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr)
{
@@ -295,14 +303,11 @@ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr)
* Description: nilfs_dat_freev() frees the virtual block numbers specified by
* @vblocknrs and @nitems.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - The virtual block number have not been allocated.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - The virtual block number has not been allocated.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems)
{
@@ -318,18 +323,16 @@ int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems)
* Description: nilfs_dat_move() changes the block number associated with
* @vblocknr to @blocknr.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
{
struct buffer_head *entry_bh;
struct nilfs_dat_entry *entry;
- void *kaddr;
+ size_t offset;
int ret;
ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
@@ -352,20 +355,21 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
}
}
- kaddr = kmap_atomic(entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh);
+ entry = kmap_local_folio(entry_bh->b_folio, offset);
if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
- printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__,
- (unsigned long long)vblocknr,
- (unsigned long long)le64_to_cpu(entry->de_start),
- (unsigned long long)le64_to_cpu(entry->de_end));
- kunmap_atomic(kaddr);
+ nilfs_crit(dat->i_sb,
+ "%s: invalid vblocknr = %llu, [%llu, %llu)",
+ __func__, (unsigned long long)vblocknr,
+ (unsigned long long)le64_to_cpu(entry->de_start),
+ (unsigned long long)le64_to_cpu(entry->de_end));
+ kunmap_local(entry);
brelse(entry_bh);
return -EINVAL;
}
WARN_ON(blocknr == 0);
entry->de_blocknr = cpu_to_le64(blocknr);
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
mark_buffer_dirty(entry_bh);
nilfs_mdt_mark_dirty(dat);
@@ -382,24 +386,21 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
* @blocknrp: pointer to a block number
*
* Description: nilfs_dat_translate() maps the virtual block number @vblocknr
- * to the corresponding block number.
- *
- * Return Value: On success, 0 is returned and the block number associated
- * with @vblocknr is stored in the place pointed by @blocknrp. On error, one
- * of the following negative error codes is returned.
+ * to the corresponding block number. The block number associated with
+ * @vblocknr is stored in the place pointed to by @blocknrp.
*
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - A block number associated with @vblocknr does not exist.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - A block number associated with @vblocknr does not exist.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
{
struct buffer_head *entry_bh, *bh;
struct nilfs_dat_entry *entry;
sector_t blocknr;
- void *kaddr;
+ size_t offset;
int ret;
ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
@@ -415,8 +416,8 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
}
}
- kaddr = kmap_atomic(entry_bh->b_page);
- entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh);
+ entry = kmap_local_folio(entry_bh->b_folio, offset);
blocknr = le64_to_cpu(entry->de_blocknr);
if (blocknr == 0) {
ret = -ENOENT;
@@ -425,20 +426,21 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
*blocknrp = blocknr;
out:
- kunmap_atomic(kaddr);
+ kunmap_local(entry);
brelse(entry_bh);
return ret;
}
-ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
+ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz,
size_t nvi)
{
struct buffer_head *entry_bh;
- struct nilfs_dat_entry *entry;
+ struct nilfs_dat_entry *entry, *first_entry;
struct nilfs_vinfo *vinfo = buf;
__u64 first, last;
- void *kaddr;
+ size_t offset;
unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block;
+ unsigned int entry_size = NILFS_MDT(dat)->mi_entry_size;
int i, j, n, ret;
for (i = 0; i < nvi; i += n) {
@@ -446,23 +448,28 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
0, &entry_bh);
if (ret < 0)
return ret;
- kaddr = kmap_atomic(entry_bh->b_page);
- /* last virtual block number in this block */
+
first = vinfo->vi_vblocknr;
- do_div(first, entries_per_block);
+ first = div64_ul(first, entries_per_block);
first *= entries_per_block;
+ /* first virtual block number in this block */
+
last = first + entries_per_block - 1;
+ /* last virtual block number in this block */
+
+ offset = nilfs_palloc_entry_offset(dat, first, entry_bh);
+ first_entry = kmap_local_folio(entry_bh->b_folio, offset);
for (j = i, n = 0;
j < nvi && vinfo->vi_vblocknr >= first &&
vinfo->vi_vblocknr <= last;
j++, n++, vinfo = (void *)vinfo + visz) {
- entry = nilfs_palloc_block_get_entry(
- dat, vinfo->vi_vblocknr, entry_bh, kaddr);
+ entry = (void *)first_entry +
+ (vinfo->vi_vblocknr - first) * entry_size;
vinfo->vi_start = le64_to_cpu(entry->de_start);
vinfo->vi_end = le64_to_cpu(entry->de_end);
vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
}
- kunmap_atomic(kaddr);
+ kunmap_local(first_entry);
brelse(entry_bh);
}
@@ -475,6 +482,8 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
* @entry_size: size of a dat entry
* @raw_inode: on-disk dat inode
* @inodep: buffer to store the inode
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_dat_read(struct super_block *sb, size_t entry_size,
struct nilfs_inode *raw_inode, struct inode **inodep)
@@ -484,10 +493,20 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size,
struct nilfs_dat_info *di;
int err;
+ if (entry_size > sb->s_blocksize) {
+ nilfs_err(sb, "too large DAT entry size: %zu bytes",
+ entry_size);
+ return -EINVAL;
+ } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) {
+ nilfs_err(sb, "too small DAT entry size: %zu bytes",
+ entry_size);
+ return -EINVAL;
+ }
+
dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO);
if (unlikely(!dat))
return -ENOMEM;
- if (!(dat->i_state & I_NEW))
+ if (!(inode_state_read_once(dat) & I_NEW))
goto out;
err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di));
@@ -501,7 +520,9 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size,
di = NILFS_DAT_I(dat);
lockdep_set_class(&di->mi.mi_sem, &dat_lock_key);
nilfs_palloc_setup_cache(dat, &di->palloc_cache);
- nilfs_mdt_setup_shadow_map(dat, &di->shadow);
+ err = nilfs_mdt_setup_shadow_map(dat, &di->shadow);
+ if (err)
+ goto failed;
err = nilfs_read_inode_common(dat, raw_inode);
if (err)
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
index cbd8e9732503..468c82d26183 100644
--- a/fs/nilfs2/dat.h
+++ b/fs/nilfs2/dat.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * dat.h - NILFS disk address translation.
+ * NILFS disk address translation.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#ifndef _NILFS_DAT_H
@@ -26,6 +13,7 @@
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
+#include <linux/nilfs2_ondisk.h> /* nilfs_inode, nilfs_checkpoint */
struct nilfs_palloc_req;
@@ -51,7 +39,7 @@ void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *,
int nilfs_dat_mark_dirty(struct inode *, __u64);
int nilfs_dat_freev(struct inode *, __u64 *, size_t);
int nilfs_dat_move(struct inode *, __u64, sector_t);
-ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t);
+ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned int, size_t);
int nilfs_dat_read(struct super_block *sb, size_t entry_size,
struct nilfs_inode *raw_inode, struct inode **inodep);
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 197a63e9d102..6ca3d74be1e1 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * dir.c - NILFS directory entry operations
+ * NILFS directory entry operations
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Modified for NILFS by Amagai Yoshiji <amagai@osrg.net>
+ * Modified for NILFS by Amagai Yoshiji.
*/
/*
* linux/fs/ext2/dir.c
@@ -46,80 +33,91 @@
#include "nilfs.h"
#include "page.h"
-/*
- * nilfs uses block-sized chunks. Arguably, sector-sized ones would be
- * more robust, but we have what we have
- */
-static inline unsigned nilfs_chunk_size(struct inode *inode)
+static inline unsigned int nilfs_rec_len_from_disk(__le16 dlen)
{
- return inode->i_sb->s_blocksize;
+ unsigned int len = le16_to_cpu(dlen);
+
+#if (PAGE_SIZE >= 65536)
+ if (len == NILFS_MAX_REC_LEN)
+ return 1 << 16;
+#endif
+ return len;
}
-static inline void nilfs_put_page(struct page *page)
+static inline __le16 nilfs_rec_len_to_disk(unsigned int len)
{
- kunmap(page);
- page_cache_release(page);
+#if (PAGE_SIZE >= 65536)
+ if (len == (1 << 16))
+ return cpu_to_le16(NILFS_MAX_REC_LEN);
+
+ BUG_ON(len > (1 << 16));
+#endif
+ return cpu_to_le16(len);
}
-static inline unsigned long dir_pages(struct inode *inode)
+/*
+ * nilfs uses block-sized chunks. Arguably, sector-sized ones would be
+ * more robust, but we have what we have
+ */
+static inline unsigned int nilfs_chunk_size(struct inode *inode)
{
- return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
+ return inode->i_sb->s_blocksize;
}
/*
* Return the offset into page `page_nr' of the last valid
* byte in that page, plus one.
*/
-static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr)
+static unsigned int nilfs_last_byte(struct inode *inode, unsigned long page_nr)
{
- unsigned last_byte = inode->i_size;
+ u64 last_byte = inode->i_size;
- last_byte -= page_nr << PAGE_CACHE_SHIFT;
- if (last_byte > PAGE_CACHE_SIZE)
- last_byte = PAGE_CACHE_SIZE;
+ last_byte -= page_nr << PAGE_SHIFT;
+ if (last_byte > PAGE_SIZE)
+ last_byte = PAGE_SIZE;
return last_byte;
}
-static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to)
+static int nilfs_prepare_chunk(struct folio *folio, unsigned int from,
+ unsigned int to)
{
- loff_t pos = page_offset(page) + from;
- return __block_write_begin(page, pos, to - from, nilfs_get_block);
+ loff_t pos = folio_pos(folio) + from;
+
+ return __block_write_begin(folio, pos, to - from, nilfs_get_block);
}
-static void nilfs_commit_chunk(struct page *page,
- struct address_space *mapping,
- unsigned from, unsigned to)
+static void nilfs_commit_chunk(struct folio *folio,
+ struct address_space *mapping, size_t from, size_t to)
{
struct inode *dir = mapping->host;
- loff_t pos = page_offset(page) + from;
- unsigned len = to - from;
- unsigned nr_dirty, copied;
+ loff_t pos = folio_pos(folio) + from;
+ size_t copied, len = to - from;
+ unsigned int nr_dirty;
int err;
- nr_dirty = nilfs_page_count_clean_buffers(page, from, to);
- copied = block_write_end(NULL, mapping, pos, len, len, page, NULL);
+ nr_dirty = nilfs_page_count_clean_buffers(folio, from, to);
+ copied = block_write_end(pos, len, len, folio);
if (pos + copied > dir->i_size)
i_size_write(dir, pos + copied);
if (IS_DIRSYNC(dir))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
err = nilfs_set_file_dirty(dir, nr_dirty);
WARN_ON(err); /* do not happen */
- unlock_page(page);
+ folio_unlock(folio);
}
-static void nilfs_check_page(struct page *page)
+static bool nilfs_check_folio(struct folio *folio, char *kaddr)
{
- struct inode *dir = page->mapping->host;
+ struct inode *dir = folio->mapping->host;
struct super_block *sb = dir->i_sb;
- unsigned chunk_size = nilfs_chunk_size(dir);
- char *kaddr = page_address(page);
- unsigned offs, rec_len;
- unsigned limit = PAGE_CACHE_SIZE;
+ unsigned int chunk_size = nilfs_chunk_size(dir);
+ size_t offs, rec_len;
+ size_t limit = folio_size(folio);
struct nilfs_dir_entry *p;
char *error;
- if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
- limit = dir->i_size & ~PAGE_CACHE_MASK;
+ if (dir->i_size < folio_pos(folio) + limit) {
+ limit = dir->i_size - folio_pos(folio);
if (limit & (chunk_size - 1))
goto Ebadsize;
if (!limit)
@@ -137,20 +135,22 @@ static void nilfs_check_page(struct page *page)
goto Enamelen;
if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
goto Espan;
+ if (unlikely(p->inode &&
+ NILFS_PRIVATE_INODE(le64_to_cpu(p->inode))))
+ goto Einumber;
}
if (offs != limit)
goto Eend;
out:
- SetPageChecked(page);
- return;
+ folio_set_checked(folio);
+ return true;
/* Too bad, we had an error */
Ebadsize:
- nilfs_error(sb, "nilfs_check_page",
+ nilfs_error(sb,
"size of directory #%lu is not a multiple of chunk size",
- dir->i_ino
- );
+ dir->i_ino);
goto fail;
Eshort:
error = "rec_len is smaller than minimal";
@@ -163,41 +163,47 @@ Enamelen:
goto bad_entry;
Espan:
error = "directory entry across blocks";
+ goto bad_entry;
+Einumber:
+ error = "disallowed inode number";
bad_entry:
- nilfs_error(sb, "nilfs_check_page", "bad entry in directory #%lu: %s - "
- "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
- dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
- (unsigned long) le64_to_cpu(p->inode),
+ nilfs_error(sb,
+ "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d",
+ dir->i_ino, error, (folio->index << PAGE_SHIFT) + offs,
+ (unsigned long)le64_to_cpu(p->inode),
rec_len, p->name_len);
goto fail;
Eend:
p = (struct nilfs_dir_entry *)(kaddr + offs);
- nilfs_error(sb, "nilfs_check_page",
- "entry in directory #%lu spans the page boundary"
- "offset=%lu, inode=%lu",
- dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
- (unsigned long) le64_to_cpu(p->inode));
+ nilfs_error(sb,
+ "entry in directory #%lu spans the page boundary offset=%lu, inode=%lu",
+ dir->i_ino, (folio->index << PAGE_SHIFT) + offs,
+ (unsigned long)le64_to_cpu(p->inode));
fail:
- SetPageChecked(page);
- SetPageError(page);
+ return false;
}
-static struct page *nilfs_get_page(struct inode *dir, unsigned long n)
+static void *nilfs_get_folio(struct inode *dir, unsigned long n,
+ struct folio **foliop)
{
struct address_space *mapping = dir->i_mapping;
- struct page *page = read_mapping_page(mapping, n, NULL);
+ struct folio *folio = read_mapping_folio(mapping, n, NULL);
+ void *kaddr;
+
+ if (IS_ERR(folio))
+ return folio;
- if (!IS_ERR(page)) {
- kmap(page);
- if (!PageChecked(page))
- nilfs_check_page(page);
- if (PageError(page))
+ kaddr = kmap_local_folio(folio, 0);
+ if (unlikely(!folio_test_checked(folio))) {
+ if (!nilfs_check_folio(folio, kaddr))
goto fail;
}
- return page;
+
+ *foliop = folio;
+ return kaddr;
fail:
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
return ERR_PTR(-EIO);
}
@@ -225,46 +231,14 @@ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p)
nilfs_rec_len_from_disk(p->rec_len));
}
-static unsigned char
-nilfs_filetype_table[NILFS_FT_MAX] = {
- [NILFS_FT_UNKNOWN] = DT_UNKNOWN,
- [NILFS_FT_REG_FILE] = DT_REG,
- [NILFS_FT_DIR] = DT_DIR,
- [NILFS_FT_CHRDEV] = DT_CHR,
- [NILFS_FT_BLKDEV] = DT_BLK,
- [NILFS_FT_FIFO] = DT_FIFO,
- [NILFS_FT_SOCK] = DT_SOCK,
- [NILFS_FT_SYMLINK] = DT_LNK,
-};
-
-#define S_SHIFT 12
-static unsigned char
-nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
- [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
- [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK,
- [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK,
-};
-
-static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode)
-{
- umode_t mode = inode->i_mode;
-
- de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-}
-
static int nilfs_readdir(struct file *file, struct dir_context *ctx)
{
loff_t pos = ctx->pos;
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
- unsigned int offset = pos & ~PAGE_CACHE_MASK;
- unsigned long n = pos >> PAGE_CACHE_SHIFT;
+ unsigned int offset = pos & ~PAGE_MASK;
+ unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
-/* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */
if (pos > inode->i_size - NILFS_DIR_REC_LEN(1))
return 0;
@@ -272,102 +246,95 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx)
for ( ; n < npages; n++, offset = 0) {
char *kaddr, *limit;
struct nilfs_dir_entry *de;
- struct page *page = nilfs_get_page(inode, n);
+ struct folio *folio;
- if (IS_ERR(page)) {
- nilfs_error(sb, __func__, "bad page in #%lu",
- inode->i_ino);
- ctx->pos += PAGE_CACHE_SIZE - offset;
+ kaddr = nilfs_get_folio(inode, n, &folio);
+ if (IS_ERR(kaddr)) {
+ nilfs_error(sb, "bad page in #%lu", inode->i_ino);
+ ctx->pos += PAGE_SIZE - offset;
return -EIO;
}
- kaddr = page_address(page);
de = (struct nilfs_dir_entry *)(kaddr + offset);
limit = kaddr + nilfs_last_byte(inode, n) -
NILFS_DIR_REC_LEN(1);
for ( ; (char *)de <= limit; de = nilfs_next_entry(de)) {
if (de->rec_len == 0) {
- nilfs_error(sb, __func__,
- "zero-length directory entry");
- nilfs_put_page(page);
+ nilfs_error(sb, "zero-length directory entry");
+ folio_release_kmap(folio, kaddr);
return -EIO;
}
if (de->inode) {
unsigned char t;
- if (de->file_type < NILFS_FT_MAX)
- t = nilfs_filetype_table[de->file_type];
- else
- t = DT_UNKNOWN;
+ t = fs_ftype_to_dtype(de->file_type);
if (!dir_emit(ctx, de->name, de->name_len,
le64_to_cpu(de->inode), t)) {
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
return 0;
}
}
ctx->pos += nilfs_rec_len_from_disk(de->rec_len);
}
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
}
return 0;
}
/*
- * nilfs_find_entry()
+ * nilfs_find_entry()
*
- * finds an entry in the specified directory with the wanted name. It
- * returns the page in which the entry was found, and the entry itself
- * (as a parameter - res_dir). Page is returned mapped and unlocked.
- * Entry is guaranteed to be valid.
+ * Finds an entry in the specified directory with the wanted name. It
+ * returns the folio in which the entry was found, and the entry itself.
+ * The folio is mapped and unlocked. When the caller is finished with
+ * the entry, it should call folio_release_kmap().
+ *
+ * On failure, returns an error pointer and the caller should ignore foliop.
*/
-struct nilfs_dir_entry *
-nilfs_find_entry(struct inode *dir, const struct qstr *qstr,
- struct page **res_page)
+struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir,
+ const struct qstr *qstr, struct folio **foliop)
{
const unsigned char *name = qstr->name;
int namelen = qstr->len;
- unsigned reclen = NILFS_DIR_REC_LEN(namelen);
+ unsigned int reclen = NILFS_DIR_REC_LEN(namelen);
unsigned long start, n;
unsigned long npages = dir_pages(dir);
- struct page *page = NULL;
struct nilfs_inode_info *ei = NILFS_I(dir);
struct nilfs_dir_entry *de;
if (npages == 0)
goto out;
- /* OFFSET_CACHE */
- *res_page = NULL;
-
start = ei->i_dir_start_lookup;
if (start >= npages)
start = 0;
n = start;
do {
- char *kaddr;
- page = nilfs_get_page(dir, n);
- if (!IS_ERR(page)) {
- kaddr = page_address(page);
- de = (struct nilfs_dir_entry *)kaddr;
- kaddr += nilfs_last_byte(dir, n) - reclen;
- while ((char *) de <= kaddr) {
- if (de->rec_len == 0) {
- nilfs_error(dir->i_sb, __func__,
- "zero-length directory entry");
- nilfs_put_page(page);
- goto out;
- }
- if (nilfs_match(namelen, name, de))
- goto found;
- de = nilfs_next_entry(de);
+ char *kaddr = nilfs_get_folio(dir, n, foliop);
+
+ if (IS_ERR(kaddr))
+ return ERR_CAST(kaddr);
+
+ de = (struct nilfs_dir_entry *)kaddr;
+ kaddr += nilfs_last_byte(dir, n) - reclen;
+ while ((char *)de <= kaddr) {
+ if (de->rec_len == 0) {
+ nilfs_error(dir->i_sb,
+ "zero-length directory entry");
+ folio_release_kmap(*foliop, kaddr);
+ goto out;
}
- nilfs_put_page(page);
+ if (nilfs_match(namelen, name, de))
+ goto found;
+ de = nilfs_next_entry(de);
}
+ folio_release_kmap(*foliop, kaddr);
+
if (++n >= npages)
n = 0;
- /* next page is past the blocks we've got */
- if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
- nilfs_error(dir->i_sb, __func__,
+ /* next folio is past the blocks we've got */
+ if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) {
+ nilfs_error(dir->i_sb,
"dir %lu size %lld exceeds block count %llu",
dir->i_ino, dir->i_size,
(unsigned long long)dir->i_blocks);
@@ -375,59 +342,83 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr,
}
} while (n != start);
out:
- return NULL;
+ return ERR_PTR(-ENOENT);
found:
- *res_page = page;
ei->i_dir_start_lookup = n;
return de;
}
-struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p)
+struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
{
- struct page *page = nilfs_get_page(dir, 0);
- struct nilfs_dir_entry *de = NULL;
+ struct folio *folio;
+ struct nilfs_dir_entry *de, *next_de;
+ size_t limit;
+ char *msg;
+
+ de = nilfs_get_folio(dir, 0, &folio);
+ if (IS_ERR(de))
+ return NULL;
+
+ limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */
+ if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
+ !nilfs_match(1, ".", de))) {
+ msg = "missing '.'";
+ goto fail;
+ }
- if (!IS_ERR(page)) {
- de = nilfs_next_entry(
- (struct nilfs_dir_entry *)page_address(page));
- *p = page;
+ next_de = nilfs_next_entry(de);
+ /*
+ * If "next_de" has not reached the end of the chunk, there is
+ * at least one more record. Check whether it matches "..".
+ */
+ if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
+ !nilfs_match(2, "..", next_de))) {
+ msg = "missing '..'";
+ goto fail;
}
- return de;
+ *foliop = folio;
+ return next_de;
+
+fail:
+ nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
+ folio_release_kmap(folio, de);
+ return NULL;
}
-ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
+int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino)
{
- ino_t res = 0;
struct nilfs_dir_entry *de;
- struct page *page;
+ struct folio *folio;
- de = nilfs_find_entry(dir, qstr, &page);
- if (de) {
- res = le64_to_cpu(de->inode);
- kunmap(page);
- page_cache_release(page);
- }
- return res;
+ de = nilfs_find_entry(dir, qstr, &folio);
+ if (IS_ERR(de))
+ return PTR_ERR(de);
+
+ *ino = le64_to_cpu(de->inode);
+ folio_release_kmap(folio, de);
+ return 0;
}
-/* Releases the page */
-void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
- struct page *page, struct inode *inode)
+int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
+ struct folio *folio, struct inode *inode)
{
- unsigned from = (char *) de - (char *) page_address(page);
- unsigned to = from + nilfs_rec_len_from_disk(de->rec_len);
- struct address_space *mapping = page->mapping;
+ size_t from = offset_in_folio(folio, de);
+ size_t to = from + nilfs_rec_len_from_disk(de->rec_len);
+ struct address_space *mapping = folio->mapping;
int err;
- lock_page(page);
- err = nilfs_prepare_chunk(page, from, to);
- BUG_ON(err);
+ folio_lock(folio);
+ err = nilfs_prepare_chunk(folio, from, to);
+ if (unlikely(err)) {
+ folio_unlock(folio);
+ return err;
+ }
de->inode = cpu_to_le64(inode->i_ino);
- nilfs_set_de_type(de, inode);
- nilfs_commit_chunk(page, mapping, from, to);
- nilfs_put_page(page);
- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ de->file_type = fs_umode_to_ftype(inode->i_mode);
+ nilfs_commit_chunk(folio, mapping, from, to);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
+ return 0;
}
/*
@@ -435,37 +426,34 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
*/
int nilfs_add_link(struct dentry *dentry, struct inode *inode)
{
- struct inode *dir = dentry->d_parent->d_inode;
+ struct inode *dir = d_inode(dentry->d_parent);
const unsigned char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
- unsigned chunk_size = nilfs_chunk_size(dir);
- unsigned reclen = NILFS_DIR_REC_LEN(namelen);
+ unsigned int chunk_size = nilfs_chunk_size(dir);
+ unsigned int reclen = NILFS_DIR_REC_LEN(namelen);
unsigned short rec_len, name_len;
- struct page *page = NULL;
+ struct folio *folio = NULL;
struct nilfs_dir_entry *de;
unsigned long npages = dir_pages(dir);
unsigned long n;
- char *kaddr;
- unsigned from, to;
+ size_t from, to;
int err;
/*
* We take care of directory expansion in the same loop.
- * This code plays outside i_size, so it locks the page
+ * This code plays outside i_size, so it locks the folio
* to protect that region.
*/
for (n = 0; n <= npages; n++) {
+ char *kaddr = nilfs_get_folio(dir, n, &folio);
char *dir_end;
- page = nilfs_get_page(dir, n);
- err = PTR_ERR(page);
- if (IS_ERR(page))
- goto out;
- lock_page(page);
- kaddr = page_address(page);
+ if (IS_ERR(kaddr))
+ return PTR_ERR(kaddr);
+ folio_lock(folio);
dir_end = kaddr + nilfs_last_byte(dir, n);
de = (struct nilfs_dir_entry *)kaddr;
- kaddr += PAGE_CACHE_SIZE - reclen;
+ kaddr += folio_size(folio) - reclen;
while ((char *)de <= kaddr) {
if ((char *)de == dir_end) {
/* We hit i_size */
@@ -476,7 +464,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
goto got_it;
}
if (de->rec_len == 0) {
- nilfs_error(dir->i_sb, __func__,
+ nilfs_error(dir->i_sb,
"zero-length directory entry");
err = -EIO;
goto out_unlock;
@@ -492,16 +480,16 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
goto got_it;
de = (struct nilfs_dir_entry *)((char *)de + rec_len);
}
- unlock_page(page);
- nilfs_put_page(page);
+ folio_unlock(folio);
+ folio_release_kmap(folio, kaddr);
}
BUG();
return -EINVAL;
got_it:
- from = (char *)de - (char *)page_address(page);
+ from = offset_in_folio(folio, de);
to = from + rec_len;
- err = nilfs_prepare_chunk(page, from, to);
+ err = nilfs_prepare_chunk(folio, from, to);
if (err)
goto out_unlock;
if (de->inode) {
@@ -515,39 +503,39 @@ got_it:
de->name_len = namelen;
memcpy(de->name, name, namelen);
de->inode = cpu_to_le64(inode->i_ino);
- nilfs_set_de_type(de, inode);
- nilfs_commit_chunk(page, page->mapping, from, to);
- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ de->file_type = fs_umode_to_ftype(inode->i_mode);
+ nilfs_commit_chunk(folio, folio->mapping, from, to);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
nilfs_mark_inode_dirty(dir);
/* OFFSET_CACHE */
out_put:
- nilfs_put_page(page);
-out:
+ folio_release_kmap(folio, de);
return err;
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
goto out_put;
}
/*
* nilfs_delete_entry deletes a directory entry by merging it with the
- * previous entry. Page is up-to-date. Releases the page.
+ * previous entry. Folio is up-to-date.
*/
-int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
+int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct folio *folio)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
- char *kaddr = page_address(page);
- unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1);
- unsigned to = ((char *)dir - kaddr) +
- nilfs_rec_len_from_disk(dir->rec_len);
- struct nilfs_dir_entry *pde = NULL;
- struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from);
+ char *kaddr = (char *)((unsigned long)dir & ~(folio_size(folio) - 1));
+ size_t from, to;
+ struct nilfs_dir_entry *de, *pde = NULL;
int err;
+ from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1);
+ to = ((char *)dir - kaddr) + nilfs_rec_len_from_disk(dir->rec_len);
+ de = (struct nilfs_dir_entry *)(kaddr + from);
+
while ((char *)de < (char *)dir) {
if (de->rec_len == 0) {
- nilfs_error(inode->i_sb, __func__,
+ nilfs_error(inode->i_sb,
"zero-length directory entry");
err = -EIO;
goto out;
@@ -556,17 +544,19 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
de = nilfs_next_entry(de);
}
if (pde)
- from = (char *)pde - (char *)page_address(page);
- lock_page(page);
- err = nilfs_prepare_chunk(page, from, to);
- BUG_ON(err);
+ from = (char *)pde - kaddr;
+ folio_lock(folio);
+ err = nilfs_prepare_chunk(folio, from, to);
+ if (unlikely(err)) {
+ folio_unlock(folio);
+ goto out;
+ }
if (pde)
pde->rec_len = nilfs_rec_len_to_disk(to - from);
dir->inode = 0;
- nilfs_commit_chunk(page, mapping, from, to);
- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ nilfs_commit_chunk(folio, mapping, from, to);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
out:
- nilfs_put_page(page);
return err;
}
@@ -576,39 +566,39 @@ out:
int nilfs_make_empty(struct inode *inode, struct inode *parent)
{
struct address_space *mapping = inode->i_mapping;
- struct page *page = grab_cache_page(mapping, 0);
- unsigned chunk_size = nilfs_chunk_size(inode);
+ struct folio *folio = filemap_grab_folio(mapping, 0);
+ unsigned int chunk_size = nilfs_chunk_size(inode);
struct nilfs_dir_entry *de;
int err;
void *kaddr;
- if (!page)
- return -ENOMEM;
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- err = nilfs_prepare_chunk(page, 0, chunk_size);
+ err = nilfs_prepare_chunk(folio, 0, chunk_size);
if (unlikely(err)) {
- unlock_page(page);
+ folio_unlock(folio);
goto fail;
}
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_folio(folio, 0);
memset(kaddr, 0, chunk_size);
de = (struct nilfs_dir_entry *)kaddr;
de->name_len = 1;
de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1));
memcpy(de->name, ".\0\0", 4);
de->inode = cpu_to_le64(inode->i_ino);
- nilfs_set_de_type(de, inode);
+ de->file_type = fs_umode_to_ftype(inode->i_mode);
de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1));
de->name_len = 2;
de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1));
de->inode = cpu_to_le64(parent->i_ino);
memcpy(de->name, "..\0", 4);
- nilfs_set_de_type(de, inode);
- kunmap_atomic(kaddr);
- nilfs_commit_chunk(page, mapping, 0, chunk_size);
+ de->file_type = fs_umode_to_ftype(inode->i_mode);
+ kunmap_local(kaddr);
+ nilfs_commit_chunk(folio, mapping, 0, chunk_size);
fail:
- page_cache_release(page);
+ folio_put(folio);
return err;
}
@@ -617,26 +607,25 @@ fail:
*/
int nilfs_empty_dir(struct inode *inode)
{
- struct page *page = NULL;
+ struct folio *folio = NULL;
+ char *kaddr;
unsigned long i, npages = dir_pages(inode);
for (i = 0; i < npages; i++) {
- char *kaddr;
struct nilfs_dir_entry *de;
- page = nilfs_get_page(inode, i);
- if (IS_ERR(page))
- continue;
+ kaddr = nilfs_get_folio(inode, i, &folio);
+ if (IS_ERR(kaddr))
+ return 0;
- kaddr = page_address(page);
de = (struct nilfs_dir_entry *)kaddr;
kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1);
while ((char *)de <= kaddr) {
if (de->rec_len == 0) {
- nilfs_error(inode->i_sb, __func__,
- "zero-length directory entry "
- "(kaddr=%p, de=%p)\n", kaddr, de);
+ nilfs_error(inode->i_sb,
+ "zero-length directory entry (kaddr=%p, de=%p)",
+ kaddr, de);
goto not_empty;
}
if (de->inode != 0) {
@@ -654,19 +643,19 @@ int nilfs_empty_dir(struct inode *inode)
}
de = nilfs_next_entry(de);
}
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
}
return 1;
not_empty:
- nilfs_put_page(page);
+ folio_release_kmap(folio, kaddr);
return 0;
}
const struct file_operations nilfs_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .iterate = nilfs_readdir,
+ .iterate_shared = nilfs_readdir,
.unlocked_ioctl = nilfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = nilfs_compat_ioctl,
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 82f4865e86dd..2d8dc6b35b54 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * direct.c - NILFS direct block pointer.
+ * NILFS direct block pointer.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#include <linux/errno.h>
@@ -62,7 +49,7 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *direct,
static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
__u64 key, __u64 *ptrp,
- unsigned maxblocks)
+ unsigned int maxblocks)
{
struct inode *dat = NULL;
__u64 ptr, ptr2;
@@ -79,11 +66,12 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
dat = nilfs_bmap_get_dat(direct);
ret = nilfs_dat_translate(dat, ptr, &blocknr);
if (ret < 0)
- return ret;
+ goto dat_error;
ptr = blocknr;
}
- maxblocks = min_t(unsigned, maxblocks, NILFS_DIRECT_KEY_MAX - key + 1);
+ maxblocks = min_t(unsigned int, maxblocks,
+ NILFS_DIRECT_KEY_MAX - key + 1);
for (cnt = 1; cnt < maxblocks &&
(ptr2 = nilfs_direct_get_ptr(direct, key + cnt)) !=
NILFS_BMAP_INVALID_PTR;
@@ -91,7 +79,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
if (dat) {
ret = nilfs_dat_translate(dat, ptr2, &blocknr);
if (ret < 0)
- return ret;
+ goto dat_error;
ptr2 = blocknr;
}
if (ptr2 != ptr + cnt)
@@ -99,6 +87,11 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
}
*ptrp = ptr;
return cnt;
+
+ dat_error:
+ if (ret == -ENOENT)
+ ret = -EINVAL; /* Notify bmap layer of metadata corruption */
+ return ret;
}
static __u64
@@ -110,9 +103,9 @@ nilfs_direct_find_target_v(const struct nilfs_bmap *direct, __u64 key)
if (ptr != NILFS_BMAP_INVALID_PTR)
/* sequential access */
return ptr;
- else
- /* block group */
- return nilfs_bmap_find_target_in_group(direct);
+
+ /* block group */
+ return nilfs_bmap_find_target_in_group(direct);
}
static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
@@ -173,6 +166,21 @@ static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
return ret;
}
+static int nilfs_direct_seek_key(const struct nilfs_bmap *direct, __u64 start,
+ __u64 *keyp)
+{
+ __u64 key;
+
+ for (key = start; key <= NILFS_DIRECT_KEY_MAX; key++) {
+ if (nilfs_direct_get_ptr(direct, key) !=
+ NILFS_BMAP_INVALID_PTR) {
+ *keyp = key;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp)
{
__u64 key, lastkey;
@@ -265,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap,
dat = nilfs_bmap_get_dat(bmap);
key = nilfs_bmap_data_get_key(bmap, bh);
ptr = nilfs_direct_get_ptr(bmap, key);
+ if (ptr == NILFS_BMAP_INVALID_PTR)
+ return -EINVAL;
+
if (!buffer_nilfs_volatile(bh)) {
oldreq.pr_entry_nr = ptr;
newreq.pr_entry_nr = ptr;
@@ -311,6 +322,7 @@ static int nilfs_direct_assign_p(struct nilfs_bmap *direct,
binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
binfo->bi_dat.bi_level = 0;
+ memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
return 0;
}
@@ -325,14 +337,18 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap,
key = nilfs_bmap_data_get_key(bmap, *bh);
if (unlikely(key > NILFS_DIRECT_KEY_MAX)) {
- printk(KERN_CRIT "%s: invalid key: %llu\n", __func__,
- (unsigned long long)key);
+ nilfs_crit(bmap->b_inode->i_sb,
+ "%s (ino=%lu): invalid key: %llu",
+ __func__,
+ bmap->b_inode->i_ino, (unsigned long long)key);
return -EINVAL;
}
ptr = nilfs_direct_get_ptr(bmap, key);
if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) {
- printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__,
- (unsigned long long)ptr);
+ nilfs_crit(bmap->b_inode->i_sb,
+ "%s (ino=%lu): invalid pointer: %llu",
+ __func__,
+ bmap->b_inode->i_ino, (unsigned long long)ptr);
return -EINVAL;
}
@@ -355,7 +371,9 @@ static const struct nilfs_bmap_operations nilfs_direct_ops = {
.bop_assign = nilfs_direct_assign,
.bop_mark = NULL,
+ .bop_seek_key = nilfs_direct_seek_key,
.bop_last_key = nilfs_direct_last_key,
+
.bop_check_insert = nilfs_direct_check_insert,
.bop_check_delete = NULL,
.bop_gather_data = nilfs_direct_gather_data,
diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h
index dc643de20a25..b7ca896269af 100644
--- a/fs/nilfs2/direct.h
+++ b/fs/nilfs2/direct.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * direct.h - NILFS direct block pointer.
+ * NILFS direct block pointer.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#ifndef _NILFS_DIRECT_H
@@ -28,16 +15,6 @@
#include "bmap.h"
-/**
- * struct nilfs_direct_node - direct node
- * @dn_flags: flags
- * @dn_pad: padding
- */
-struct nilfs_direct_node {
- __u8 dn_flags;
- __u8 pad[7];
-};
-
#define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1)
#define NILFS_DIRECT_KEY_MIN 0
#define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1)
diff --git a/fs/nilfs2/export.h b/fs/nilfs2/export.h
index 19ccbf9522ab..d29fd837c42c 100644
--- a/fs/nilfs2/export.h
+++ b/fs/nilfs2/export.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef NILFS_EXPORT_H
#define NILFS_EXPORT_H
@@ -20,6 +21,6 @@ struct nilfs_fid {
u32 parent_gen;
u64 parent_ino;
-} __attribute__ ((packed));
+} __packed;
#endif
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 08fdb77852ac..1b8d754db44d 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -1,24 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * file.c - NILFS regular file handling primitives including fsync().
+ * NILFS regular file handling primitives including fsync().
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Amagai Yoshiji <amagai@osrg.net>,
- * Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Amagai Yoshiji and Ryusuke Konishi.
*/
#include <linux/fs.h>
@@ -39,61 +25,56 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
struct the_nilfs *nilfs;
struct inode *inode = file->f_mapping->host;
- int err;
-
- err = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (err)
- return err;
- mutex_lock(&inode->i_mutex);
+ int err = 0;
if (nilfs_inode_dirty(inode)) {
if (datasync)
err = nilfs_construct_dsync_segment(inode->i_sb, inode,
- 0, LLONG_MAX);
+ start, end);
else
err = nilfs_construct_segment(inode->i_sb);
}
- mutex_unlock(&inode->i_mutex);
nilfs = inode->i_sb->s_fs_info;
- if (!err && nilfs_test_opt(nilfs, BARRIER)) {
- err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
- if (err != -EIO)
- err = 0;
- }
+ if (!err)
+ err = nilfs_flush_device(nilfs);
+
return err;
}
-static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct vm_area_struct *vma = vmf->vma;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vma->vm_file);
struct nilfs_transaction_info ti;
+ struct buffer_head *bh, *head;
int ret = 0;
if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
return VM_FAULT_SIGBUS; /* -ENOSPC */
sb_start_pagefault(inode->i_sb);
- lock_page(page);
- if (page->mapping != inode->i_mapping ||
- page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
- unlock_page(page);
+ folio_lock(folio);
+ if (folio->mapping != inode->i_mapping ||
+ folio_pos(folio) >= i_size_read(inode) ||
+ !folio_test_uptodate(folio)) {
+ folio_unlock(folio);
ret = -EFAULT; /* make the VM retry the fault */
goto out;
}
/*
- * check to see if the page is mapped already (no holes)
+ * check to see if the folio is mapped already (no holes)
*/
- if (PageMappedToDisk(page))
+ if (folio_test_mappedtodisk(folio))
goto mapped;
- if (page_has_buffers(page)) {
- struct buffer_head *bh, *head;
+ head = folio_buffers(folio);
+ if (head) {
int fully_mapped = 1;
- bh = head = page_buffers(page);
+ bh = head;
do {
if (!buffer_mapped(bh)) {
fully_mapped = 0;
@@ -102,11 +83,11 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
} while (bh = bh->b_this_page, bh != head);
if (fully_mapped) {
- SetPageMappedToDisk(page);
+ folio_set_mappedtodisk(folio);
goto mapped;
}
}
- unlock_page(page);
+ folio_unlock(folio);
/*
* fill hole blocks
@@ -117,7 +98,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
goto out;
file_update_time(vma->vm_file);
- ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
+ ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
if (ret) {
nilfs_transaction_abort(inode->i_sb);
goto out;
@@ -126,22 +107,28 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);
mapped:
- wait_for_stable_page(page);
+ /*
+ * Since checksumming including data blocks is performed to determine
+ * the validity of the log to be written and used for recovery, it is
+ * necessary to wait for writeback to finish here, regardless of the
+ * stable write requirement of the backing device.
+ */
+ folio_wait_writeback(folio);
out:
sb_end_pagefault(inode->i_sb);
- return block_page_mkwrite_return(ret);
+ return vmf_fs_error(ret);
}
static const struct vm_operations_struct nilfs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = nilfs_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
-static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int nilfs_file_mmap_prepare(struct vm_area_desc *desc)
{
- file_accessed(file);
- vma->vm_ops = &nilfs_file_vm_ops;
+ file_accessed(desc->file);
+ desc->vm_ops = &nilfs_file_vm_ops;
return 0;
}
@@ -151,25 +138,26 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
*/
const struct file_operations nilfs_file_operations = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write,
+ .read_iter = generic_file_read_iter,
+ .write_iter = generic_file_write_iter,
.unlocked_ioctl = nilfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = nilfs_compat_ioctl,
#endif /* CONFIG_COMPAT */
- .mmap = nilfs_file_mmap,
+ .mmap_prepare = nilfs_file_mmap_prepare,
.open = generic_file_open,
/* .release = nilfs_release_file, */
.fsync = nilfs_sync_file,
- .splice_read = generic_file_splice_read,
+ .splice_read = filemap_splice_read,
+ .splice_write = iter_file_splice_write,
};
const struct inode_operations nilfs_file_inode_operations = {
.setattr = nilfs_setattr,
.permission = nilfs_permission,
.fiemap = nilfs_fiemap,
+ .fileattr_get = nilfs_fileattr_get,
+ .fileattr_set = nilfs_fileattr_set,
};
/* end of file */
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 57ceaf33d177..561c220799c7 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -1,25 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * gcinode.c - dummy inodes to buffer blocks for garbage collection
+ * Dummy inodes to buffer blocks for garbage collection
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>,
- * and Ryusuke Konishi <ryusuke@osrg.net>.
- * Revised by Ryusuke Konishi <ryusuke@osrg.net>.
+ * Written by Seiji Kihara, Amagai Yoshiji, and Ryusuke Konishi.
+ * Revised by Ryusuke Konishi.
*
*/
/*
@@ -60,14 +46,11 @@
* specified by @pbn to the GC pagecache with the key @blkoff.
* This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer.
*
- * Return Value: On success, 0 is returned. On Error, one of the following
- * negative error code is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - The block specified with @pbn does not exist.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - The block specified with @pbn does not exist.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
sector_t pbn, __u64 vbn,
@@ -87,10 +70,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
- if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
- brelse(bh);
+ if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */
goto failed;
- }
}
lock_buffer(bh);
@@ -99,14 +80,12 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
goto out;
}
- if (!buffer_mapped(bh)) {
- bh->b_bdev = inode->i_sb->s_bdev;
+ if (!buffer_mapped(bh))
set_buffer_mapped(bh);
- }
bh->b_blocknr = pbn;
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
- submit_bh(READ, bh);
+ submit_bh(REQ_OP_READ, bh);
if (vbn)
bh->b_blocknr = vbn;
out:
@@ -114,8 +93,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
*out_bh = bh;
failed:
- unlock_page(bh->b_page);
- page_cache_release(bh->b_page);
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
+ if (unlikely(err))
+ brelse(bh);
return err;
}
@@ -130,20 +111,20 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
* specified by @vbn to the GC pagecache. @pbn can be supplied by the
* caller to avoid translation of the disk block address.
*
- * Return Value: On success, 0 is returned. On Error, one of the following
- * negative error code is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Invalid virtual block address.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
__u64 vbn, struct buffer_head **out_bh)
{
+ struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode;
int ret;
- ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
- vbn ? : pbn, pbn, READ, out_bh, &pbn);
+ ret = nilfs_btnode_submit_block(btnc_inode->i_mapping, vbn ? : pbn, pbn,
+ REQ_OP_READ, out_bh, &pbn);
if (ret == -EEXIST) /* internal code (cache hit) */
ret = 0;
return ret;
@@ -152,8 +133,15 @@ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
{
wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
+ if (!buffer_uptodate(bh)) {
+ struct inode *inode = bh->b_folio->mapping->host;
+
+ nilfs_err(inode->i_sb,
+ "I/O error reading %s block for GC (ino=%lu, vblocknr=%llu)",
+ buffer_nilfs_node(bh) ? "node" : "data",
+ inode->i_ino, (unsigned long long)bh->b_blocknr);
return -EIO;
+ }
if (buffer_dirty(bh))
return -EEXIST;
@@ -171,17 +159,17 @@ int nilfs_init_gcinode(struct inode *inode)
inode->i_mode = S_IFREG;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
- inode->i_mapping->a_ops = &empty_aops;
- inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
+ inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
ii->i_flags = 0;
nilfs_bmap_init_gc(ii->i_bmap);
- return 0;
+ return nilfs_attach_btree_node_cache(inode);
}
/**
* nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes
+ * @nilfs: NILFS filesystem instance
*/
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
{
@@ -192,7 +180,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);
list_del_init(&ii->i_dirty);
truncate_inode_pages(&ii->vfs_inode.i_data, 0);
- nilfs_btnode_cache_clear(&ii->i_btnode_cache);
+ nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
iput(&ii->vfs_inode);
}
}
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 6548c7851b48..99eb8a59009e 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -1,24 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * ifile.c - NILFS inode file
+ * NILFS inode file
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Amagai Yoshiji <amagai@osrg.net>.
- * Revised by Ryusuke Konishi <ryusuke@osrg.net>.
+ * Written by Amagai Yoshiji.
+ * Revised by Ryusuke Konishi.
*
*/
@@ -28,6 +15,7 @@
#include "mdt.h"
#include "alloc.h"
#include "ifile.h"
+#include "cpfile.h"
/**
* struct nilfs_ifile_info - on-memory private data of ifile
@@ -50,17 +38,16 @@ static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile)
* @out_ino: pointer to a variable to store inode number
* @out_bh: buffer_head contains newly allocated disk inode
*
- * Return Value: On success, 0 is returned and the newly allocated inode
- * number is stored in the place pointed by @ino, and buffer_head pointer
- * that contains newly allocated disk inode structure is stored in the
- * place pointed by @out_bh
- * On error, one of the following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * nilfs_ifile_create_inode() allocates a new inode in the ifile metadata
+ * file and stores the inode number in the variable pointed to by @out_ino,
+ * as well as storing the ifile's buffer with the disk inode in the location
+ * pointed to by @out_bh.
*
- * %-ENOSPC - No inode left.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No inode left.
*/
int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
struct buffer_head **out_bh)
@@ -68,11 +55,10 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
struct nilfs_palloc_req req;
int ret;
- req.pr_entry_nr = 0; /* 0 says find free inode from beginning of
- a group. dull code!! */
+ req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb);
req.pr_entry_bh = NULL;
- ret = nilfs_palloc_prepare_alloc_entry(ifile, &req);
+ ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false);
if (!ret) {
ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1,
&req.pr_entry_bh);
@@ -96,14 +82,11 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
* @ifile: ifile inode
* @ino: inode number
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - The inode number @ino have not been allocated.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Inode number unallocated.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
{
@@ -111,7 +94,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
.pr_entry_nr = ino, .pr_entry_bh = NULL
};
struct nilfs_inode *raw_inode;
- void *kaddr;
+ size_t offset;
int ret;
ret = nilfs_palloc_prepare_free_entry(ifile, &req);
@@ -126,11 +109,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
return ret;
}
- kaddr = kmap_atomic(req.pr_entry_bh->b_page);
- raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
- req.pr_entry_bh, kaddr);
+ offset = nilfs_palloc_entry_offset(ifile, req.pr_entry_nr,
+ req.pr_entry_bh);
+ raw_inode = kmap_local_folio(req.pr_entry_bh->b_folio, offset);
raw_inode->i_flags = 0;
- kunmap_atomic(kaddr);
+ kunmap_local(raw_inode);
mark_buffer_dirty(req.pr_entry_bh);
brelse(req.pr_entry_bh);
@@ -147,15 +130,14 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
int err;
if (unlikely(!NILFS_VALID_INODE(sb, ino))) {
- nilfs_error(sb, __func__, "bad inode number: %lu",
- (unsigned long) ino);
+ nilfs_error(sb, "bad inode number: %lu", (unsigned long)ino);
return -EINVAL;
}
err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh);
if (unlikely(err))
- nilfs_warning(sb, __func__, "unable to read inode: %lu",
- (unsigned long) ino);
+ nilfs_warn(sb, "error %d reading inode: ino=%lu",
+ err, (unsigned long)ino);
return err;
}
@@ -164,6 +146,8 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
* @ifile: ifile inode
* @nmaxinodes: current maximum of available inodes count [out]
* @nfreeinodes: free inodes count [out]
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_ifile_count_free_inodes(struct inode *ifile,
u64 *nmaxinodes, u64 *nfreeinodes)
@@ -185,21 +169,26 @@ int nilfs_ifile_count_free_inodes(struct inode *ifile,
* nilfs_ifile_read - read or get ifile inode
* @sb: super block instance
* @root: root object
+ * @cno: number of checkpoint entry to read
* @inode_size: size of an inode
- * @raw_inode: on-disk ifile inode
- * @inodep: buffer to store the inode
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid checkpoint.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EIO - I/O error (including metadata corruption).
*/
int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
- size_t inode_size, struct nilfs_inode *raw_inode,
- struct inode **inodep)
+ __u64 cno, size_t inode_size)
{
+ struct the_nilfs *nilfs;
struct inode *ifile;
int err;
ifile = nilfs_iget_locked(sb, root, NILFS_IFILE_INO);
if (unlikely(!ifile))
return -ENOMEM;
- if (!(ifile->i_state & I_NEW))
+ if (!(inode_state_read_once(ifile) & I_NEW))
goto out;
err = nilfs_mdt_init(ifile, NILFS_MDT_GFP,
@@ -213,13 +202,13 @@ int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache);
- err = nilfs_read_inode_common(ifile, raw_inode);
+ nilfs = sb->s_fs_info;
+ err = nilfs_cpfile_read_checkpoint(nilfs->ns_cpfile, cno, root, ifile);
if (err)
goto failed;
unlock_new_inode(ifile);
out:
- *inodep = ifile;
return 0;
failed:
iget_failed(ifile);
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index 679674d13372..5d116a566d9e 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -1,24 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * ifile.h - NILFS inode file
+ * NILFS inode file
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Amagai Yoshiji <amagai@osrg.net>
- * Revised by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Amagai Yoshiji.
+ * Revised by Ryusuke Konishi.
*
*/
@@ -27,7 +14,6 @@
#include <linux/fs.h>
#include <linux/buffer_head.h>
-#include <linux/nilfs2_fs.h>
#include "mdt.h"
#include "alloc.h"
@@ -35,14 +21,14 @@
static inline struct nilfs_inode *
nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh)
{
- void *kaddr = kmap(ibh->b_page);
- return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr);
+ size_t __offset_in_folio = nilfs_palloc_entry_offset(ifile, ino, ibh);
+
+ return kmap_local_folio(ibh->b_folio, __offset_in_folio);
}
-static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino,
- struct buffer_head *ibh)
+static inline void nilfs_ifile_unmap_inode(struct nilfs_inode *raw_inode)
{
- kunmap(ibh->b_page);
+ kunmap_local(raw_inode);
}
int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
@@ -52,7 +38,6 @@ int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *);
int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
- size_t inode_size, struct nilfs_inode *raw_inode,
- struct inode **inodep);
+ __u64 cno, size_t inode_size);
#endif /* _NILFS_IFILE_H */
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b1a5277cfd18..51bde45d5865 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -1,31 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * inode.c - NILFS inode operations.
+ * NILFS inode operations.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*
*/
#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
+#include <linux/pagemap.h>
#include <linux/writeback.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
+#include <linux/fiemap.h>
+#include <linux/random.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
@@ -39,20 +29,22 @@
* @ino: inode number
* @cno: checkpoint number
* @root: pointer on NILFS root object (mounted checkpoint)
- * @for_gc: inode for GC flag
+ * @type: inode type
*/
struct nilfs_iget_args {
u64 ino;
__u64 cno;
struct nilfs_root *root;
- int for_gc;
+ unsigned int type;
};
+static int nilfs_iget_test(struct inode *inode, void *opaque);
+
void nilfs_inode_add_blocks(struct inode *inode, int n)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
- inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
+ inode_add_bytes(inode, i_blocksize(inode) * n);
if (root)
atomic64_add(n, &root->blocks_count);
}
@@ -61,21 +53,23 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
- inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
+ inode_sub_bytes(inode, i_blocksize(inode) * n);
if (root)
atomic64_sub(n, &root->blocks_count);
}
/**
* nilfs_get_block() - get a file block on the filesystem (callback function)
- * @inode - inode struct of the target file
- * @blkoff - file block number
- * @bh_result - buffer head to be mapped on
- * @create - indicate whether allocating the block or not when it has not
+ * @inode: inode struct of the target file
+ * @blkoff: file block number
+ * @bh_result: buffer head to be mapped on
+ * @create: indicate whether allocating the block or not when it has not
* been allocated yet.
*
* This function does not issue actual read request of the specified data
* block. It is done by VFS.
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_get_block(struct inode *inode, sector_t blkoff,
struct buffer_head *bh_result, int create)
@@ -84,7 +78,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
__u64 blknum = 0;
int err = 0, ret;
- unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
+ unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits;
down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
@@ -103,7 +97,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
if (unlikely(err))
goto out;
- err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff,
+ err = nilfs_bmap_insert(ii->i_bmap, blkoff,
(unsigned long)bh_result);
if (unlikely(err != 0)) {
if (err == -EEXIST) {
@@ -113,28 +107,28 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
* However, the page having this block must
* be locked in this case.
*/
- printk(KERN_WARNING
- "nilfs_get_block: a race condition "
- "while inserting a data block. "
- "(inode number=%lu, file block "
- "offset=%llu)\n",
- inode->i_ino,
- (unsigned long long)blkoff);
- err = 0;
+ nilfs_warn(inode->i_sb,
+ "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
+ __func__, inode->i_ino,
+ (unsigned long long)blkoff);
+ err = -EAGAIN;
}
nilfs_transaction_abort(inode->i_sb);
goto out;
}
- nilfs_mark_inode_dirty(inode);
+ nilfs_mark_inode_dirty_sync(inode);
nilfs_transaction_commit(inode->i_sb); /* never fails */
/* Error handling should be detailed */
set_buffer_new(bh_result);
set_buffer_delay(bh_result);
- map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed
- to proper value */
+ map_bh(bh_result, inode->i_sb, 0);
+ /* Disk block number must be changed to proper value */
+
} else if (ret == -ENOENT) {
- /* not found is not error (e.g. hole); must return without
- the mapped state flag. */
+ /*
+ * not found is not error (e.g. hole); must return without
+ * the mapped state flag.
+ */
;
} else {
err = ret;
@@ -145,28 +139,21 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
}
/**
- * nilfs_readpage() - implement readpage() method of nilfs_aops {}
+ * nilfs_read_folio() - implement read_folio() method of nilfs_aops {}
* address_space_operations.
- * @file - file struct of the file to be read
- * @page - the page to be read
+ * @file: file struct of the file to be read
+ * @folio: the folio to be read
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
-static int nilfs_readpage(struct file *file, struct page *page)
+static int nilfs_read_folio(struct file *file, struct folio *folio)
{
- return mpage_readpage(page, nilfs_get_block);
+ return mpage_read_folio(folio, nilfs_get_block);
}
-/**
- * nilfs_readpages() - implement readpages() method of nilfs_aops {}
- * address_space_operations.
- * @file - file struct of the file to be read
- * @mapping - address_space struct used for reading multiple pages
- * @pages - the pages to be read
- * @nr_pages - number of pages to be read
- */
-static int nilfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void nilfs_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block);
+ mpage_readahead(rac, nilfs_get_block);
}
static int nilfs_writepages(struct address_space *mapping,
@@ -175,8 +162,8 @@ static int nilfs_writepages(struct address_space *mapping,
struct inode *inode = mapping->host;
int err = 0;
- if (inode->i_sb->s_flags & MS_RDONLY) {
- nilfs_clear_dirty_pages(mapping, false);
+ if (sb_rdonly(inode->i_sb)) {
+ nilfs_clear_dirty_pages(mapping);
return -EROFS;
}
@@ -187,53 +174,22 @@ static int nilfs_writepages(struct address_space *mapping,
return err;
}
-static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- int err;
-
- if (inode->i_sb->s_flags & MS_RDONLY) {
- /*
- * It means that filesystem was remounted in read-only
- * mode because of error or metadata corruption. But we
- * have dirty pages that try to be flushed in background.
- * So, here we simply discard this dirty page.
- */
- nilfs_clear_dirty_page(page, false);
- unlock_page(page);
- return -EROFS;
- }
-
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
-
- if (wbc->sync_mode == WB_SYNC_ALL) {
- err = nilfs_construct_segment(inode->i_sb);
- if (unlikely(err))
- return err;
- } else if (wbc->for_reclaim)
- nilfs_flush_segment(inode->i_sb, inode->i_ino);
-
- return 0;
-}
-
-static int nilfs_set_page_dirty(struct page *page)
+static bool nilfs_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
{
- int ret = __set_page_dirty_nobuffers(page);
+ struct inode *inode = mapping->host;
+ struct buffer_head *head;
+ unsigned int nr_dirty = 0;
+ bool ret = filemap_dirty_folio(mapping, folio);
- if (page_has_buffers(page)) {
- struct inode *inode = page->mapping->host;
- unsigned nr_dirty = 0;
- struct buffer_head *bh, *head;
+ /*
+ * The page may not be locked, eg if called from try_to_unmap_one()
+ */
+ spin_lock(&mapping->i_private_lock);
+ head = folio_buffers(folio);
+ if (head) {
+ struct buffer_head *bh = head;
- /*
- * This page is locked by callers, and no other thread
- * concurrently marks its buffers dirty since they are
- * only dirtied through routines in fs/buffer.c in
- * which call sites of mark_buffer_dirty are protected
- * by page lock.
- */
- bh = head = page_buffers(page);
do {
/* Do not mark hole blocks dirty */
if (buffer_dirty(bh) || !buffer_mapped(bh))
@@ -242,10 +198,13 @@ static int nilfs_set_page_dirty(struct page *page)
set_buffer_dirty(bh);
nr_dirty++;
} while (bh = bh->b_this_page, bh != head);
-
- if (nr_dirty)
- nilfs_set_file_dirty(inode, nr_dirty);
+ } else if (ret) {
+ nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
}
+ spin_unlock(&mapping->i_private_lock);
+
+ if (nr_dirty)
+ nilfs_set_file_dirty(inode, nr_dirty);
return ret;
}
@@ -254,14 +213,15 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to)
struct inode *inode = mapping->host;
if (to > inode->i_size) {
- truncate_pagecache(inode, to, inode->i_size);
+ truncate_pagecache(inode, inode->i_size);
nilfs_truncate(inode);
}
}
-static int nilfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
+static int nilfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
struct inode *inode = mapping->host;
@@ -270,8 +230,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
if (unlikely(err))
return err;
- err = block_write_begin(mapping, pos, len, flags, pagep,
- nilfs_get_block);
+ err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block);
if (unlikely(err)) {
nilfs_write_failed(mapping, pos + len);
nilfs_transaction_abort(inode->i_sb);
@@ -279,18 +238,19 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
return err;
}
-static int nilfs_write_end(struct file *file, struct address_space *mapping,
+static int nilfs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
- unsigned start = pos & (PAGE_CACHE_SIZE - 1);
- unsigned nr_dirty;
+ unsigned int start = pos & (PAGE_SIZE - 1);
+ unsigned int nr_dirty;
int err;
- nr_dirty = nilfs_page_count_clean_buffers(page, start,
+ nr_dirty = nilfs_page_count_clean_buffers(folio, start,
start + copied);
- copied = generic_write_end(file, mapping, pos, len, copied, page,
+ copied = generic_write_end(iocb, mapping, pos, len, copied, folio,
fsdata);
nilfs_set_file_dirty(inode, nr_dirty);
err = nilfs_transaction_commit(inode->i_sb);
@@ -298,57 +258,52 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
}
static ssize_t
-nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t offset, unsigned long nr_segs)
+nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = file->f_mapping->host;
- ssize_t size;
+ struct inode *inode = file_inode(iocb->ki_filp);
- if (rw == WRITE)
+ if (iov_iter_rw(iter) == WRITE)
return 0;
/* Needs synchronization with the cleaner */
- size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
- nilfs_get_block);
-
- /*
- * In case of error extending write may have instantiated a few
- * blocks outside i_size. Trim these off again.
- */
- if (unlikely((rw & WRITE) && size < 0)) {
- loff_t isize = i_size_read(inode);
- loff_t end = offset + iov_length(iov, nr_segs);
-
- if (end > isize)
- nilfs_write_failed(mapping, end);
- }
-
- return size;
+ return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block);
}
const struct address_space_operations nilfs_aops = {
- .writepage = nilfs_writepage,
- .readpage = nilfs_readpage,
+ .read_folio = nilfs_read_folio,
.writepages = nilfs_writepages,
- .set_page_dirty = nilfs_set_page_dirty,
- .readpages = nilfs_readpages,
+ .dirty_folio = nilfs_dirty_folio,
+ .readahead = nilfs_readahead,
.write_begin = nilfs_write_begin,
.write_end = nilfs_write_end,
- /* .releasepage = nilfs_releasepage, */
- .invalidatepage = block_invalidatepage,
+ .invalidate_folio = block_invalidate_folio,
.direct_IO = nilfs_direct_IO,
+ .migrate_folio = buffer_migrate_folio_norefs,
.is_partially_uptodate = block_is_partially_uptodate,
};
+const struct address_space_operations nilfs_buffer_cache_aops = {
+ .invalidate_folio = block_invalidate_folio,
+};
+
+static int nilfs_insert_inode_locked(struct inode *inode,
+ struct nilfs_root *root,
+ unsigned long ino)
+{
+ struct nilfs_iget_args args = {
+ .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
+ };
+
+ return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
+}
+
struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
{
struct super_block *sb = dir->i_sb;
- struct the_nilfs *nilfs = sb->s_fs_info;
struct inode *inode;
struct nilfs_inode_info *ii;
struct nilfs_root *root;
+ struct buffer_head *bh;
int err = -ENOMEM;
ino_t ino;
@@ -357,27 +312,29 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
goto failed;
mapping_set_gfp_mask(inode->i_mapping,
- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+ mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
root = NILFS_I(dir)->i_root;
ii = NILFS_I(inode);
- ii->i_state = 1 << NILFS_I_NEW;
+ ii->i_state = BIT(NILFS_I_NEW);
+ ii->i_type = NILFS_I_TYPE_NORMAL;
ii->i_root = root;
- err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh);
+ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
if (unlikely(err))
goto failed_ifile_create_inode;
/* reference count of i_bh inherits from nilfs_mdt_read_block() */
+ ii->i_bh = bh;
atomic64_inc(&root->inodes_count);
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_ino = ino;
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ simple_inode_init_ts(inode);
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
err = nilfs_bmap_read(ii->i_bmap, NULL);
if (err < 0)
- goto failed_bmap;
+ goto failed_after_creation;
set_bit(NILFS_I_BMAP, &ii->i_state);
/* No lock is needed; iget() ensures it. */
@@ -390,30 +347,35 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
/* ii->i_dir_acl = 0; */
ii->i_dir_start_lookup = 0;
nilfs_set_inode_flags(inode);
- spin_lock(&nilfs->ns_next_gen_lock);
- inode->i_generation = nilfs->ns_next_generation++;
- spin_unlock(&nilfs->ns_next_gen_lock);
- insert_inode_hash(inode);
+ inode->i_generation = get_random_u32();
+ if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
+ err = -EIO;
+ goto failed_after_creation;
+ }
err = nilfs_init_acl(inode, dir);
if (unlikely(err))
- goto failed_acl; /* never occur. When supporting
- nilfs_init_acl(), proper cancellation of
- above jobs should be considered */
+ /*
+ * Never occur. When supporting nilfs_init_acl(),
+ * proper cancellation of above jobs should be considered.
+ */
+ goto failed_after_creation;
return inode;
- failed_acl:
- failed_bmap:
+ failed_after_creation:
clear_nlink(inode);
- iput(inode); /* raw_inode will be deleted through
- generic_delete_inode() */
+ if (inode_state_read_once(inode) & I_NEW)
+ unlock_new_inode(inode);
+ iput(inode); /*
+ * raw_inode will be deleted through
+ * nilfs_evict_inode().
+ */
goto failed;
failed_ifile_create_inode:
make_bad_inode(inode);
- iput(inode); /* if i_nlink == 1, generic_forget_inode() will be
- called */
+ iput(inode);
failed:
return ERR_PTR(err);
}
@@ -421,21 +383,20 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
void nilfs_set_inode_flags(struct inode *inode)
{
unsigned int flags = NILFS_I(inode)->i_flags;
+ unsigned int new_fl = 0;
- inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
- S_DIRSYNC);
if (flags & FS_SYNC_FL)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (flags & FS_APPEND_FL)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (flags & FS_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & FS_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (flags & FS_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
- mapping_set_gfp_mask(inode->i_mapping,
- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+ new_fl |= S_DIRSYNC;
+ inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
+ S_NOATIME | S_DIRSYNC);
}
int nilfs_read_inode_common(struct inode *inode,
@@ -449,14 +410,16 @@ int nilfs_read_inode_common(struct inode *inode,
i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
inode->i_size = le64_to_cpu(raw_inode->i_size);
- inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
- inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
- inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
- inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
- inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
- inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
- if (inode->i_nlink == 0 && inode->i_mode == 0)
- return -EINVAL; /* this inode is deleted */
+ inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime),
+ le32_to_cpu(raw_inode->i_mtime_nsec));
+ inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
+ le32_to_cpu(raw_inode->i_ctime_nsec));
+ inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime),
+ le32_to_cpu(raw_inode->i_mtime_nsec));
+ if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
+ return -EIO; /* this inode is for metadata and corrupted */
+ if (inode->i_nlink == 0)
+ return -ESTALE; /* this inode is deleted */
inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
ii->i_flags = le32_to_cpu(raw_inode->i_flags);
@@ -509,21 +472,31 @@ static int __nilfs_read_inode(struct super_block *sb,
inode->i_mapping->a_ops = &nilfs_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &nilfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
- } else {
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_op = &nilfs_special_inode_operations;
init_special_inode(
inode, inode->i_mode,
huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
+ } else {
+ nilfs_error(sb,
+ "invalid file type bits in mode 0%o for inode %lu",
+ inode->i_mode, ino);
+ err = -EIO;
+ goto failed_unmap;
}
- nilfs_ifile_unmap_inode(root->ifile, ino, bh);
+ nilfs_ifile_unmap_inode(raw_inode);
brelse(bh);
up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
nilfs_set_inode_flags(inode);
+ mapping_set_gfp_mask(inode->i_mapping,
+ mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
return 0;
failed_unmap:
- nilfs_ifile_unmap_inode(root->ifile, ino, bh);
+ nilfs_ifile_unmap_inode(raw_inode);
brelse(bh);
bad_inode:
@@ -540,10 +513,10 @@ static int nilfs_iget_test(struct inode *inode, void *opaque)
return 0;
ii = NILFS_I(inode);
- if (!test_bit(NILFS_I_GCINODE, &ii->i_state))
- return !args->for_gc;
+ if (ii->i_type != args->type)
+ return 0;
- return args->for_gc && args->cno == ii->i_cno;
+ return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno;
}
static int nilfs_iget_set(struct inode *inode, void *opaque)
@@ -551,15 +524,11 @@ static int nilfs_iget_set(struct inode *inode, void *opaque)
struct nilfs_iget_args *args = opaque;
inode->i_ino = args->ino;
- if (args->for_gc) {
- NILFS_I(inode)->i_state = 1 << NILFS_I_GCINODE;
- NILFS_I(inode)->i_cno = args->cno;
- NILFS_I(inode)->i_root = NULL;
- } else {
- if (args->root && args->ino == NILFS_ROOT_INO)
- nilfs_get_root(args->root);
- NILFS_I(inode)->i_root = args->root;
- }
+ NILFS_I(inode)->i_cno = args->cno;
+ NILFS_I(inode)->i_root = args->root;
+ NILFS_I(inode)->i_type = args->type;
+ if (args->root && args->ino == NILFS_ROOT_INO)
+ nilfs_get_root(args->root);
return 0;
}
@@ -567,7 +536,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
unsigned long ino)
{
struct nilfs_iget_args args = {
- .ino = ino, .root = root, .cno = 0, .for_gc = 0
+ .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
};
return ilookup5(sb, ino, nilfs_iget_test, &args);
@@ -577,7 +546,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
unsigned long ino)
{
struct nilfs_iget_args args = {
- .ino = ino, .root = root, .cno = 0, .for_gc = 0
+ .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
};
return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
@@ -592,8 +561,14 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
inode = nilfs_iget_locked(sb, root, ino);
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+
+ if (!(inode_state_read_once(inode) & I_NEW)) {
+ if (!inode->i_nlink) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
return inode;
+ }
err = __nilfs_read_inode(sb, root, ino, inode);
if (unlikely(err)) {
@@ -608,7 +583,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
__u64 cno)
{
struct nilfs_iget_args args = {
- .ino = ino, .root = NULL, .cno = cno, .for_gc = 1
+ .ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC
};
struct inode *inode;
int err;
@@ -616,7 +591,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+ if (!(inode_state_read_once(inode) & I_NEW))
return inode;
err = nilfs_init_gcinode(inode);
@@ -628,8 +603,118 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
return inode;
}
+/**
+ * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
+ * @inode: inode object
+ *
+ * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
+ * or does nothing if the inode already has it. This function allocates
+ * an additional inode to maintain page cache of B-tree nodes one-on-one.
+ *
+ * Return: 0 on success, or %-ENOMEM if memory is insufficient.
+ */
+int nilfs_attach_btree_node_cache(struct inode *inode)
+{
+ struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct inode *btnc_inode;
+ struct nilfs_iget_args args;
+
+ if (ii->i_assoc_inode)
+ return 0;
+
+ args.ino = inode->i_ino;
+ args.root = ii->i_root;
+ args.cno = ii->i_cno;
+ args.type = ii->i_type | NILFS_I_TYPE_BTNC;
+
+ btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
+ nilfs_iget_set, &args);
+ if (unlikely(!btnc_inode))
+ return -ENOMEM;
+ if (inode_state_read_once(btnc_inode) & I_NEW) {
+ nilfs_init_btnc_inode(btnc_inode);
+ unlock_new_inode(btnc_inode);
+ }
+ NILFS_I(btnc_inode)->i_assoc_inode = inode;
+ NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
+ ii->i_assoc_inode = btnc_inode;
+
+ return 0;
+}
+
+/**
+ * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
+ * @inode: inode object
+ *
+ * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
+ * holder inode bound to @inode, or does nothing if @inode doesn't have it.
+ */
+void nilfs_detach_btree_node_cache(struct inode *inode)
+{
+ struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct inode *btnc_inode = ii->i_assoc_inode;
+
+ if (btnc_inode) {
+ NILFS_I(btnc_inode)->i_assoc_inode = NULL;
+ ii->i_assoc_inode = NULL;
+ iput(btnc_inode);
+ }
+}
+
+/**
+ * nilfs_iget_for_shadow - obtain inode for shadow mapping
+ * @inode: inode object that uses shadow mapping
+ *
+ * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
+ * caches for shadow mapping. The page cache for data pages is set up
+ * in one inode and the one for b-tree node pages is set up in the
+ * other inode, which is attached to the former inode.
+ *
+ * Return: a pointer to the inode for data pages on success, or an
+ * ERR_PTR()-encoded negative error code (%-ENOMEM) on failure.
+ */
+struct inode *nilfs_iget_for_shadow(struct inode *inode)
+{
+ struct nilfs_iget_args args = {
+ .ino = inode->i_ino, .root = NULL, .cno = 0,
+ .type = NILFS_I_TYPE_SHADOW
+ };
+ struct inode *s_inode;
+ int err;
+
+ s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
+ nilfs_iget_set, &args);
+ if (unlikely(!s_inode))
+ return ERR_PTR(-ENOMEM);
+ if (!(inode_state_read_once(s_inode) & I_NEW))
+ return s_inode; /* not new: hand back the shadow inode found */
+
+ NILFS_I(s_inode)->i_flags = 0;
+ memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
+ mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
+ s_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
+
+ err = nilfs_attach_btree_node_cache(s_inode);
+ if (unlikely(err)) {
+ iget_failed(s_inode);
+ return ERR_PTR(err);
+ }
+ unlock_new_inode(s_inode);
+ return s_inode;
+}
+
+/**
+ * nilfs_write_inode_common - export common inode information to on-disk inode
+ * @inode: inode object
+ * @raw_inode: on-disk inode
+ *
+ * This function writes standard information from the on-memory inode @inode
+ * to @raw_inode on ifile, cpfile or a super root block. Since inode bmap
+ * data is not exported, nilfs_bmap_write() must be called separately during
+ * log writing.
+ */
void nilfs_write_inode_common(struct inode *inode,
- struct nilfs_inode *raw_inode, int has_bmap)
+ struct nilfs_inode *raw_inode)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
@@ -638,35 +723,22 @@ void nilfs_write_inode_common(struct inode *inode,
raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le64(inode->i_size);
- raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
- raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
- raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
- raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
+ raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
+ raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
+ raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
raw_inode->i_flags = cpu_to_le32(ii->i_flags);
raw_inode->i_generation = cpu_to_le32(inode->i_generation);
- if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
- struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
-
- /* zero-fill unused portion in the case of super root block */
- raw_inode->i_xattr = 0;
- raw_inode->i_pad = 0;
- memset((void *)raw_inode + sizeof(*raw_inode), 0,
- nilfs->ns_inode_size - sizeof(*raw_inode));
- }
-
- if (has_bmap)
- nilfs_bmap_write(ii->i_bmap, raw_inode);
- else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
- raw_inode->i_device_code =
- cpu_to_le64(huge_encode_dev(inode->i_rdev));
- /* When extending inode, nilfs->ns_inode_size should be checked
- for substitutions of appended fields */
+ /*
+ * When extending inode, nilfs->ns_inode_size should be checked
+ * for substitutions of appended fields.
+ */
}
-void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh)
+void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
{
ino_t ino = inode->i_ino;
struct nilfs_inode_info *ii = NILFS_I(inode);
@@ -677,13 +749,16 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh)
if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size);
- set_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
+ if (flags & I_DIRTY_DATASYNC)
+ set_bit(NILFS_I_INODE_SYNC, &ii->i_state);
+
+ nilfs_write_inode_common(inode, raw_inode);
- nilfs_write_inode_common(inode, raw_inode, 0);
- /* XXX: call with has_bmap = 0 is a workaround to avoid
- deadlock of bmap. This delays update of i_bmap to just
- before writing */
- nilfs_ifile_unmap_inode(ifile, ino, ibh);
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+ raw_inode->i_device_code =
+ cpu_to_le64(huge_encode_dev(inode->i_rdev));
+
+ nilfs_ifile_unmap_inode(raw_inode);
}
#define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */
@@ -691,7 +766,7 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh)
static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
unsigned long from)
{
- unsigned long b;
+ __u64 b;
int ret;
if (!test_bit(NILFS_I_BMAP, &ii->i_state))
@@ -706,7 +781,7 @@ repeat:
if (b < from)
return;
- b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
+ b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
ret = nilfs_bmap_truncate(ii->i_bmap, b);
nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
if (!ret || (ret == -ENOMEM &&
@@ -714,9 +789,8 @@ repeat:
goto repeat;
failed:
- nilfs_warning(ii->vfs_inode.i_sb, __func__,
- "failed to truncate bmap (ino=%lu, err=%d)",
- ii->vfs_inode.i_ino, ret);
+ nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
+ ret, ii->vfs_inode.i_ino);
}
void nilfs_truncate(struct inode *inode)
@@ -740,21 +814,22 @@ void nilfs_truncate(struct inode *inode)
nilfs_truncate_bmap(ii, blkoff);
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
if (IS_SYNC(inode))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
nilfs_mark_inode_dirty(inode);
nilfs_set_file_dirty(inode, 0);
nilfs_transaction_commit(sb);
- /* May construct a logical segment and may fail in sync mode.
- But truncate has no return value. */
+ /*
+ * May construct a logical segment and may fail in sync mode.
+ * But truncate has no return value.
+ */
}
static void nilfs_clear_inode(struct inode *inode)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
- struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
/*
* Free resources allocated in nilfs_read_inode(), here.
@@ -763,13 +838,14 @@ static void nilfs_clear_inode(struct inode *inode)
brelse(ii->i_bh);
ii->i_bh = NULL;
- if (mdi && mdi->mi_palloc_cache)
- nilfs_palloc_destroy_cache(inode);
+ if (nilfs_is_metadata_file_inode(inode))
+ nilfs_mdt_clear(inode);
if (test_bit(NILFS_I_BMAP, &ii->i_state))
nilfs_bmap_clear(ii->i_bmap);
- nilfs_btnode_cache_clear(&ii->i_btnode_cache);
+ if (!(ii->i_type & NILFS_I_TYPE_BTNC))
+ nilfs_detach_btree_node_cache(inode);
if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
nilfs_put_root(ii->i_root);
@@ -780,19 +856,35 @@ void nilfs_evict_inode(struct inode *inode)
struct nilfs_transaction_info ti;
struct super_block *sb = inode->i_sb;
struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct the_nilfs *nilfs;
int ret;
if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
- if (inode->i_data.nrpages)
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
nilfs_clear_inode(inode);
return;
}
nilfs_transaction_begin(sb, &ti, 0); /* never fails */
- if (inode->i_data.nrpages)
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
+
+ nilfs = sb->s_fs_info;
+ if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
+ /*
+ * If this inode is about to be disposed after the file system
+ * has been degraded to read-only due to file system corruption
+ * or after the writer has been detached, do not make any
+ * changes that cause writes, just clear it.
+ * Do this check after read-locking ns_segctor_sem by
+ * nilfs_transaction_begin() in order to avoid a race with
+ * the writer detach operation.
+ */
+ clear_inode(inode);
+ nilfs_clear_inode(inode);
+ nilfs_transaction_abort(sb);
+ return;
+ }
/* TODO: some of the following operations may fail. */
nilfs_truncate_bmap(ii, 0);
@@ -808,18 +900,21 @@ void nilfs_evict_inode(struct inode *inode)
if (IS_SYNC(inode))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
nilfs_transaction_commit(sb);
- /* May construct a logical segment and may fail in sync mode.
- But delete_inode has no return value. */
+ /*
+ * May construct a logical segment and may fail in sync mode.
+ * But delete_inode has no return value.
+ */
}
-int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
+int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *iattr)
{
struct nilfs_transaction_info ti;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct super_block *sb = inode->i_sb;
int err;
- err = inode_change_ok(inode, iattr);
+ err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (err)
return err;
@@ -834,7 +929,7 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
nilfs_truncate(inode);
}
- setattr_copy(inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
mark_inode_dirty(inode);
if (iattr->ia_valid & ATTR_MODE) {
@@ -850,14 +945,16 @@ out_err:
return err;
}
-int nilfs_permission(struct inode *inode, int mask)
+int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
+ int mask)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
+
if ((mask & MAY_WRITE) && root &&
root->cno != NILFS_CPTREE_CURRENT_CNO)
return -EROFS; /* snapshot is not writable */
- return generic_permission(inode, mask);
+ return generic_permission(&nop_mnt_idmap, inode, mask);
}
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
@@ -867,7 +964,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
int err;
spin_lock(&nilfs->ns_inode_lock);
- if (ii->i_bh == NULL) {
+ if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
spin_unlock(&nilfs->ns_inode_lock);
err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
inode->i_ino, pbh);
@@ -876,7 +973,10 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
spin_lock(&nilfs->ns_inode_lock);
if (ii->i_bh == NULL)
ii->i_bh = *pbh;
- else {
+ else if (unlikely(!buffer_uptodate(ii->i_bh))) {
+ __brelse(ii->i_bh);
+ ii->i_bh = *pbh;
+ } else {
brelse(*pbh);
*pbh = ii->i_bh;
}
@@ -903,7 +1003,7 @@ int nilfs_inode_dirty(struct inode *inode)
return ret;
}
-int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
+int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
@@ -916,17 +1016,23 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
spin_lock(&nilfs->ns_inode_lock);
if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
!test_bit(NILFS_I_BUSY, &ii->i_state)) {
- /* Because this routine may race with nilfs_dispose_list(),
- we have to check NILFS_I_QUEUED here, too. */
+ /*
+ * Because this routine may race with nilfs_dispose_list(),
+ * we have to check NILFS_I_QUEUED here, too.
+ */
if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
- /* This will happen when somebody is freeing
- this inode. */
- nilfs_warning(inode->i_sb, __func__,
- "cannot get inode (ino=%lu)\n",
- inode->i_ino);
+ /*
+ * This will happen when somebody is freeing
+ * this inode.
+ */
+ nilfs_warn(inode->i_sb,
+ "cannot set file dirty (ino=%lu): the file is being freed",
+ inode->i_ino);
spin_unlock(&nilfs->ns_inode_lock);
- return -EINVAL; /* NILFS_I_DIRTY may remain for
- freeing inode */
+ return -EINVAL; /*
+ * NILFS_I_DIRTY may remain for
+ * freeing inode.
+ */
}
list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
set_bit(NILFS_I_QUEUED, &ii->i_state);
@@ -935,18 +1041,27 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
return 0;
}
-int nilfs_mark_inode_dirty(struct inode *inode)
+int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
struct buffer_head *ibh;
int err;
+ /*
+ * Do not dirty inodes after the log writer has been detached
+ * and its nilfs_root struct has been freed.
+ */
+ if (unlikely(nilfs_purging(nilfs)))
+ return 0;
+
err = nilfs_load_inode_block(inode, &ibh);
if (unlikely(err)) {
- nilfs_warning(inode->i_sb, __func__,
- "failed to reget inode block.\n");
+ nilfs_warn(inode->i_sb,
+ "cannot mark inode dirty (ino=%lu): error %d loading inode block",
+ inode->i_ino, err);
return err;
}
- nilfs_update_inode(inode, ibh);
+ nilfs_update_inode(inode, ibh, flags);
mark_buffer_dirty(ibh);
nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
brelse(ibh);
@@ -956,6 +1071,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
/**
* nilfs_dirty_inode - reflect changes on given inode to an inode block.
* @inode: inode of the file to be registered.
+ * @flags: flags to determine the dirty state of the inode
*
* nilfs_dirty_inode() loads an inode block containing the specified
* @inode and copies data from a nilfs_inode to a corresponding inode
@@ -969,8 +1085,8 @@ void nilfs_dirty_inode(struct inode *inode, int flags)
struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
if (is_bad_inode(inode)) {
- nilfs_warning(inode->i_sb, __func__,
- "tried to mark bad_inode dirty. ignored.\n");
+ nilfs_warn(inode->i_sb,
+ "tried to mark bad_inode dirty. ignored.");
dump_stack();
return;
}
@@ -979,7 +1095,7 @@ void nilfs_dirty_inode(struct inode *inode, int flags)
return;
}
nilfs_transaction_begin(inode->i_sb, &ti, 0);
- nilfs_mark_inode_dirty(inode);
+ __nilfs_mark_inode_dirty(inode, flags);
nilfs_transaction_commit(inode->i_sb); /* never fails */
}
@@ -996,11 +1112,11 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
unsigned int blkbits = inode->i_blkbits;
int ret, n;
- ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+ ret = fiemap_prep(inode, fieinfo, start, &len, 0);
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
@@ -1079,7 +1195,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (size) {
if (phys && blkphy << blkbits == phys + size) {
/* The current extent goes on */
- size += n << blkbits;
+ size += (u64)n << blkbits;
} else {
/* Terminate the current extent */
ret = fiemap_fill_next_extent(
@@ -1092,14 +1208,14 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags = FIEMAP_EXTENT_MERGED;
logical = blkoff << blkbits;
phys = blkphy << blkbits;
- size = n << blkbits;
+ size = (u64)n << blkbits;
}
} else {
/* Start a new extent */
flags = FIEMAP_EXTENT_MERGED;
logical = blkoff << blkbits;
phys = blkphy << blkbits;
- size = n << blkbits;
+ size = (u64)n << blkbits;
}
blkoff += n;
}
@@ -1110,6 +1226,6 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret == 1)
ret = 0;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index b44bdb291b84..e17b8da66491 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * ioctl.c - NILFS ioctl operations.
+ * NILFS ioctl operations.
*
* Copyright (C) 2007, 2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#include <linux/fs.h>
@@ -29,7 +16,8 @@
#include <linux/compat.h> /* compat_ptr() */
#include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */
#include <linux/buffer_head.h>
-#include <linux/nilfs2_fs.h>
+#include <linux/fileattr.h>
+#include <linux/string.h>
#include "nilfs.h"
#include "segment.h"
#include "bmap.h"
@@ -37,7 +25,23 @@
#include "sufile.h"
#include "dat.h"
-
+/**
+ * nilfs_ioctl_wrap_copy - wrapping function of get/set metadata info
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @dir: set of direction flags
+ * @dofunc: concrete function of get/set metadata info
+ *
+ * Description: nilfs_ioctl_wrap_copy() gets/sets metadata info by means of
+ * calling dofunc() function on the basis of @argv argument. If successful,
+ * the requested metadata information is copied to userspace memory.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during execution of requested operation.
+ * * %-EINVAL - Invalid arguments from userspace.
+ * * %-ENOMEM - Insufficient memory available.
+ */
static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
struct nilfs_argv *argv, int dir,
ssize_t (*dofunc)(struct the_nilfs *,
@@ -45,7 +49,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
void *, size_t, size_t))
{
void *buf;
- void __user *base = (void __user *)(unsigned long)argv->v_base;
+ void __user *base = u64_to_user_ptr(argv->v_base);
size_t maxmembs, total, n;
ssize_t nr;
int ret, i;
@@ -54,10 +58,18 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
if (argv->v_nmembs == 0)
return 0;
- if (argv->v_size > PAGE_SIZE)
+ if ((size_t)argv->v_size > PAGE_SIZE)
return -EINVAL;
- buf = (void *)__get_free_pages(GFP_NOFS, 0);
+ /*
+ * Reject pairs of a start item position (argv->v_index) and a
+ * total count (argv->v_nmembs) which leads position 'pos' to
+ * overflow by the increment at the end of the loop.
+ */
+ if (argv->v_index > ~(__u64)0 - argv->v_nmembs)
+ return -EINVAL;
+
+ buf = (void *)get_zeroed_page(GFP_NOFS);
if (unlikely(!buf))
return -ENOMEM;
maxmembs = PAGE_SIZE / argv->v_size;
@@ -99,70 +111,87 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
return ret;
}
-static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp)
+/**
+ * nilfs_fileattr_get - retrieve miscellaneous file attributes
+ * @dentry: the object to retrieve from
+ * @fa: fileattr pointer
+ *
+ * Return: always 0 as success.
+ */
+int nilfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
{
- unsigned int flags = NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE;
+ struct inode *inode = d_inode(dentry);
+
+ fileattr_fill_flags(fa, NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE);
- return put_user(flags, (int __user *)argp);
+ return 0;
}
-static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
- void __user *argp)
+/**
+ * nilfs_fileattr_set - change miscellaneous file attributes
+ * @idmap: idmap of the mount
+ * @dentry: the object to change
+ * @fa: fileattr pointer
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+int nilfs_fileattr_set(struct mnt_idmap *idmap,
+ struct dentry *dentry, struct file_kattr *fa)
{
+ struct inode *inode = d_inode(dentry);
struct nilfs_transaction_info ti;
unsigned int flags, oldflags;
int ret;
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
- if (get_user(flags, (int __user *)argp))
- return -EFAULT;
-
- ret = mnt_want_write_file(filp);
- if (ret)
- return ret;
-
- flags = nilfs_mask_flags(inode->i_mode, flags);
-
- mutex_lock(&inode->i_mutex);
+ if (fileattr_has_fsx(fa))
+ return -EOPNOTSUPP;
- oldflags = NILFS_I(inode)->i_flags;
-
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by the
- * relevant capability.
- */
- ret = -EPERM;
- if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) &&
- !capable(CAP_LINUX_IMMUTABLE))
- goto out;
+ flags = nilfs_mask_flags(inode->i_mode, fa->flags);
ret = nilfs_transaction_begin(inode->i_sb, &ti, 0);
if (ret)
- goto out;
+ return ret;
- NILFS_I(inode)->i_flags = (oldflags & ~FS_FL_USER_MODIFIABLE) |
- (flags & FS_FL_USER_MODIFIABLE);
+ oldflags = NILFS_I(inode)->i_flags & ~FS_FL_USER_MODIFIABLE;
+ NILFS_I(inode)->i_flags = oldflags | (flags & FS_FL_USER_MODIFIABLE);
nilfs_set_inode_flags(inode);
- inode->i_ctime = CURRENT_TIME;
+ inode_set_ctime_current(inode);
if (IS_SYNC(inode))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
nilfs_mark_inode_dirty(inode);
- ret = nilfs_transaction_commit(inode->i_sb);
-out:
- mutex_unlock(&inode->i_mutex);
- mnt_drop_write_file(filp);
- return ret;
+ return nilfs_transaction_commit(inode->i_sb);
}
+/**
+ * nilfs_ioctl_getversion - get info about a file's version (generation number)
+ * @inode: inode object
+ * @argp: userspace memory where the generation number of @inode is stored
+ *
+ * Return: 0 on success, or %-EFAULT on error.
+ */
static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp)
{
return put_user(inode->i_generation, (int __user *)argp);
}
+/**
+ * nilfs_ioctl_change_cpmode - change checkpoint mode (checkpoint/snapshot)
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_change_cpmode() function changes mode of
+ * given checkpoint between checkpoint and snapshot state. This ioctl
+ * is used in chcp and mkcp utilities.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during checkpoint mode changing.
+ * * %-EPERM - Operation not permitted.
+ */
static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -198,6 +227,22 @@ out:
return ret;
}
+/**
+ * nilfs_ioctl_delete_checkpoint - remove checkpoint
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_delete_checkpoint() function removes
+ * checkpoint from NILFS2 file system. This ioctl is used in rmcp
+ * utility.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during checkpoint removing.
+ * * %-EPERM - Operation not permitted.
+ */
static int
nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
@@ -229,6 +274,21 @@ out:
return ret;
}
+/**
+ * nilfs_ioctl_do_get_cpinfo - callback method getting info about checkpoints
+ * @nilfs: nilfs object
+ * @posp: pointer on array of checkpoint's numbers
+ * @flags: checkpoint mode (checkpoint or snapshot)
+ * @buf: buffer for storing checkpoints' info
+ * @size: size in bytes of one checkpoint info item in array
+ * @nmembs: number of checkpoints in array (numbers and infos)
+ *
+ * Description: nilfs_ioctl_do_get_cpinfo() function returns info about
+ * requested checkpoints. The NILFS_IOCTL_GET_CPINFO ioctl is used in
+ * lscp utility and by nilfs_cleanerd daemon.
+ *
+ * Return: Count of nilfs_cpinfo structures in output buffer.
+ */
static ssize_t
nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
void *buf, size_t size, size_t nmembs)
@@ -242,6 +302,24 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
return ret;
}
+/**
+ * nilfs_ioctl_get_cpstat - get checkpoints statistics
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_get_cpstat() returns information about checkpoints.
+ * The NILFS_IOCTL_GET_CPSTAT ioctl is used by lscp, rmcp utilities
+ * and by nilfs_cleanerd daemon. The checkpoint statistics are copied to
+ * the userspace memory pointed to by @argp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during getting checkpoints statistics.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ */
static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -260,6 +338,22 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
return ret;
}
+/**
+ * nilfs_ioctl_do_get_suinfo - callback method getting segment usage info
+ * @nilfs: nilfs object
+ * @posp: pointer on array of segment numbers
+ * @flags: *not used*
+ * @buf: buffer for storing suinfo array
+ * @size: size in bytes of one suinfo item in array
+ * @nmembs: count of segment numbers and suinfos in array
+ *
+ * Description: nilfs_ioctl_do_get_suinfo() function returns segment usage
+ * info about requested segments. The NILFS_IOCTL_GET_SUINFO ioctl is used
+ * in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon.
+ *
+ * Return: Count of nilfs_suinfo structures in output buffer on success,
+ * or a negative error code on failure.
+ */
static ssize_t
nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
void *buf, size_t size, size_t nmembs)
@@ -273,6 +367,24 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
return ret;
}
+/**
+ * nilfs_ioctl_get_sustat - get segment usage statistics
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_get_sustat() returns segment usage statistics.
+ * The NILFS_IOCTL_GET_SUSTAT ioctl is used in lssu, nilfs_resize utilities
+ * and by nilfs_cleanerd daemon. The requested segment usage information is
+ * copied to the userspace memory pointed to by @argp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during getting segment usage statistics.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ */
static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -291,6 +403,22 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
return ret;
}
+/**
+ * nilfs_ioctl_do_get_vinfo - callback method getting virtual blocks info
+ * @nilfs: nilfs object
+ * @posp: *not used*
+ * @flags: *not used*
+ * @buf: buffer for storing array of nilfs_vinfo structures
+ * @size: size in bytes of one vinfo item in array
+ * @nmembs: count of vinfos in array
+ *
+ * Description: nilfs_ioctl_do_get_vinfo() function returns information
+ * on virtual block addresses. The NILFS_IOCTL_GET_VINFO ioctl is used
+ * by nilfs_cleanerd daemon.
+ *
+ * Return: Count of nilfs_vinfo structures in output buffer on success, or
+ * a negative error code on failure.
+ */
static ssize_t
nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
void *buf, size_t size, size_t nmembs)
@@ -303,6 +431,22 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
return ret;
}
+/**
+ * nilfs_ioctl_do_get_bdescs - callback method getting disk block descriptors
+ * @nilfs: nilfs object
+ * @posp: *not used*
+ * @flags: *not used*
+ * @buf: buffer for storing array of nilfs_bdesc structures
+ * @size: size in bytes of one bdesc item in array
+ * @nmembs: count of bdescs in array
+ *
+ * Description: nilfs_ioctl_do_get_bdescs() function returns information
+ * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl
+ * is used by nilfs_cleanerd daemon.
+ *
+ * Return: Count of nilfs_bdescs structures in output buffer on success, or
+ * a negative error code on failure.
+ */
static ssize_t
nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
void *buf, size_t size, size_t nmembs)
@@ -329,6 +473,25 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
return nmembs;
}
+/**
+ * nilfs_ioctl_get_bdescs - get disk block descriptors
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_get_bdescs() function returns information
+ * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl
+ * is used by nilfs_cleanerd daemon. If successful, disk block descriptors
+ * are copied to userspace pointer @argp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during getting disk block descriptors.
+ * * %-EINVAL - Invalid arguments from userspace.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ */
static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -352,6 +515,22 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
return ret;
}
+/**
+ * nilfs_ioctl_move_inode_block - prepare data/node block for moving by GC
+ * @inode: inode object
+ * @vdesc: descriptor of virtual block number
+ * @buffers: list of moving buffers
+ *
+ * Description: nilfs_ioctl_move_inode_block() function registers a data/node
+ * buffer in the GC page cache and submits a read request.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EEXIST - Block conflict detected.
+ * * %-EIO - I/O error.
+ * * %-ENOENT - Requested block doesn't exist.
+ * * %-ENOMEM - Insufficient memory available.
+ */
static int nilfs_ioctl_move_inode_block(struct inode *inode,
struct nilfs_vdesc *vdesc,
struct list_head *buffers)
@@ -369,27 +548,25 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode,
if (unlikely(ret < 0)) {
if (ret == -ENOENT)
- printk(KERN_CRIT
- "%s: invalid virtual block address (%s): "
- "ino=%llu, cno=%llu, offset=%llu, "
- "blocknr=%llu, vblocknr=%llu\n",
- __func__, vdesc->vd_flags ? "node" : "data",
- (unsigned long long)vdesc->vd_ino,
- (unsigned long long)vdesc->vd_cno,
- (unsigned long long)vdesc->vd_offset,
- (unsigned long long)vdesc->vd_blocknr,
- (unsigned long long)vdesc->vd_vblocknr);
+ nilfs_crit(inode->i_sb,
+ "%s: invalid virtual block address (%s): ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu",
+ __func__, vdesc->vd_flags ? "node" : "data",
+ (unsigned long long)vdesc->vd_ino,
+ (unsigned long long)vdesc->vd_cno,
+ (unsigned long long)vdesc->vd_offset,
+ (unsigned long long)vdesc->vd_blocknr,
+ (unsigned long long)vdesc->vd_vblocknr);
return ret;
}
if (unlikely(!list_empty(&bh->b_assoc_buffers))) {
- printk(KERN_CRIT "%s: conflicting %s buffer: ino=%llu, "
- "cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu\n",
- __func__, vdesc->vd_flags ? "node" : "data",
- (unsigned long long)vdesc->vd_ino,
- (unsigned long long)vdesc->vd_cno,
- (unsigned long long)vdesc->vd_offset,
- (unsigned long long)vdesc->vd_blocknr,
- (unsigned long long)vdesc->vd_vblocknr);
+ nilfs_crit(inode->i_sb,
+ "%s: conflicting %s buffer: ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu",
+ __func__, vdesc->vd_flags ? "node" : "data",
+ (unsigned long long)vdesc->vd_ino,
+ (unsigned long long)vdesc->vd_cno,
+ (unsigned long long)vdesc->vd_offset,
+ (unsigned long long)vdesc->vd_blocknr,
+ (unsigned long long)vdesc->vd_vblocknr);
brelse(bh);
return -EEXIST;
}
@@ -397,6 +574,19 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode,
return 0;
}
+/**
+ * nilfs_ioctl_move_blocks - move valid inode's blocks during garbage collection
+ * @sb: superblock object
+ * @argv: vector of arguments from userspace
+ * @buf: array of nilfs_vdesc structures
+ *
+ * Description: nilfs_ioctl_move_blocks() function reads valid data/node
+ * blocks that garbage collector specified with the array of nilfs_vdesc
+ * structures and stores them into page caches of GC inodes.
+ *
+ * Return: Number of processed nilfs_vdesc structures on success, or
+ * a negative error code on failure.
+ */
static int nilfs_ioctl_move_blocks(struct super_block *sb,
struct nilfs_argv *argv, void *buf)
{
@@ -462,6 +652,22 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
return ret;
}
+/**
+ * nilfs_ioctl_delete_checkpoints - delete checkpoints
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @buf: array of periods of checkpoint numbers
+ *
+ * Description: nilfs_ioctl_delete_checkpoints() function deletes checkpoints
+ * in the period from p_start to p_end, excluding p_end itself. The checkpoints
+ * which have been already deleted are ignored.
+ *
+ * Return: Number of processed nilfs_period structures on success, or one of
+ * the following negative error codes on failure:
+ * * %-EINVAL - invalid checkpoints.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ */
static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void *buf)
{
@@ -479,6 +685,21 @@ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
return nmembs;
}
+/**
+ * nilfs_ioctl_free_vblocknrs - free virtual block numbers
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @buf: array of virtual block numbers
+ *
+ * Description: nilfs_ioctl_free_vblocknrs() function frees
+ * the virtual block numbers specified by @buf and @argv->v_nmembs.
+ *
+ * Return: Number of processed virtual block numbers on success, or one of the
+ * following negative error codes on failure:
+ * * %-EIO - I/O error.
+ * * %-ENOENT - Unallocated virtual block number.
+ * * %-ENOMEM - Insufficient memory available.
+ */
static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void *buf)
{
@@ -490,12 +711,28 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
return (ret < 0) ? ret : nmembs;
}
+/**
+ * nilfs_ioctl_mark_blocks_dirty - mark blocks dirty
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @buf: array of block descriptors
+ *
+ * Description: nilfs_ioctl_mark_blocks_dirty() function marks
+ * metadata file or data blocks as dirty.
+ *
+ * Return: Number of processed block descriptors on success, or one of the
+ * following negative error codes on failure:
+ * * %-EIO - I/O error.
+ * * %-ENOENT - Non-existent block (hole block).
+ * * %-ENOMEM - Insufficient memory available.
+ */
static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void *buf)
{
size_t nmembs = argv->v_nmembs;
struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap;
struct nilfs_bdesc *bdescs = buf;
+ struct buffer_head *bh;
int ret, i;
for (i = 0; i < nmembs; i++) {
@@ -513,12 +750,16 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
/* skip dead block */
continue;
if (bdescs[i].bd_level == 0) {
- ret = nilfs_mdt_mark_block_dirty(nilfs->ns_dat,
- bdescs[i].bd_offset);
- if (ret < 0) {
+ ret = nilfs_mdt_get_block(nilfs->ns_dat,
+ bdescs[i].bd_offset,
+ false, NULL, &bh);
+ if (unlikely(ret)) {
WARN_ON(ret == -ENOENT);
return ret;
}
+ mark_buffer_dirty(bh);
+ nilfs_mdt_mark_dirty(nilfs->ns_dat);
+ put_bh(bh);
} else {
ret = nilfs_bmap_mark(bmap, bdescs[i].bd_offset,
bdescs[i].bd_level);
@@ -566,11 +807,24 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
return 0;
failed:
- printk(KERN_ERR "NILFS: GC failed during preparation: %s: err=%d\n",
- msg, ret);
+ nilfs_err(nilfs->ns_sb, "error %d preparing GC: %s", ret, msg);
return ret;
}
+/**
+ * nilfs_ioctl_clean_segments - clean segments
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_clean_segments() function performs a garbage
+ * collection operation using the parameters requested from userspace.
+ * The NILFS_IOCTL_CLEAN_SEGMENTS ioctl is used by the nilfs_cleanerd
+ * daemon.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -582,7 +836,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
sizeof(struct nilfs_bdesc),
sizeof(__u64),
};
- void __user *base;
void *kbufs[5];
struct the_nilfs *nilfs;
size_t len, nsegs;
@@ -603,16 +856,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
nsegs = argv[4].v_nmembs;
if (argv[4].v_size != argsz[4])
goto out;
- if (nsegs > UINT_MAX / sizeof(__u64))
- goto out;
/*
* argv[4] points to segment numbers this ioctl cleans. We
- * use kmalloc() for its buffer because memory used for the
- * segment numbers is enough small.
+ * use kmalloc() for its buffer because the memory used for the
+ * segment numbers is small enough.
*/
- kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base,
- nsegs * sizeof(__u64));
+ kbufs[4] = memdup_array_user(u64_to_user_ptr(argv[4].v_base),
+ nsegs, sizeof(__u64));
if (IS_ERR(kbufs[4])) {
ret = PTR_ERR(kbufs[4]);
goto out;
@@ -631,20 +882,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
goto out_free;
len = argv[n].v_size * argv[n].v_nmembs;
- base = (void __user *)(unsigned long)argv[n].v_base;
if (len == 0) {
kbufs[n] = NULL;
continue;
}
- kbufs[n] = vmalloc(len);
- if (!kbufs[n]) {
- ret = -ENOMEM;
- goto out_free;
- }
- if (copy_from_user(kbufs[n], base, len)) {
- ret = -EFAULT;
- vfree(kbufs[n]);
+ kbufs[n] = vmemdup_user(u64_to_user_ptr(argv[n].v_base), len);
+ if (IS_ERR(kbufs[n])) {
+ ret = PTR_ERR(kbufs[n]);
goto out_free;
}
}
@@ -661,10 +906,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
}
ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]);
- if (ret < 0)
- printk(KERN_ERR "NILFS: GC failed during preparation: "
- "cannot read source blocks: err=%d\n", ret);
- else {
+ if (ret < 0) {
+ nilfs_err(inode->i_sb,
+ "error %d preparing GC: cannot read source blocks",
+ ret);
+ } else {
if (nilfs_sb_need_update(nilfs))
set_nilfs_discontinued(nilfs);
ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
@@ -675,13 +921,34 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
out_free:
while (--n >= 0)
- vfree(kbufs[n]);
+ kvfree(kbufs[n]);
kfree(kbufs[4]);
out:
mnt_drop_write_file(filp);
return ret;
}
+/**
+ * nilfs_ioctl_sync - make a checkpoint
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_sync() function constructs a logical segment
+ * for checkpointing. This function guarantees that all modified data
+ * and metadata have been written out to the device when it returns
+ * successfully.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during execution of requested operation.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No space left on device (only in a panic state).
+ * * %-ERESTARTSYS - Interrupted.
+ * * %-EROFS - Read only filesystem.
+ */
static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -694,11 +961,9 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
return ret;
nilfs = inode->i_sb->s_fs_info;
- if (nilfs_test_opt(nilfs, BARRIER)) {
- ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
- if (ret == -EIO)
- return ret;
- }
+ ret = nilfs_flush_device(nilfs);
+ if (ret < 0)
+ return ret;
if (argp != NULL) {
down_read(&nilfs->ns_segctor_sem);
@@ -710,6 +975,14 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
return 0;
}
+/**
+ * nilfs_ioctl_resize - resize NILFS2 volume
+ * @inode: inode object
+ * @filp: file object
+ * @argp: pointer on argument from userspace
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
void __user *argp)
{
@@ -735,6 +1008,59 @@ out:
return ret;
}
+/**
+ * nilfs_ioctl_trim_fs() - FITRIM ioctl handler
+ * @inode: inode object
+ * @argp: pointer to a struct fstrim_range in userspace
+ *
+ * Description: nilfs_ioctl_trim_fs() is the FITRIM ioctl handler. It copies
+ * the struct fstrim_range from userspace, raises its minlen field to at
+ * least the value of bdev_discard_granularity() for the backing device, and
+ * calls nilfs_sufile_trim_fs(), which performs the actual trim operation,
+ * with ns_segctor_sem held for reading. On success the range structure is
+ * copied back to @argp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure (a negative value from nilfs_sufile_trim_fs() is returned
+ * unchanged):
+ * * %-EFAULT - Failure copying the range from or to userspace.
+ * * %-EOPNOTSUPP - bdev_max_discard_sectors() is zero for the device.
+ * * %-EPERM - Caller lacks CAP_SYS_ADMIN.
+ */
+static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp)
+{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+ struct fstrim_range range;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!bdev_max_discard_sectors(nilfs->ns_bdev))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&range, argp, sizeof(range)))
+ return -EFAULT;
+
+ range.minlen = max_t(u64, range.minlen,
+ bdev_discard_granularity(nilfs->ns_bdev));
+
+ down_read(&nilfs->ns_segctor_sem);
+ ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range);
+ up_read(&nilfs->ns_segctor_sem);
+
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user(argp, &range, sizeof(range)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/**
+ * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated
+ * @inode: inode object
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_set_alloc_range() function defines lower limit
+ * of segments in bytes and upper limit of segments in bytes.
+ * The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
{
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
@@ -751,15 +1077,22 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
goto out;
ret = -ERANGE;
- if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode))
+ if (range[1] > bdev_nr_bytes(inode->i_sb->s_bdev))
goto out;
segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
minseg = range[0] + segbytes - 1;
- do_div(minseg, segbytes);
+ minseg = div64_ul(minseg, segbytes);
+
+ if (range[1] < 4096)
+ goto out;
+
maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
- do_div(maxseg, segbytes);
+ if (maxseg < segbytes)
+ goto out;
+
+ maxseg = div64_ul(maxseg, segbytes);
maxseg--;
ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
@@ -767,6 +1100,26 @@ out:
return ret;
}
+/**
+ * nilfs_ioctl_get_info - wrapping function of get metadata info
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ * @membsz: size of an item in bytes
+ * @dofunc: concrete function of getting metadata info
+ *
+ * Description: nilfs_ioctl_get_info() gets metadata info by means of
+ * calling dofunc() function. The requested metadata information is copied
+ * to userspace memory @argp.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Failure during execution of requested operation.
+ * * %-EINVAL - Invalid arguments from userspace.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ */
static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp,
size_t membsz,
@@ -794,16 +1147,175 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
return ret;
}
+/**
+ * nilfs_ioctl_set_suinfo - set segment usage info
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code (*not used*)
+ * @argp: pointer to a struct nilfs_argv in userspace
+ *
+ * Description: Expects an array of nilfs_suinfo_update structures
+ * encapsulated in nilfs_argv and updates the segment usage info
+ * according to the flags in nilfs_suinfo_update. The request is rejected
+ * if the item size is smaller than struct nilfs_suinfo_update, if the item
+ * count exceeds the number of segments, or if the item count is not below
+ * UINT_MAX divided by the item size. An empty array succeeds without
+ * performing any update. The update runs inside a transaction that is
+ * aborted on failure and committed otherwise.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure (an error from mnt_want_write_file() is returned unchanged):
+ * * %-EEXIST - Block conflict detected.
+ * * %-EFAULT - Error copying input data.
+ * * %-EINVAL - Invalid values in input (item size or count, segment number,
+ * flags or nblocks).
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EPERM - Not enough permissions (CAP_SYS_ADMIN is required).
+ */
+static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp,
+ unsigned int cmd, void __user *argp)
+{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+ struct nilfs_transaction_info ti;
+ struct nilfs_argv argv;
+ size_t len;
+ void *kbuf;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ ret = -EFAULT;
+ if (copy_from_user(&argv, argp, sizeof(argv)))
+ goto out;
+
+ ret = -EINVAL;
+ if (argv.v_size < sizeof(struct nilfs_suinfo_update))
+ goto out;
+
+ if (argv.v_nmembs > nilfs->ns_nsegments)
+ goto out;
+
+ if (argv.v_nmembs >= UINT_MAX / argv.v_size)
+ goto out;
+
+ len = argv.v_size * argv.v_nmembs;
+ if (!len) {
+ ret = 0;
+ goto out;
+ }
+
+ kbuf = vmemdup_user(u64_to_user_ptr(argv.v_base), len);
+ if (IS_ERR(kbuf)) {
+ ret = PTR_ERR(kbuf);
+ goto out;
+ }
+
+ nilfs_transaction_begin(inode->i_sb, &ti, 0);
+ ret = nilfs_sufile_set_suinfo(nilfs->ns_sufile, kbuf, argv.v_size,
+ argv.v_nmembs);
+ if (unlikely(ret < 0))
+ nilfs_transaction_abort(inode->i_sb);
+ else
+ nilfs_transaction_commit(inode->i_sb); /* never fails */
+
+ kvfree(kbuf);
+out:
+ mnt_drop_write_file(filp);
+ return ret;
+}
+
+/**
+ * nilfs_ioctl_get_fslabel - get the volume name of the file system
+ * @sb: super block instance
+ * @argp: pointer to userspace memory where the volume name should be stored
+ *
+ * Description: The name is taken from the s_volume_name field of the
+ * superblock at ns_sbp[0] while ns_sem is held for reading, and
+ * NILFS_MAX_VOLUME_NAME + 1 bytes are then copied to @argp. A build-time
+ * check ensures that NILFS_MAX_VOLUME_NAME is smaller than FSLABEL_MAX.
+ *
+ * Return: 0 on success, %-EFAULT if copying to userspace memory fails.
+ */
+static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp)
+{
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ char label[NILFS_MAX_VOLUME_NAME + 1];
+
+ BUILD_BUG_ON(NILFS_MAX_VOLUME_NAME >= FSLABEL_MAX);
+
+ down_read(&nilfs->ns_sem);
+ memtostr_pad(label, nilfs->ns_sbp[0]->s_volume_name);
+ up_read(&nilfs->ns_sem);
+
+ if (copy_to_user(argp, label, sizeof(label)))
+ return -EFAULT;
+ return 0;
+}
+
+/**
+ * nilfs_ioctl_set_fslabel - set the volume name of the file system
+ * @sb: super block instance
+ * @filp: file object
+ * @argp: pointer to userspace memory that contains the volume name
+ *
+ * Description: Copies NILFS_MAX_VOLUME_NAME + 1 bytes from @argp and
+ * requires a NUL terminator within them, so the label itself may be at most
+ * NILFS_MAX_VOLUME_NAME bytes long. With ns_sem held for writing, the name
+ * is stored into the first superblock returned by nilfs_prepare_super() and
+ * into the second one if it is present, and the result is written out with
+ * nilfs_commit_super() using NILFS_SB_COMMIT_ALL.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EFAULT - Error copying input data.
+ * * %-EINVAL - Label length exceeds record size in superblock.
+ * * %-EIO - I/O error.
+ * * %-EPERM - Operation not permitted (insufficient permissions).
+ * * %-EROFS - Read only file system.
+ */
+static int nilfs_ioctl_set_fslabel(struct super_block *sb, struct file *filp,
+ void __user *argp)
+{
+ char label[NILFS_MAX_VOLUME_NAME + 1];
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ struct nilfs_super_block **sbp;
+ size_t len;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ if (copy_from_user(label, argp, NILFS_MAX_VOLUME_NAME + 1)) {
+ ret = -EFAULT;
+ goto out_drop_write;
+ }
+
+ len = strnlen(label, NILFS_MAX_VOLUME_NAME + 1);
+ if (len > NILFS_MAX_VOLUME_NAME) {
+ nilfs_err(sb, "unable to set label with more than %zu bytes",
+ NILFS_MAX_VOLUME_NAME);
+ ret = -EINVAL;
+ goto out_drop_write;
+ }
+
+ down_write(&nilfs->ns_sem);
+ sbp = nilfs_prepare_super(sb, false);
+ if (unlikely(!sbp)) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
+ strtomem_pad(sbp[0]->s_volume_name, label, 0);
+ if (sbp[1])
+ strtomem_pad(sbp[1]->s_volume_name, label, 0);
+
+ ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
+
+out_unlock:
+ up_write(&nilfs->ns_sem);
+out_drop_write:
+ mnt_drop_write_file(filp);
+ return ret;
+}
+
long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
void __user *argp = (void __user *)arg;
switch (cmd) {
- case FS_IOC_GETFLAGS:
- return nilfs_ioctl_getflags(inode, argp);
- case FS_IOC_SETFLAGS:
- return nilfs_ioctl_setflags(inode, filp, argp);
case FS_IOC_GETVERSION:
return nilfs_ioctl_getversion(inode, argp);
case NILFS_IOCTL_CHANGE_CPMODE:
@@ -820,6 +1332,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return nilfs_ioctl_get_info(inode, filp, cmd, argp,
sizeof(struct nilfs_suinfo),
nilfs_ioctl_do_get_suinfo);
+ case NILFS_IOCTL_SET_SUINFO:
+ return nilfs_ioctl_set_suinfo(inode, filp, cmd, argp);
case NILFS_IOCTL_GET_SUSTAT:
return nilfs_ioctl_get_sustat(inode, filp, cmd, argp);
case NILFS_IOCTL_GET_VINFO:
@@ -836,6 +1350,12 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return nilfs_ioctl_resize(inode, filp, argp);
case NILFS_IOCTL_SET_ALLOC_RANGE:
return nilfs_ioctl_set_alloc_range(inode, argp);
+ case FITRIM:
+ return nilfs_ioctl_trim_fs(inode, argp);
+ case FS_IOC_GETFSLABEL:
+ return nilfs_ioctl_get_fslabel(inode->i_sb, argp);
+ case FS_IOC_SETFSLABEL:
+ return nilfs_ioctl_set_fslabel(inode->i_sb, filp, argp);
default:
return -ENOTTY;
}
@@ -845,12 +1365,6 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
- case FS_IOC32_GETFLAGS:
- cmd = FS_IOC_GETFLAGS;
- break;
- case FS_IOC32_SETFLAGS:
- cmd = FS_IOC_SETFLAGS;
- break;
case FS_IOC32_GETVERSION:
cmd = FS_IOC_GETVERSION;
break;
@@ -859,6 +1373,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case NILFS_IOCTL_GET_CPINFO:
case NILFS_IOCTL_GET_CPSTAT:
case NILFS_IOCTL_GET_SUINFO:
+ case NILFS_IOCTL_SET_SUINFO:
case NILFS_IOCTL_GET_SUSTAT:
case NILFS_IOCTL_GET_VINFO:
case NILFS_IOCTL_GET_BDESCS:
@@ -866,6 +1381,9 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case NILFS_IOCTL_SYNC:
case NILFS_IOCTL_RESIZE:
case NILFS_IOCTL_SET_ALLOC_RANGE:
+ case FITRIM:
+ case FS_IOC_GETFSLABEL:
+ case FS_IOC_SETFSLABEL:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index c4dcd1db57ee..946b0d3534a5 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * mdt.c - meta data file for NILFS
+ * Meta data file for NILFS
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*/
#include <linux/buffer_head.h>
@@ -32,7 +19,9 @@
#include "segment.h"
#include "page.h"
#include "mdt.h"
+#include "alloc.h" /* nilfs_palloc_destroy_cache() */
+#include <trace/events/nilfs2.h>
#define NILFS_MDT_MAX_RA_BLOCKS (16 - 1)
@@ -44,7 +33,8 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
struct buffer_head *, void *))
{
struct nilfs_inode_info *ii = NILFS_I(inode);
- void *kaddr;
+ struct folio *folio = bh->b_folio;
+ void *from;
int ret;
/* Caller exclude read accesses using page lock */
@@ -58,16 +48,21 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
set_buffer_mapped(bh);
- kaddr = kmap_atomic(bh->b_page);
- memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
+ /* Initialize block (block size > PAGE_SIZE not yet supported) */
+ from = kmap_local_folio(folio, offset_in_folio(folio, bh->b_data));
+ memset(from, 0, bh->b_size);
if (init_block)
- init_block(inode, bh, kaddr);
- flush_dcache_page(bh->b_page);
- kunmap_atomic(kaddr);
+ init_block(inode, bh, from);
+ kunmap_local(from);
+
+ flush_dcache_folio(folio);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(inode);
+
+ trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block);
+
return 0;
}
@@ -97,7 +92,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
if (buffer_uptodate(bh))
goto failed_bh;
- bh->b_bdev = sb->s_bdev;
err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
if (likely(!err)) {
get_bh(bh);
@@ -105,8 +99,8 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
}
failed_bh:
- unlock_page(bh->b_page);
- page_cache_release(bh->b_page);
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
brelse(bh);
failed_unlock:
@@ -119,8 +113,8 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
}
static int
-nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
- int mode, struct buffer_head **out_bh)
+nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, blk_opf_t opf,
+ struct buffer_head **out_bh)
{
struct buffer_head *bh;
__u64 blknum = 0;
@@ -134,12 +128,12 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
if (buffer_uptodate(bh))
goto out;
- if (mode == READA) {
+ if (opf & REQ_RAHEAD) {
if (!trylock_buffer(bh)) {
ret = -EBUSY;
goto failed_bh;
}
- } else /* mode == READ */
+ } else /* opf == REQ_OP_READ */
lock_buffer(bh);
if (buffer_uptodate(bh)) {
@@ -156,15 +150,18 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
- submit_bh(mode, bh);
+ submit_bh(opf, bh);
ret = 0;
+
+ trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff,
+ opf & REQ_OP_MASK);
out:
get_bh(bh);
*out_bh = bh;
failed_bh:
- unlock_page(bh->b_page);
- page_cache_release(bh->b_page);
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
brelse(bh);
failed:
return ret;
@@ -178,7 +175,7 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS;
int err;
- err = nilfs_mdt_submit_block(inode, block, READ, &first_bh);
+ err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, &first_bh);
if (err == -EEXIST) /* internal code */
goto out;
@@ -188,7 +185,8 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
if (readahead) {
blkoff = block + 1;
for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
- err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh);
+ err = nilfs_mdt_submit_block(inode, blkoff,
+ REQ_OP_READ | REQ_RAHEAD, &bh);
if (likely(!err || err == -EEXIST))
brelse(bh);
else if (err != -EBUSY)
@@ -203,8 +201,12 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
out_no_wait:
err = -EIO;
- if (!buffer_uptodate(first_bh))
+ if (!buffer_uptodate(first_bh)) {
+ nilfs_err(inode->i_sb,
+ "I/O error reading meta-data file (ino=%lu, block-offset=%lu)",
+ inode->i_ino, block);
goto failed_bh;
+ }
out:
*out_bh = first_bh;
return 0;
@@ -224,20 +226,21 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
* @out_bh: output of a pointer to the buffer_head
*
* nilfs_mdt_get_block() looks up the specified buffer and tries to create
- * a new buffer if @create is not zero. On success, the returned buffer is
- * assured to be either existing or formatted using a buffer lock on success.
- * @out_bh is substituted only when zero is returned.
- *
- * Return Value: On success, it returns 0. On error, the following negative
- * error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
- *
- * %-EIO - I/O error
+ * a new buffer if @create is not zero. If (and only if) this function
+ * succeeds, it stores a pointer to the retrieved buffer head in the location
+ * pointed to by @out_bh.
*
- * %-ENOENT - the specified block does not exist (hole block)
+ * The retrieved buffer may be either an existing one or a newly allocated one.
+ * For a newly created buffer, if the callback function argument @init_block
+ * is non-NULL, the callback will be called with the buffer locked to format
+ * the block.
*
- * %-EROFS - Read only filesystem (for create mode)
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - The specified block does not exist (hole block).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EROFS - Read only filesystem (for create mode).
*/
int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
void (*init_block)(struct inode *,
@@ -261,16 +264,66 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
}
/**
- * nilfs_mdt_delete_block - make a hole on the meta data file.
+ * nilfs_mdt_find_block - find and get a buffer on meta data file.
* @inode: inode of the meta data file
- * @block: block offset
+ * @start: start block offset (inclusive)
+ * @end: end block offset (inclusive)
+ * @blkoff: block offset
+ * @out_bh: place to store a pointer to buffer_head struct
*
- * Return Value: On success, zero is returned.
- * On error, one of the following negative error code is returned.
+ * nilfs_mdt_find_block() looks up an existing block in range of
+ * [@start, @end] and stores pointer to a buffer head of the block to
+ * @out_bh, and block offset to @blkoff, respectively. @out_bh and
+ * @blkoff are substituted only when zero is returned.
*
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - No block was found in the range.
+ * * %-ENOMEM - Insufficient memory available.
+ */
+int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
+ unsigned long end, unsigned long *blkoff,
+ struct buffer_head **out_bh)
+{
+ __u64 next;
+ int ret;
+
+ if (unlikely(start > end)) /* empty range: nothing to look up */
+ return -ENOENT;
+
+ ret = nilfs_mdt_read_block(inode, start, true, out_bh);
+ if (!ret) { /* a block exists right at @start */
+ *blkoff = start;
+ goto out;
+ }
+ if (unlikely(ret != -ENOENT || start == ULONG_MAX)) /* other error, or start + 1 would wrap */
+ goto out;
+
+ ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next);
+ if (!ret) {
+ if (next <= end) {
+ ret = nilfs_mdt_read_block(inode, next, true, out_bh);
+ if (!ret)
+ *blkoff = next;
+ } else {
+ ret = -ENOENT; /* key from nilfs_bmap_seek_key() lies beyond @end */
+ }
+ }
+out:
+ return ret;
+}
+
+/**
+ * nilfs_mdt_delete_block - make a hole on the meta data file.
+ * @inode: inode of the meta data file
+ * @block: block offset
*
- * %-EIO - I/O error
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Non-existent block.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
{
@@ -293,39 +346,35 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
* nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and
* tries to release the page including the buffer from a page cache.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error code is returned.
- *
- * %-EBUSY - page has an active buffer.
- *
- * %-ENOENT - page cache has no page addressed by the offset.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EBUSY - Page has an active buffer.
+ * * %-ENOENT - Page cache has no page addressed by the offset.
*/
int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
{
- pgoff_t index = (pgoff_t)block >>
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
- struct page *page;
- unsigned long first_block;
+ pgoff_t index = block >> (PAGE_SHIFT - inode->i_blkbits);
+ struct folio *folio;
+ struct buffer_head *bh;
int ret = 0;
int still_dirty;
- page = find_lock_page(inode->i_mapping, index);
- if (!page)
+ folio = filemap_lock_folio(inode->i_mapping, index);
+ if (IS_ERR(folio))
return -ENOENT;
- wait_on_page_writeback(page);
-
- first_block = (unsigned long)index <<
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
- if (page_has_buffers(page)) {
- struct buffer_head *bh;
+ folio_wait_writeback(folio);
- bh = nilfs_page_get_nth_block(page, block - first_block);
+ bh = folio_buffers(folio);
+ if (bh) {
+ unsigned long first_block = index <<
+ (PAGE_SHIFT - inode->i_blkbits);
+ bh = get_nth_bh(bh, block - first_block);
nilfs_forget_buffer(bh);
}
- still_dirty = PageDirty(page);
- unlock_page(page);
- page_cache_release(page);
+ still_dirty = folio_test_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
if (still_dirty ||
invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
@@ -333,34 +382,6 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
return ret;
}
-/**
- * nilfs_mdt_mark_block_dirty - mark a block on the meta data file dirty.
- * @inode: inode of the meta data file
- * @block: block offset
- *
- * Return Value: On success, it returns 0. On error, the following negative
- * error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
- *
- * %-EIO - I/O error
- *
- * %-ENOENT - the specified block does not exist (hole block)
- */
-int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
-{
- struct buffer_head *bh;
- int err;
-
- err = nilfs_mdt_read_block(inode, block, 0, &bh);
- if (unlikely(err))
- return err;
- mark_buffer_dirty(bh);
- nilfs_mdt_mark_dirty(inode);
- brelse(bh);
- return 0;
-}
-
int nilfs_mdt_fetch_dirty(struct inode *inode)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
@@ -372,27 +393,27 @@ int nilfs_mdt_fetch_dirty(struct inode *inode)
return test_bit(NILFS_I_DIRTY, &ii->i_state);
}
-static int
-nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
+static int nilfs_mdt_write_folio(struct folio *folio,
+ struct writeback_control *wbc)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct super_block *sb;
int err = 0;
- if (inode && (inode->i_sb->s_flags & MS_RDONLY)) {
+ if (inode && sb_rdonly(inode->i_sb)) {
/*
* It means that filesystem was remounted in read-only
* mode because of error or metadata corruption. But we
- * have dirty pages that try to be flushed in background.
- * So, here we simply discard this dirty page.
+ * have dirty folios that try to be flushed in background.
+ * So, here we simply discard this dirty folio.
*/
- nilfs_clear_dirty_page(page, false);
- unlock_page(page);
+ nilfs_clear_folio_dirty(folio);
+ folio_unlock(folio);
return -EROFS;
}
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
+ folio_redirty_for_writepage(wbc, folio);
+ folio_unlock(folio);
if (!inode)
return 0;
@@ -401,15 +422,27 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
if (wbc->sync_mode == WB_SYNC_ALL)
err = nilfs_construct_segment(sb);
- else if (wbc->for_reclaim)
- nilfs_flush_segment(sb, inode->i_ino);
return err;
}
+static int nilfs_mdt_writeback(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct folio *folio = NULL;
+ int error;
+
+ while ((folio = writeback_iter(mapping, wbc, folio, &error)))
+ error = nilfs_mdt_write_folio(folio, wbc);
+
+ return error;
+}
static const struct address_space_operations def_mdt_aops = {
- .writepage = nilfs_mdt_write_page,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
+ .writepages = nilfs_mdt_writeback,
+ .migrate_folio = buffer_migrate_folio_norefs,
};
static const struct inode_operations def_mdt_iops;
@@ -429,7 +462,6 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz)
inode->i_mode = S_IFREG;
mapping_set_gfp_mask(inode->i_mapping, gfp_mask);
- inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
inode->i_op = &def_mdt_iops;
inode->i_fop = &def_mdt_fops;
@@ -438,13 +470,46 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz)
return 0;
}
-void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
- unsigned header_size)
+/**
+ * nilfs_mdt_clear - do cleanup for the metadata file
+ * @inode: inode of the metadata file
+ */
+void nilfs_mdt_clear(struct inode *inode)
+{
+ struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
+ struct nilfs_shadow_map *shadow = mdi->mi_shadow;
+
+ if (mdi->mi_palloc_cache)
+ nilfs_palloc_destroy_cache(inode);
+
+ if (shadow) {
+ struct inode *s_inode = shadow->inode;
+
+ shadow->inode = NULL;
+ iput(s_inode);
+ mdi->mi_shadow = NULL;
+ }
+}
+
+/**
+ * nilfs_mdt_destroy - release resources used by the metadata file
+ * @inode: inode of the metadata file
+ */
+void nilfs_mdt_destroy(struct inode *inode)
+{
+ struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
+
+ kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
+ kfree(mdi);
+}
+
+void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size,
+ unsigned int header_size)
{
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
mi->mi_entry_size = entry_size;
- mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size;
+ mi->mi_entries_per_block = i_blocksize(inode) / entry_size;
mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
}
@@ -452,18 +517,22 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
* nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
* @inode: inode of the metadata file
* @shadow: shadow mapping
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_mdt_setup_shadow_map(struct inode *inode,
struct nilfs_shadow_map *shadow)
{
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
- struct backing_dev_info *bdi = inode->i_sb->s_bdi;
+ struct inode *s_inode;
INIT_LIST_HEAD(&shadow->frozen_buffers);
- address_space_init_once(&shadow->frozen_data);
- nilfs_mapping_init(&shadow->frozen_data, inode, bdi);
- address_space_init_once(&shadow->frozen_btnodes);
- nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi);
+
+ s_inode = nilfs_iget_for_shadow(inode);
+ if (IS_ERR(s_inode))
+ return PTR_ERR(s_inode);
+
+ shadow->inode = s_inode;
mi->mi_shadow = shadow;
return 0;
}
@@ -471,20 +540,23 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
/**
* nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map
* @inode: inode of the metadata file
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_mdt_save_to_shadow_map(struct inode *inode)
{
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
struct nilfs_inode_info *ii = NILFS_I(inode);
struct nilfs_shadow_map *shadow = mi->mi_shadow;
+ struct inode *s_inode = shadow->inode;
int ret;
- ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping);
+ ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping);
if (ret)
goto out;
- ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes,
- &ii->i_btnode_cache);
+ ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping,
+ ii->i_assoc_inode->i_mapping);
if (ret)
goto out;
@@ -497,17 +569,20 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
{
struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
struct buffer_head *bh_frozen;
- struct page *page;
+ struct folio *folio;
int blkbits = inode->i_blkbits;
- page = grab_cache_page(&shadow->frozen_data, bh->b_page->index);
- if (!page)
- return -ENOMEM;
+ folio = filemap_grab_folio(shadow->inode->i_mapping,
+ bh->b_folio->index);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << blkbits, 0);
+ bh_frozen = folio_buffers(folio);
+ if (!bh_frozen)
+ bh_frozen = create_empty_buffers(folio, 1 << blkbits, 0);
- bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);
+ bh_frozen = get_nth_bh(bh_frozen,
+ offset_in_folio(folio, bh->b_data) >> blkbits);
if (!buffer_uptodate(bh_frozen))
nilfs_copy_buffer(bh_frozen, bh);
@@ -519,8 +594,8 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
brelse(bh_frozen); /* already frozen */
}
- unlock_page(page);
- page_cache_release(page);
+ folio_unlock(folio);
+ folio_put(folio);
return 0;
}
@@ -529,17 +604,20 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
{
struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
struct buffer_head *bh_frozen = NULL;
- struct page *page;
+ struct folio *folio;
int n;
- page = find_lock_page(&shadow->frozen_data, bh->b_page->index);
- if (page) {
- if (page_has_buffers(page)) {
- n = bh_offset(bh) >> inode->i_blkbits;
- bh_frozen = nilfs_page_get_nth_block(page, n);
+ folio = filemap_lock_folio(shadow->inode->i_mapping,
+ bh->b_folio->index);
+ if (!IS_ERR(folio)) {
+ bh_frozen = folio_buffers(folio);
+ if (bh_frozen) {
+ n = offset_in_folio(folio, bh->b_data) >>
+ inode->i_blkbits;
+ bh_frozen = get_nth_bh(bh_frozen, n);
}
- unlock_page(page);
- page_cache_release(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
return bh_frozen;
}
@@ -572,11 +650,12 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode)
if (mi->mi_palloc_cache)
nilfs_palloc_clear_cache(inode);
- nilfs_clear_dirty_pages(inode->i_mapping, true);
- nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data);
+ nilfs_clear_dirty_pages(inode->i_mapping);
+ nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping);
- nilfs_clear_dirty_pages(&ii->i_btnode_cache, true);
- nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes);
+ nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping);
+ nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping,
+ NILFS_I(shadow->inode)->i_assoc_inode->i_mapping);
nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store);
@@ -591,10 +670,11 @@ void nilfs_mdt_clear_shadow_map(struct inode *inode)
{
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
struct nilfs_shadow_map *shadow = mi->mi_shadow;
+ struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode;
down_write(&mi->mi_sem);
nilfs_release_frozen_buffers(shadow);
- truncate_inode_pages(&shadow->frozen_data, 0);
- truncate_inode_pages(&shadow->frozen_btnodes, 0);
+ truncate_inode_pages(shadow->inode->i_mapping, 0);
+ truncate_inode_pages(shadow_btnc_inode->i_mapping, 0);
up_write(&mi->mi_sem);
}
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ab172e8549c5..9e23bab3ff12 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * mdt.h - NILFS meta data file prototype and definitions
+ * NILFS meta data file prototype and definitions
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*/
#ifndef _NILFS_MDT_H
@@ -31,14 +18,12 @@
/**
* struct nilfs_shadow_map - shadow mapping of meta data file
* @bmap_store: shadow copy of bmap state
- * @frozen_data: shadowed dirty data pages
- * @frozen_btnodes: shadowed dirty b-tree nodes' pages
+ * @inode: holder of page caches used in shadow mapping
* @frozen_buffers: list of frozen buffers
*/
struct nilfs_shadow_map {
struct nilfs_bmap_store bmap_store;
- struct address_space frozen_data;
- struct address_space frozen_btnodes;
+ struct inode *inode;
struct list_head frozen_buffers;
};
@@ -57,8 +42,8 @@ struct nilfs_shadow_map {
struct nilfs_mdt_info {
struct rw_semaphore mi_sem;
struct blockgroup_lock *mi_bgl;
- unsigned mi_entry_size;
- unsigned mi_first_entry_offset;
+ unsigned int mi_entry_size;
+ unsigned int mi_first_entry_offset;
unsigned long mi_entries_per_block;
struct nilfs_palloc_cache *mi_palloc_cache;
struct nilfs_shadow_map *mi_shadow;
@@ -71,20 +56,30 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
return inode->i_private;
}
+static inline int nilfs_is_metadata_file_inode(const struct inode *inode)
+{
+ return inode->i_private != NULL;
+}
+
/* Default GFP flags using highmem */
-#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
+#define NILFS_MDT_GFP (__GFP_RECLAIM | __GFP_IO | __GFP_HIGHMEM)
int nilfs_mdt_get_block(struct inode *, unsigned long, int,
void (*init_block)(struct inode *,
struct buffer_head *, void *),
struct buffer_head **);
+int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
+ unsigned long end, unsigned long *blkoff,
+ struct buffer_head **out_bh);
int nilfs_mdt_delete_block(struct inode *, unsigned long);
int nilfs_mdt_forget_block(struct inode *, unsigned long);
-int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
int nilfs_mdt_fetch_dirty(struct inode *);
int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz);
-void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned);
+void nilfs_mdt_clear(struct inode *inode);
+void nilfs_mdt_destroy(struct inode *inode);
+
+void nilfs_mdt_set_entry_size(struct inode *, unsigned int, unsigned int);
int nilfs_mdt_setup_shadow_map(struct inode *inode,
struct nilfs_shadow_map *shadow);
@@ -111,7 +106,10 @@ static inline __u64 nilfs_mdt_cno(struct inode *inode)
return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
}
-#define nilfs_mdt_bgl_lock(inode, bg) \
- (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock)
+static inline spinlock_t *
+nilfs_mdt_bgl_lock(struct inode *inode, unsigned int block_group)
+{
+ return bgl_lock_ptr(NILFS_MDT(inode)->mi_bgl, block_group);
+}
#endif /* _NILFS_MDT_H */
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 9de78f08989e..40f4b1a28705 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -1,24 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * namei.c - NILFS pathname lookup operations.
+ * NILFS pathname lookup operations.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Modified for NILFS by Amagai Yoshiji <amagai@osrg.net>,
- * Ryusuke Konishi <ryusuke@osrg.net>
+ * Modified for NILFS by Amagai Yoshiji and Ryusuke Konishi.
*/
/*
* linux/fs/ext2/namei.c
@@ -49,11 +35,13 @@
static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = nilfs_add_link(dentry, inode);
+
if (!err) {
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
+ unlock_new_inode(inode);
iput(inode);
return err;
}
@@ -67,12 +55,25 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
ino_t ino;
+ int res;
if (dentry->d_name.len > NILFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- ino = nilfs_inode_by_name(dir, &dentry->d_name);
- inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL;
+ res = nilfs_inode_by_name(dir, &dentry->d_name, &ino);
+ if (res) {
+ if (res != -ENOENT)
+ return ERR_PTR(res);
+ inode = NULL;
+ } else {
+ inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino);
+ if (inode == ERR_PTR(-ESTALE)) {
+ nilfs_error(dir->i_sb,
+ "deleted inode referenced: %lu", ino);
+ return ERR_PTR(-EIO);
+ }
+ }
+
return d_splice_alias(inode, dentry);
}
@@ -84,8 +85,8 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int nilfs_create(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
struct nilfs_transaction_info ti;
@@ -112,15 +113,13 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
}
static int
-nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+nilfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
struct nilfs_transaction_info ti;
int err;
- if (!new_valid_dev(rdev))
- return -EINVAL;
-
err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
if (err)
return err;
@@ -139,12 +138,12 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
return err;
}
-static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
+static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
struct nilfs_transaction_info ti;
struct super_block *sb = dir->i_sb;
- unsigned l = strlen(symname)+1;
+ unsigned int l = strlen(symname) + 1;
struct inode *inode;
int err;
@@ -155,13 +154,17 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
if (err)
return err;
- inode = nilfs_new_inode(dir, S_IFLNK | S_IRWXUGO);
+ inode = nilfs_new_inode(dir, S_IFLNK | 0777);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out;
/* slow symlink */
inode->i_op = &nilfs_symlink_inode_operations;
+ inode_nohighmem(inode);
+ mapping_set_gfp_mask(inode->i_mapping,
+ mapping_gfp_constraint(inode->i_mapping,
+ ~__GFP_FS));
inode->i_mapping->a_ops = &nilfs_aops;
err = page_symlink(inode, symname, l);
if (err)
@@ -182,6 +185,7 @@ out:
out_fail:
drop_nlink(inode);
nilfs_mark_inode_dirty(inode);
+ unlock_new_inode(inode);
iput(inode);
goto out;
}
@@ -189,7 +193,7 @@ out_fail:
static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
- struct inode *inode = old_dentry->d_inode;
+ struct inode *inode = d_inode(old_dentry);
struct nilfs_transaction_info ti;
int err;
@@ -197,20 +201,25 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
if (err)
return err;
- inode->i_ctime = CURRENT_TIME;
+ inode_set_ctime_current(inode);
inode_inc_link_count(inode);
ihold(inode);
- err = nilfs_add_nondir(dentry, inode);
- if (!err)
+ err = nilfs_add_link(dentry, inode);
+ if (!err) {
+ d_instantiate(dentry, inode);
err = nilfs_transaction_commit(dir->i_sb);
- else
+ } else {
+ inode_dec_link_count(inode);
+ iput(inode);
nilfs_transaction_abort(dir->i_sb);
+ }
return err;
}
-static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static struct dentry *nilfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode *inode;
struct nilfs_transaction_info ti;
@@ -218,7 +227,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
if (err)
- return err;
+ return ERR_PTR(err);
inc_nlink(dir);
@@ -242,19 +251,20 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
nilfs_mark_inode_dirty(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
out:
if (!err)
err = nilfs_transaction_commit(dir->i_sb);
else
nilfs_transaction_abort(dir->i_sb);
- return err;
+ return ERR_PTR(err);
out_fail:
drop_nlink(inode);
drop_nlink(inode);
nilfs_mark_inode_dirty(inode);
+ unlock_new_inode(inode);
iput(inode);
out_dir:
drop_nlink(dir);
@@ -266,30 +276,32 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
{
struct inode *inode;
struct nilfs_dir_entry *de;
- struct page *page;
+ struct folio *folio;
int err;
- err = -ENOENT;
- de = nilfs_find_entry(dir, &dentry->d_name, &page);
- if (!de)
+ de = nilfs_find_entry(dir, &dentry->d_name, &folio);
+ if (IS_ERR(de)) {
+ err = PTR_ERR(de);
goto out;
+ }
- inode = dentry->d_inode;
+ inode = d_inode(dentry);
err = -EIO;
if (le64_to_cpu(de->inode) != inode->i_ino)
goto out;
if (!inode->i_nlink) {
- nilfs_warning(inode->i_sb, __func__,
- "deleting nonexistent file (%lu), %d\n",
- inode->i_ino, inode->i_nlink);
+ nilfs_warn(inode->i_sb,
+ "deleting nonexistent file (ino=%lu), %d",
+ inode->i_ino, inode->i_nlink);
set_nlink(inode, 1);
}
- err = nilfs_delete_entry(de, page);
+ err = nilfs_delete_entry(de, folio);
+ folio_release_kmap(folio, de);
if (err)
goto out;
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
drop_nlink(inode);
err = 0;
out:
@@ -309,7 +321,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
if (!err) {
nilfs_mark_inode_dirty(dir);
- nilfs_mark_inode_dirty(dentry->d_inode);
+ nilfs_mark_inode_dirty(d_inode(dentry));
err = nilfs_transaction_commit(dir->i_sb);
} else
nilfs_transaction_abort(dir->i_sb);
@@ -319,7 +331,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct nilfs_transaction_info ti;
int err;
@@ -346,50 +358,62 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
return err;
}
-static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+static int nilfs_rename(struct mnt_idmap *idmap,
+ struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
- struct inode *old_inode = old_dentry->d_inode;
- struct inode *new_inode = new_dentry->d_inode;
- struct page *dir_page = NULL;
+ struct inode *old_inode = d_inode(old_dentry);
+ struct inode *new_inode = d_inode(new_dentry);
+ struct folio *dir_folio = NULL;
struct nilfs_dir_entry *dir_de = NULL;
- struct page *old_page;
+ struct folio *old_folio;
struct nilfs_dir_entry *old_de;
struct nilfs_transaction_info ti;
+ bool old_is_dir = S_ISDIR(old_inode->i_mode);
int err;
+ if (flags & ~RENAME_NOREPLACE)
+ return -EINVAL;
+
err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1);
if (unlikely(err))
return err;
- err = -ENOENT;
- old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page);
- if (!old_de)
+ old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio);
+ if (IS_ERR(old_de)) {
+ err = PTR_ERR(old_de);
goto out;
+ }
- if (S_ISDIR(old_inode->i_mode)) {
+ if (old_is_dir && old_dir != new_dir) {
err = -EIO;
- dir_de = nilfs_dotdot(old_inode, &dir_page);
+ dir_de = nilfs_dotdot(old_inode, &dir_folio);
if (!dir_de)
goto out_old;
}
if (new_inode) {
- struct page *new_page;
+ struct folio *new_folio;
struct nilfs_dir_entry *new_de;
err = -ENOTEMPTY;
- if (dir_de && !nilfs_empty_dir(new_inode))
+ if (old_is_dir && !nilfs_empty_dir(new_inode))
goto out_dir;
- err = -ENOENT;
- new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
- if (!new_de)
+ new_de = nilfs_find_entry(new_dir, &new_dentry->d_name,
+ &new_folio);
+ if (IS_ERR(new_de)) {
+ err = PTR_ERR(new_de);
+ goto out_dir;
+ }
+ err = nilfs_set_link(new_dir, new_de, new_folio, old_inode);
+ folio_release_kmap(new_folio, new_de);
+ if (unlikely(err))
goto out_dir;
- nilfs_set_link(new_dir, new_de, new_page, old_inode);
nilfs_mark_inode_dirty(new_dir);
- new_inode->i_ctime = CURRENT_TIME;
- if (dir_de)
+ inode_set_ctime_current(new_inode);
+ if (old_is_dir)
drop_nlink(new_inode);
drop_nlink(new_inode);
nilfs_mark_inode_dirty(new_inode);
@@ -397,7 +421,7 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
err = nilfs_add_link(new_dentry, old_inode);
if (err)
goto out_dir;
- if (dir_de) {
+ if (old_is_dir) {
inc_nlink(new_dir);
nilfs_mark_inode_dirty(new_dir);
}
@@ -407,30 +431,30 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old_inode->i_ctime = CURRENT_TIME;
-
- nilfs_delete_entry(old_de, old_page);
-
- if (dir_de) {
- nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
- drop_nlink(old_dir);
+ inode_set_ctime_current(old_inode);
+
+ err = nilfs_delete_entry(old_de, old_folio);
+ if (likely(!err)) {
+ if (old_is_dir) {
+ if (old_dir != new_dir)
+ err = nilfs_set_link(old_inode, dir_de,
+ dir_folio, new_dir);
+ drop_nlink(old_dir);
+ }
+ nilfs_mark_inode_dirty(old_dir);
}
- nilfs_mark_inode_dirty(old_dir);
nilfs_mark_inode_dirty(old_inode);
- err = nilfs_transaction_commit(old_dir->i_sb);
- return err;
-
out_dir:
- if (dir_de) {
- kunmap(dir_page);
- page_cache_release(dir_page);
- }
+ if (dir_de)
+ folio_release_kmap(dir_folio, dir_de);
out_old:
- kunmap(old_page);
- page_cache_release(old_page);
+ folio_release_kmap(old_folio, old_de);
out:
- nilfs_transaction_abort(old_dir->i_sb);
+ if (likely(!err))
+ err = nilfs_transaction_commit(old_dir->i_sb);
+ else
+ nilfs_transaction_abort(old_dir->i_sb);
return err;
}
@@ -439,22 +463,17 @@ out:
*/
static struct dentry *nilfs_get_parent(struct dentry *child)
{
- unsigned long ino;
- struct inode *inode;
- struct qstr dotdot = QSTR_INIT("..", 2);
+ ino_t ino;
+ int res;
struct nilfs_root *root;
- ino = nilfs_inode_by_name(child->d_inode, &dotdot);
- if (!ino)
- return ERR_PTR(-ENOENT);
+ res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino);
+ if (res)
+ return ERR_PTR(res);
- root = NILFS_I(child->d_inode)->i_root;
+ root = NILFS_I(d_inode(child))->i_root;
- inode = nilfs_iget(child->d_inode->i_sb, root, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- return d_obtain_alias(inode);
+ return d_obtain_alias(nilfs_iget(child->d_sb, root, ino));
}
static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno,
@@ -487,8 +506,7 @@ static struct dentry *nilfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
{
struct nilfs_fid *fid = (struct nilfs_fid *)fh;
- if ((fh_len != NILFS_FID_SIZE_NON_CONNECTABLE &&
- fh_len != NILFS_FID_SIZE_CONNECTABLE) ||
+ if (fh_len < NILFS_FID_SIZE_NON_CONNECTABLE ||
(fh_type != FILEID_NILFS_WITH_PARENT &&
fh_type != FILEID_NILFS_WITHOUT_PARENT))
return NULL;
@@ -501,7 +519,7 @@ static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh,
{
struct nilfs_fid *fid = (struct nilfs_fid *)fh;
- if (fh_len != NILFS_FID_SIZE_CONNECTABLE ||
+ if (fh_len < NILFS_FID_SIZE_CONNECTABLE ||
fh_type != FILEID_NILFS_WITH_PARENT)
return NULL;
@@ -554,6 +572,8 @@ const struct inode_operations nilfs_dir_inode_operations = {
.setattr = nilfs_setattr,
.permission = nilfs_permission,
.fiemap = nilfs_fiemap,
+ .fileattr_get = nilfs_fileattr_get,
+ .fileattr_set = nilfs_fileattr_set,
};
const struct inode_operations nilfs_special_inode_operations = {
@@ -562,9 +582,7 @@ const struct inode_operations nilfs_special_inode_operations = {
};
const struct inode_operations nilfs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.permission = nilfs_permission,
};
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 9bc72dec3fa6..b7e3d91b6243 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -1,24 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * nilfs.h - NILFS local header file.
+ * NILFS local header file.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>
- * Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Koji Sato and Ryusuke Konishi.
*/
#ifndef _NILFS_H
@@ -28,20 +14,23 @@
#include <linux/buffer_head.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
-#include <linux/nilfs2_fs.h>
+#include <linux/fs_struct.h>
+#include <linux/nilfs2_api.h>
+#include <linux/nilfs2_ondisk.h>
#include "the_nilfs.h"
#include "bmap.h"
/**
* struct nilfs_inode_info - nilfs inode data in memory
* @i_flags: inode flags
+ * @i_type: inode type (combination of flags that indicate usage)
* @i_state: dynamic state flags
* @i_bmap: pointer on i_bmap_data
* @i_bmap_data: raw block mapping
* @i_xattr: <TODO>
* @i_dir_start_lookup: page index of last successful search
* @i_cno: checkpoint number for GC inode
- * @i_btnode_cache: cached pages of b-tree nodes
+ * @i_assoc_inode: associated inode (B-tree node cache holder or back pointer)
* @i_dirty: list for connecting dirty files
* @xattr_sem: semaphore for extended attributes processing
* @i_bh: buffer contains disk inode
@@ -50,13 +39,14 @@
*/
struct nilfs_inode_info {
__u32 i_flags;
+ unsigned int i_type;
unsigned long i_state; /* Dynamic state flags */
struct nilfs_bmap *i_bmap;
struct nilfs_bmap i_bmap_data;
__u64 i_xattr; /* sector_t ??? */
__u32 i_dir_start_lookup;
__u64 i_cno; /* check point number for GC inode */
- struct address_space i_btnode_cache;
+ struct inode *i_assoc_inode;
struct list_head i_dirty; /* List for connecting dirty files */
#ifdef CONFIG_NILFS_XATTR
@@ -69,8 +59,10 @@ struct nilfs_inode_info {
*/
struct rw_semaphore xattr_sem;
#endif
- struct buffer_head *i_bh; /* i_bh contains a new or dirty
- disk inode */
+ struct buffer_head *i_bh; /*
+ * i_bh contains a new or dirty
+ * disk inode.
+ */
struct nilfs_root *i_root;
struct inode vfs_inode;
};
@@ -86,13 +78,6 @@ NILFS_BMAP_I(const struct nilfs_bmap *bmap)
return container_of(bmap, struct nilfs_inode_info, i_bmap_data);
}
-static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
-{
- struct nilfs_inode_info *ii =
- container_of(btnc, struct nilfs_inode_info, i_btnode_cache);
- return &ii->vfs_inode;
-}
-
/*
* Dynamic state flags of NILFS on-memory inode (i_state)
*/
@@ -100,13 +85,24 @@ enum {
NILFS_I_NEW = 0, /* Inode is newly created */
NILFS_I_DIRTY, /* The file is dirty */
NILFS_I_QUEUED, /* inode is in dirty_files list */
- NILFS_I_BUSY, /* inode is grabbed by a segment
- constructor */
+ NILFS_I_BUSY, /*
+ * Inode is grabbed by a segment
+ * constructor
+ */
NILFS_I_COLLECTED, /* All dirty blocks are collected */
NILFS_I_UPDATED, /* The file has been written back */
- NILFS_I_INODE_DIRTY, /* write_inode is requested */
+ NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */
NILFS_I_BMAP, /* has bmap and btnode_cache */
- NILFS_I_GCINODE, /* inode for GC, on memory only */
+};
+
+/*
+ * Flags to identify the usage of on-memory inodes (i_type)
+ */
+enum {
+ NILFS_I_TYPE_NORMAL = 0,
+ NILFS_I_TYPE_GC = 0x0001, /* For data caching during GC */
+ NILFS_I_TYPE_BTNC = 0x0002, /* For btree node cache */
+ NILFS_I_TYPE_SHADOW = 0x0004, /* For shadowed page cache */
};
/*
@@ -117,23 +113,40 @@ enum {
NILFS_SB_COMMIT_ALL /* Commit both super blocks */
};
+/**
+ * define NILFS_MAX_VOLUME_NAME - maximum number of characters (bytes) in a
+ * file system volume name
+ *
+ * Defined by the size of the volume name field in the on-disk superblocks.
+ * This volume name does not include the terminating NULL byte if the string
+ * length matches the field size, so use (NILFS_MAX_VOLUME_NAME + 1) for the
+ * size of the buffer that requires a NULL byte termination.
+ */
+#define NILFS_MAX_VOLUME_NAME \
+ sizeof_field(struct nilfs_super_block, s_volume_name)
+
/*
* Macros to check inode numbers
*/
-#define NILFS_MDT_INO_BITS \
- ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \
- 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \
- 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO))
+#define NILFS_MDT_INO_BITS \
+ (BIT(NILFS_DAT_INO) | BIT(NILFS_CPFILE_INO) | \
+ BIT(NILFS_SUFILE_INO) | BIT(NILFS_IFILE_INO) | \
+ BIT(NILFS_ATIME_INO) | BIT(NILFS_SKETCH_INO))
-#define NILFS_SYS_INO_BITS \
- ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS)
+#define NILFS_SYS_INO_BITS (BIT(NILFS_ROOT_INO) | NILFS_MDT_INO_BITS)
#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino)
#define NILFS_MDT_INODE(sb, ino) \
- ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino))))
+ ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino)))
#define NILFS_VALID_INODE(sb, ino) \
- ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino))))
+ ((ino) >= NILFS_FIRST_INO(sb) || \
+ ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino))))
+
+#define NILFS_PRIVATE_INODE(ino) ({ \
+ ino_t __ino = (ino); \
+ ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \
+ (__ino) != NILFS_SKETCH_INO); })
/**
* struct nilfs_transaction_info: context information for synchronization
@@ -141,16 +154,16 @@ enum {
* @ti_save: Backup of journal_info field of task_struct
* @ti_flags: Flags
* @ti_count: Nest level
- * @ti_garbage: List of inode to be put when releasing semaphore
*/
struct nilfs_transaction_info {
u32 ti_magic;
void *ti_save;
- /* This should never used. If this happens,
- one of other filesystems has a bug. */
+ /*
+ * This should never be used. If it happens,
+ * one of the other filesystems has a bug.
+ */
unsigned short ti_flags;
unsigned short ti_count;
- struct list_head ti_garbage;
};
/* ti_magic */
@@ -158,8 +171,10 @@ struct nilfs_transaction_info {
/* ti_flags */
#define NILFS_TI_DYNAMIC_ALLOC 0x0001 /* Allocated from slab */
-#define NILFS_TI_SYNC 0x0002 /* Force to construct segment at the
- end of transaction. */
+#define NILFS_TI_SYNC 0x0002 /*
+ * Force to construct segment at the
+ * end of transaction.
+ */
#define NILFS_TI_GC 0x0004 /* GC context */
#define NILFS_TI_COMMIT 0x0008 /* Change happened or not */
#define NILFS_TI_WRITER 0x0010 /* Constructor context */
@@ -211,6 +226,9 @@ static inline int nilfs_acl_chmod(struct inode *inode)
static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
{
+ if (S_ISLNK(inode->i_mode))
+ return 0;
+
inode->i_mode &= ~current_umask();
return 0;
}
@@ -236,21 +254,24 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags)
}
/* dir.c */
-extern int nilfs_add_link(struct dentry *, struct inode *);
-extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *);
-extern int nilfs_make_empty(struct inode *, struct inode *);
-extern struct nilfs_dir_entry *
-nilfs_find_entry(struct inode *, const struct qstr *, struct page **);
-extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *);
-extern int nilfs_empty_dir(struct inode *);
-extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **);
-extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *,
- struct page *, struct inode *);
+int nilfs_add_link(struct dentry *, struct inode *);
+int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino);
+int nilfs_make_empty(struct inode *, struct inode *);
+struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *,
+ struct folio **);
+int nilfs_delete_entry(struct nilfs_dir_entry *, struct folio *);
+int nilfs_empty_dir(struct inode *);
+struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct folio **);
+int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
+ struct folio *folio, struct inode *inode);
/* file.c */
extern int nilfs_sync_file(struct file *, loff_t, loff_t, int);
/* ioctl.c */
+int nilfs_fileattr_get(struct dentry *dentry, struct file_kattr *m);
+int nilfs_fileattr_set(struct mnt_idmap *idmap,
+ struct dentry *dentry, struct file_kattr *fa);
long nilfs_ioctl(struct file *, unsigned int, unsigned long);
long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
@@ -260,11 +281,11 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
void nilfs_inode_add_blocks(struct inode *inode, int n);
void nilfs_inode_sub_blocks(struct inode *inode, int n);
extern struct inode *nilfs_new_inode(struct inode *, umode_t);
-extern void nilfs_free_inode(struct inode *);
extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern void nilfs_set_inode_flags(struct inode *);
extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *);
-extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int);
+void nilfs_write_inode_common(struct inode *inode,
+ struct nilfs_inode *raw_inode);
struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
unsigned long ino);
struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
@@ -273,31 +294,77 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
unsigned long ino);
extern struct inode *nilfs_iget_for_gc(struct super_block *sb,
unsigned long ino, __u64 cno);
-extern void nilfs_update_inode(struct inode *, struct buffer_head *);
+int nilfs_attach_btree_node_cache(struct inode *inode);
+void nilfs_detach_btree_node_cache(struct inode *inode);
+struct inode *nilfs_iget_for_shadow(struct inode *inode);
+extern void nilfs_update_inode(struct inode *, struct buffer_head *, int);
extern void nilfs_truncate(struct inode *);
extern void nilfs_evict_inode(struct inode *);
-extern int nilfs_setattr(struct dentry *, struct iattr *);
+extern int nilfs_setattr(struct mnt_idmap *, struct dentry *,
+ struct iattr *);
extern void nilfs_write_failed(struct address_space *mapping, loff_t to);
-int nilfs_permission(struct inode *inode, int mask);
+int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
+ int mask);
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
extern int nilfs_inode_dirty(struct inode *);
-int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty);
-extern int nilfs_mark_inode_dirty(struct inode *);
+int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty);
+extern int __nilfs_mark_inode_dirty(struct inode *, int);
extern void nilfs_dirty_inode(struct inode *, int flags);
int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
+static inline int nilfs_mark_inode_dirty(struct inode *inode)
+{
+ return __nilfs_mark_inode_dirty(inode, I_DIRTY);
+}
+static inline int nilfs_mark_inode_dirty_sync(struct inode *inode)
+{
+ return __nilfs_mark_inode_dirty(inode, I_DIRTY_SYNC);
+}
/* super.c */
extern struct inode *nilfs_alloc_inode(struct super_block *);
-extern void nilfs_destroy_inode(struct inode *);
-extern __printf(3, 4)
-void nilfs_error(struct super_block *, const char *, const char *, ...);
+
+__printf(2, 3)
+void __nilfs_msg(struct super_block *sb, const char *fmt, ...);
extern __printf(3, 4)
-void nilfs_warning(struct super_block *, const char *, const char *, ...);
+void __nilfs_error(struct super_block *sb, const char *function,
+ const char *fmt, ...);
+
+#ifdef CONFIG_PRINTK
+
+#define nilfs_msg(sb, level, fmt, ...) \
+ __nilfs_msg(sb, level fmt, ##__VA_ARGS__)
+#define nilfs_error(sb, fmt, ...) \
+ __nilfs_error(sb, __func__, fmt, ##__VA_ARGS__)
+
+#else
+
+#define nilfs_msg(sb, level, fmt, ...) \
+ do { \
+ no_printk(level fmt, ##__VA_ARGS__); \
+ (void)(sb); \
+ } while (0)
+#define nilfs_error(sb, fmt, ...) \
+ do { \
+ no_printk(fmt, ##__VA_ARGS__); \
+ __nilfs_error(sb, "", " "); \
+ } while (0)
+
+#endif /* CONFIG_PRINTK */
+
+#define nilfs_crit(sb, fmt, ...) \
+ nilfs_msg(sb, KERN_CRIT, fmt, ##__VA_ARGS__)
+#define nilfs_err(sb, fmt, ...) \
+ nilfs_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__)
+#define nilfs_warn(sb, fmt, ...) \
+ nilfs_msg(sb, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define nilfs_info(sb, fmt, ...) \
+ nilfs_msg(sb, KERN_INFO, fmt, ##__VA_ARGS__)
+
extern struct nilfs_super_block *
nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
-extern int nilfs_store_magic_and_option(struct super_block *,
- struct nilfs_super_block *, char *);
+extern int nilfs_store_magic(struct super_block *sb,
+ struct nilfs_super_block *sbp);
extern int nilfs_check_feature_compatibility(struct super_block *,
struct nilfs_super_block *);
extern void nilfs_set_log_cursor(struct nilfs_super_block *,
@@ -320,6 +387,14 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *);
int nilfs_init_gcinode(struct inode *inode);
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs);
+/* sysfs.c */
+int __init nilfs_sysfs_init(void);
+void nilfs_sysfs_exit(void);
+int nilfs_sysfs_create_device_group(struct super_block *);
+void nilfs_sysfs_delete_device_group(struct the_nilfs *);
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *);
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *);
+
/*
* Inodes and files operations
*/
@@ -327,6 +402,7 @@ extern const struct file_operations nilfs_dir_operations;
extern const struct inode_operations nilfs_file_inode_operations;
extern const struct file_operations nilfs_file_operations;
extern const struct address_space_operations nilfs_aops;
+extern const struct address_space_operations nilfs_buffer_cache_aops;
extern const struct inode_operations nilfs_dir_inode_operations;
extern const struct inode_operations nilfs_special_inode_operations;
extern const struct inode_operations nilfs_symlink_inode_operations;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0ba679866e50..56c4da417b6a 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -1,24 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * page.c - buffer/page management specific to NILFS
+ * Buffer/page management specific to NILFS
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>,
- * Seiji Kihara <kihara@osrg.net>.
+ * Written by Ryusuke Konishi and Seiji Kihara.
*/
#include <linux/pagemap.h>
@@ -35,25 +21,24 @@
#include "mdt.h"
-#define NILFS_BUFFER_INHERENT_BITS \
- ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
- (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
+#define NILFS_BUFFER_INHERENT_BITS \
+ (BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) | \
+ BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked))
-static struct buffer_head *
-__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
- int blkbits, unsigned long b_state)
+static struct buffer_head *__nilfs_get_folio_block(struct folio *folio,
+ unsigned long block, pgoff_t index, int blkbits,
+ unsigned long b_state)
{
unsigned long first_block;
- struct buffer_head *bh;
+ struct buffer_head *bh = folio_buffers(folio);
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << blkbits, b_state);
+ if (!bh)
+ bh = create_empty_buffers(folio, 1 << blkbits, b_state);
- first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
- bh = nilfs_page_get_nth_block(page, block - first_block);
+ first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
+ bh = get_nth_bh(bh, block - first_block);
- touch_buffer(bh);
wait_on_buffer(bh);
return bh;
}
@@ -64,45 +49,45 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
unsigned long b_state)
{
int blkbits = inode->i_blkbits;
- pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
- struct page *page;
+ pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits);
+ struct folio *folio;
struct buffer_head *bh;
- page = grab_cache_page(mapping, index);
- if (unlikely(!page))
+ folio = filemap_grab_folio(mapping, index);
+ if (IS_ERR(folio))
return NULL;
- bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
+ bh = __nilfs_get_folio_block(folio, blkoff, index, blkbits, b_state);
if (unlikely(!bh)) {
- unlock_page(page);
- page_cache_release(page);
+ folio_unlock(folio);
+ folio_put(folio);
return NULL;
}
+ bh->b_bdev = inode->i_sb->s_bdev;
return bh;
}
/**
* nilfs_forget_buffer - discard dirty state
- * @inode: owner inode of the buffer
* @bh: buffer head of the buffer to be discarded
*/
void nilfs_forget_buffer(struct buffer_head *bh)
{
- struct page *page = bh->b_page;
+ struct folio *folio = bh->b_folio;
+ const unsigned long clear_bits =
+ (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
+ BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
+ BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) |
+ BIT(BH_Delay));
lock_buffer(bh);
- clear_buffer_nilfs_volatile(bh);
- clear_buffer_nilfs_checked(bh);
- clear_buffer_nilfs_redirected(bh);
- clear_buffer_dirty(bh);
- if (nilfs_page_buffers_clean(page))
- __nilfs_clear_page_dirty(page);
-
- clear_buffer_uptodate(bh);
- clear_buffer_mapped(bh);
+ set_mask_bits(&bh->b_state, clear_bits, 0);
+ if (nilfs_folio_buffers_clean(folio))
+ __nilfs_clear_folio_dirty(folio);
+
bh->b_blocknr = -1;
- ClearPageUptodate(page);
- ClearPageMappedToDisk(page);
+ folio_clear_uptodate(folio);
+ folio_clear_mappedtodisk(folio);
unlock_buffer(bh);
brelse(bh);
}
@@ -114,81 +99,81 @@ void nilfs_forget_buffer(struct buffer_head *bh)
*/
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
- void *kaddr0, *kaddr1;
+ void *saddr, *daddr;
unsigned long bits;
- struct page *spage = sbh->b_page, *dpage = dbh->b_page;
+ struct folio *sfolio = sbh->b_folio, *dfolio = dbh->b_folio;
struct buffer_head *bh;
- kaddr0 = kmap_atomic(spage);
- kaddr1 = kmap_atomic(dpage);
- memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
- kunmap_atomic(kaddr1);
- kunmap_atomic(kaddr0);
+ saddr = kmap_local_folio(sfolio, bh_offset(sbh));
+ daddr = kmap_local_folio(dfolio, bh_offset(dbh));
+ memcpy(daddr, saddr, sbh->b_size);
+ kunmap_local(daddr);
+ kunmap_local(saddr);
dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
dbh->b_blocknr = sbh->b_blocknr;
dbh->b_bdev = sbh->b_bdev;
bh = dbh;
- bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
+ bits = sbh->b_state & (BIT(BH_Uptodate) | BIT(BH_Mapped));
while ((bh = bh->b_this_page) != dbh) {
lock_buffer(bh);
bits &= bh->b_state;
unlock_buffer(bh);
}
- if (bits & (1UL << BH_Uptodate))
- SetPageUptodate(dpage);
+ if (bits & BIT(BH_Uptodate))
+ folio_mark_uptodate(dfolio);
else
- ClearPageUptodate(dpage);
- if (bits & (1UL << BH_Mapped))
- SetPageMappedToDisk(dpage);
+ folio_clear_uptodate(dfolio);
+ if (bits & BIT(BH_Mapped))
+ folio_set_mappedtodisk(dfolio);
else
- ClearPageMappedToDisk(dpage);
+ folio_clear_mappedtodisk(dfolio);
}
/**
- * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
- * @page: page to be checked
+ * nilfs_folio_buffers_clean - Check if a folio has dirty buffers or not.
+ * @folio: Folio to be checked.
*
- * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
- * Otherwise, it returns non-zero value.
+ * Return: false if the folio has dirty buffers, true otherwise.
*/
-int nilfs_page_buffers_clean(struct page *page)
+bool nilfs_folio_buffers_clean(struct folio *folio)
{
struct buffer_head *bh, *head;
- bh = head = page_buffers(page);
+ bh = head = folio_buffers(folio);
do {
if (buffer_dirty(bh))
- return 0;
+ return false;
bh = bh->b_this_page;
} while (bh != head);
- return 1;
+ return true;
}
-void nilfs_page_bug(struct page *page)
+void nilfs_folio_bug(struct folio *folio)
{
+ struct buffer_head *bh, *head;
struct address_space *m;
unsigned long ino;
- if (unlikely(!page)) {
- printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
+ if (unlikely(!folio)) {
+ printk(KERN_CRIT "NILFS_FOLIO_BUG(NULL)\n");
return;
}
- m = page->mapping;
+ m = folio->mapping;
ino = m ? m->host->i_ino : 0;
- printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
+ printk(KERN_CRIT "NILFS_FOLIO_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
"mapping=%p ino=%lu\n",
- page, atomic_read(&page->_count),
- (unsigned long long)page->index, page->flags, m, ino);
+ folio, folio_ref_count(folio),
+ (unsigned long long)folio->index, folio->flags.f, m, ino);
- if (page_has_buffers(page)) {
- struct buffer_head *bh, *head;
+ head = folio_buffers(folio);
+ if (head) {
int i = 0;
- bh = head = page_buffers(page);
+ bh = head;
do {
printk(KERN_CRIT
" BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
@@ -200,30 +185,32 @@ void nilfs_page_bug(struct page *page)
}
/**
- * nilfs_copy_page -- copy the page with buffers
- * @dst: destination page
- * @src: source page
- * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
+ * nilfs_copy_folio -- copy the folio with buffers
+ * @dst: destination folio
+ * @src: source folio
+ * @copy_dirty: flag whether to copy dirty states on the folio's buffer heads.
*
- * This function is for both data pages and btnode pages. The dirty flag
- * should be treated by caller. The page must not be under i/o.
- * Both src and dst page must be locked
+ * This function is for both data folios and btnode folios. The dirty flag
+ * should be treated by caller. The folio must not be under i/o.
+ * Both src and dst folio must be locked
*/
-static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
+static void nilfs_copy_folio(struct folio *dst, struct folio *src,
+ bool copy_dirty)
{
- struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
+ struct buffer_head *dbh, *dbufs, *sbh;
unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
- BUG_ON(PageWriteback(dst));
+ BUG_ON(folio_test_writeback(dst));
- sbh = sbufs = page_buffers(src);
- if (!page_has_buffers(dst))
- create_empty_buffers(dst, sbh->b_size, 0);
+ sbh = folio_buffers(src);
+ dbh = folio_buffers(dst);
+ if (!dbh)
+ dbh = create_empty_buffers(dst, sbh->b_size, 0);
if (copy_dirty)
- mask |= (1UL << BH_Dirty);
+ mask |= BIT(BH_Dirty);
- dbh = dbufs = page_buffers(dst);
+ dbufs = dbh;
do {
lock_buffer(sbh);
lock_buffer(dbh);
@@ -234,16 +221,16 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
dbh = dbh->b_this_page;
} while (dbh != dbufs);
- copy_highpage(dst, src);
+ folio_copy(dst, src);
- if (PageUptodate(src) && !PageUptodate(dst))
- SetPageUptodate(dst);
- else if (!PageUptodate(src) && PageUptodate(dst))
- ClearPageUptodate(dst);
- if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
- SetPageMappedToDisk(dst);
- else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
- ClearPageMappedToDisk(dst);
+ if (folio_test_uptodate(src) && !folio_test_uptodate(dst))
+ folio_mark_uptodate(dst);
+ else if (!folio_test_uptodate(src) && folio_test_uptodate(dst))
+ folio_clear_uptodate(dst);
+ if (folio_test_mappedtodisk(src) && !folio_test_mappedtodisk(dst))
+ folio_set_mappedtodisk(dst);
+ else if (!folio_test_mappedtodisk(src) && folio_test_mappedtodisk(dst))
+ folio_clear_mappedtodisk(dst);
do {
unlock_buffer(sbh);
@@ -256,43 +243,43 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
int nilfs_copy_dirty_pages(struct address_space *dmap,
struct address_space *smap)
{
- struct pagevec pvec;
+ struct folio_batch fbatch;
unsigned int i;
pgoff_t index = 0;
int err = 0;
- pagevec_init(&pvec, 0);
+ folio_batch_init(&fbatch);
repeat:
- if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE))
+ if (!filemap_get_folios_tag(smap, &index, (pgoff_t)-1,
+ PAGECACHE_TAG_DIRTY, &fbatch))
return 0;
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i], *dpage;
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i], *dfolio;
- lock_page(page);
- if (unlikely(!PageDirty(page)))
- NILFS_PAGE_BUG(page, "inconsistent dirty state");
+ folio_lock(folio);
+ if (unlikely(!folio_test_dirty(folio)))
+ NILFS_FOLIO_BUG(folio, "inconsistent dirty state");
- dpage = grab_cache_page(dmap, page->index);
- if (unlikely(!dpage)) {
+ dfolio = filemap_grab_folio(dmap, folio->index);
+ if (IS_ERR(dfolio)) {
/* No empty page is added to the page cache */
- err = -ENOMEM;
- unlock_page(page);
+ folio_unlock(folio);
+ err = PTR_ERR(dfolio);
break;
}
- if (unlikely(!page_has_buffers(page)))
- NILFS_PAGE_BUG(page,
+ if (unlikely(!folio_buffers(folio)))
+ NILFS_FOLIO_BUG(folio,
"found empty page in dat page cache");
- nilfs_copy_page(dpage, page, 1);
- __set_page_dirty_nobuffers(dpage);
+ nilfs_copy_folio(dfolio, folio, true);
+ filemap_dirty_folio(folio_mapping(dfolio), dfolio);
- unlock_page(dpage);
- page_cache_release(dpage);
- unlock_page(page);
+ folio_unlock(dfolio);
+ folio_put(dfolio);
+ folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
if (likely(!err))
@@ -305,66 +292,63 @@ repeat:
* @dmap: destination page cache
* @smap: source page cache
*
- * No pages must no be added to the cache during this process.
+ * No pages must be added to the cache during this process.
* This must be ensured by the caller.
*/
void nilfs_copy_back_pages(struct address_space *dmap,
struct address_space *smap)
{
- struct pagevec pvec;
+ struct folio_batch fbatch;
unsigned int i, n;
- pgoff_t index = 0;
- int err;
+ pgoff_t start = 0;
- pagevec_init(&pvec, 0);
+ folio_batch_init(&fbatch);
repeat:
- n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
+ n = filemap_get_folios(smap, &start, ~0UL, &fbatch);
if (!n)
return;
- index = pvec.pages[n - 1]->index + 1;
-
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i], *dpage;
- pgoff_t offset = page->index;
-
- lock_page(page);
- dpage = find_lock_page(dmap, offset);
- if (dpage) {
- /* override existing page on the destination cache */
- WARN_ON(PageDirty(dpage));
- nilfs_copy_page(dpage, page, 0);
- unlock_page(dpage);
- page_cache_release(dpage);
- } else {
- struct page *page2;
- /* move the page to the destination cache */
- spin_lock_irq(&smap->tree_lock);
- page2 = radix_tree_delete(&smap->page_tree, offset);
- WARN_ON(page2 != page);
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i], *dfolio;
+ pgoff_t index = folio->index;
+
+ folio_lock(folio);
+ dfolio = filemap_lock_folio(dmap, index);
+ if (!IS_ERR(dfolio)) {
+ /* overwrite existing folio in the destination cache */
+ WARN_ON(folio_test_dirty(dfolio));
+ nilfs_copy_folio(dfolio, folio, false);
+ folio_unlock(dfolio);
+ folio_put(dfolio);
+ /* Do we not need to remove folio from smap here? */
+ } else {
+ struct folio *f;
+ /* move the folio to the destination cache */
+ xa_lock_irq(&smap->i_pages);
+ f = __xa_erase(&smap->i_pages, index);
+ WARN_ON(folio != f);
smap->nrpages--;
- spin_unlock_irq(&smap->tree_lock);
-
- spin_lock_irq(&dmap->tree_lock);
- err = radix_tree_insert(&dmap->page_tree, offset, page);
- if (unlikely(err < 0)) {
- WARN_ON(err == -EEXIST);
- page->mapping = NULL;
- page_cache_release(page); /* for cache */
+ xa_unlock_irq(&smap->i_pages);
+
+ xa_lock_irq(&dmap->i_pages);
+ f = __xa_store(&dmap->i_pages, index, folio, GFP_NOFS);
+ if (unlikely(f)) {
+ /* Probably -ENOMEM */
+ folio->mapping = NULL;
+ folio_put(folio);
} else {
- page->mapping = dmap;
+ folio->mapping = dmap;
dmap->nrpages++;
- if (PageDirty(page))
- radix_tree_tag_set(&dmap->page_tree,
- offset,
- PAGECACHE_TAG_DIRTY);
+ if (folio_test_dirty(folio))
+ __xa_set_mark(&dmap->i_pages, index,
+ PAGECACHE_TAG_DIRTY);
}
- spin_unlock_irq(&dmap->tree_lock);
+ xa_unlock_irq(&dmap->i_pages);
}
- unlock_page(page);
+ folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
goto repeat;
@@ -373,83 +357,101 @@ repeat:
/**
* nilfs_clear_dirty_pages - discard dirty pages in address space
* @mapping: address space with dirty pages for discarding
- * @silent: suppress [true] or print [false] warning messages
*/
-void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
+void nilfs_clear_dirty_pages(struct address_space *mapping)
{
- struct pagevec pvec;
+ struct folio_batch fbatch;
unsigned int i;
pgoff_t index = 0;
- pagevec_init(&pvec, 0);
+ folio_batch_init(&fbatch);
+
+ while (filemap_get_folios_tag(mapping, &index, (pgoff_t)-1,
+ PAGECACHE_TAG_DIRTY, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+
+ folio_lock(folio);
- while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
+ /*
+ * This folio may have been removed from the address
+ * space by truncation or invalidation when the lock
+ * was acquired. Skip processing in that case.
+ */
+ if (likely(folio->mapping == mapping))
+ nilfs_clear_folio_dirty(folio);
- lock_page(page);
- nilfs_clear_dirty_page(page, silent);
- unlock_page(page);
+ folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
}
/**
- * nilfs_clear_dirty_page - discard dirty page
- * @page: dirty page that will be discarded
- * @silent: suppress [true] or print [false] warning messages
+ * nilfs_clear_folio_dirty - discard dirty folio
+ * @folio: dirty folio that will be discarded
+ *
+ * nilfs_clear_folio_dirty() clears working states including dirty state for
+ * the folio and its buffers. If the folio has buffers, clear only if it is
+ * confirmed that none of the buffer heads are busy (none have valid
+ * references and none are locked).
*/
-void nilfs_clear_dirty_page(struct page *page, bool silent)
+void nilfs_clear_folio_dirty(struct folio *folio)
{
- struct inode *inode = page->mapping->host;
- struct super_block *sb = inode->i_sb;
+ struct buffer_head *bh, *head;
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
- if (!silent) {
- nilfs_warning(sb, __func__,
- "discard page: offset %lld, ino %lu",
- page_offset(page), inode->i_ino);
- }
+ head = folio_buffers(folio);
+ if (head) {
+ const unsigned long clear_bits =
+ (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
+ BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
+ BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) |
+ BIT(BH_Delay));
+ bool busy, invalidated = false;
- ClearPageUptodate(page);
- ClearPageMappedToDisk(page);
+recheck_buffers:
+ busy = false;
+ bh = head;
+ do {
+ if (atomic_read(&bh->b_count) | buffer_locked(bh)) {
+ busy = true;
+ break;
+ }
+ } while (bh = bh->b_this_page, bh != head);
- if (page_has_buffers(page)) {
- struct buffer_head *bh, *head;
+ if (busy) {
+ if (invalidated)
+ return;
+ invalidate_bh_lrus();
+ invalidated = true;
+ goto recheck_buffers;
+ }
- bh = head = page_buffers(page);
+ bh = head;
do {
lock_buffer(bh);
- if (!silent) {
- nilfs_warning(sb, __func__,
- "discard block %llu, size %zu",
- (u64)bh->b_blocknr, bh->b_size);
- }
- clear_buffer_dirty(bh);
- clear_buffer_nilfs_volatile(bh);
- clear_buffer_nilfs_checked(bh);
- clear_buffer_nilfs_redirected(bh);
- clear_buffer_uptodate(bh);
- clear_buffer_mapped(bh);
+ set_mask_bits(&bh->b_state, clear_bits, 0);
unlock_buffer(bh);
} while (bh = bh->b_this_page, bh != head);
}
- __nilfs_clear_page_dirty(page);
+ folio_clear_uptodate(folio);
+ folio_clear_mappedtodisk(folio);
+ folio_clear_checked(folio);
+ __nilfs_clear_folio_dirty(folio);
}
-unsigned nilfs_page_count_clean_buffers(struct page *page,
- unsigned from, unsigned to)
+unsigned int nilfs_page_count_clean_buffers(struct folio *folio,
+ unsigned int from, unsigned int to)
{
- unsigned block_start, block_end;
+ unsigned int block_start, block_end;
struct buffer_head *bh, *head;
- unsigned nc = 0;
+ unsigned int nc = 0;
- for (bh = head = page_buffers(page), block_start = 0;
+ for (bh = head = folio_buffers(folio), block_start = 0;
bh != head || !block_start;
block_start = block_end, bh = bh->b_this_page) {
block_end = block_start + bh->b_size;
@@ -459,45 +461,33 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
return nc;
}
-void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
- struct backing_dev_info *bdi)
-{
- mapping->host = inode;
- mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_NOFS);
- mapping->private_data = NULL;
- mapping->backing_dev_info = bdi;
- mapping->a_ops = &empty_aops;
-}
-
/*
* NILFS2 needs clear_page_dirty() in the following two cases:
*
- * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
- * page dirty flags when it copies back pages from the shadow cache
- * (gcdat->{i_mapping,i_btnode_cache}) to its original cache
- * (dat->{i_mapping,i_btnode_cache}).
+ * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty
+ * flag of pages when it copies back pages from shadow cache to the
+ * original cache.
*
* 2) Some B-tree operations like insertion or deletion may dispose buffers
* in dirty state, and this needs to cancel the dirty state of their pages.
*/
-int __nilfs_clear_page_dirty(struct page *page)
+void __nilfs_clear_folio_dirty(struct folio *folio)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
if (mapping) {
- spin_lock_irq(&mapping->tree_lock);
- if (test_bit(PG_dirty, &page->flags)) {
- radix_tree_tag_clear(&mapping->page_tree,
- page_index(page),
+ xa_lock_irq(&mapping->i_pages);
+ if (folio_test_dirty(folio)) {
+ __xa_clear_mark(&mapping->i_pages, folio->index,
PAGECACHE_TAG_DIRTY);
- spin_unlock_irq(&mapping->tree_lock);
- return clear_page_dirty_for_io(page);
+ xa_unlock_irq(&mapping->i_pages);
+ folio_clear_dirty_for_io(folio);
+ return;
}
- spin_unlock_irq(&mapping->tree_lock);
- return 0;
+ xa_unlock_irq(&mapping->i_pages);
+ return;
}
- return TestClearPageDirty(page);
+ folio_clear_dirty(folio);
}
/**
@@ -509,48 +499,44 @@ int __nilfs_clear_page_dirty(struct page *page)
* This function searches an extent of buffers marked "delayed" which
* starts from a block offset equal to or larger than @start_blk. If
* such an extent was found, this will store the start offset in
- * @blkoff and return its length in blocks. Otherwise, zero is
- * returned.
+ * @blkoff and return its length in blocks.
+ *
+ * Return: Length in blocks of found extent, 0 otherwise.
*/
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
sector_t start_blk,
sector_t *blkoff)
{
- unsigned int i;
+ unsigned int i, nr_folios;
pgoff_t index;
- unsigned int nblocks_in_page;
unsigned long length = 0;
- sector_t b;
- struct pagevec pvec;
- struct page *page;
+ struct folio_batch fbatch;
+ struct folio *folio;
if (inode->i_mapping->nrpages == 0)
return 0;
- index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ index = start_blk >> (PAGE_SHIFT - inode->i_blkbits);
- pagevec_init(&pvec, 0);
+ folio_batch_init(&fbatch);
repeat:
- pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
- pvec.pages);
- if (pvec.nr == 0)
+ nr_folios = filemap_get_folios_contig(inode->i_mapping, &index, ULONG_MAX,
+ &fbatch);
+ if (nr_folios == 0)
return length;
- if (length > 0 && pvec.pages[0]->index > index)
- goto out;
-
- b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
i = 0;
do {
- page = pvec.pages[i];
+ folio = fbatch.folios[i];
- lock_page(page);
- if (page_has_buffers(page)) {
+ folio_lock(folio);
+ if (folio_buffers(folio)) {
struct buffer_head *bh, *head;
+ sector_t b;
- bh = head = page_buffers(page);
+ b = folio->index << (PAGE_SHIFT - inode->i_blkbits);
+ bh = head = folio_buffers(folio);
do {
if (b < start_blk)
continue;
@@ -565,21 +551,17 @@ repeat:
} else {
if (length > 0)
goto out_locked;
-
- b += nblocks_in_page;
}
- unlock_page(page);
+ folio_unlock(folio);
- } while (++i < pagevec_count(&pvec));
+ } while (++i < nr_folios);
- index = page->index + 1;
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
goto repeat;
out_locked:
- unlock_page(page);
-out:
- pagevec_release(&pvec);
+ folio_unlock(folio);
+ folio_batch_release(&fbatch);
return length;
}
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index ef30c5c2426f..136cd1c143c9 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -1,24 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * page.h - buffer/page management specific to NILFS
+ * Buffer/page management specific to NILFS
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>,
- * Seiji Kihara <kihara@osrg.net>.
+ * Written by Ryusuke Konishi and Seiji Kihara.
*/
#ifndef _NILFS_PAGE_H
@@ -44,38 +30,26 @@ BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */
-int __nilfs_clear_page_dirty(struct page *);
+void __nilfs_clear_folio_dirty(struct folio *);
struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
unsigned long, unsigned long);
void nilfs_forget_buffer(struct buffer_head *);
void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
-int nilfs_page_buffers_clean(struct page *);
-void nilfs_page_bug(struct page *);
+bool nilfs_folio_buffers_clean(struct folio *);
+void nilfs_folio_bug(struct folio *);
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
-void nilfs_clear_dirty_page(struct page *, bool);
-void nilfs_clear_dirty_pages(struct address_space *, bool);
-void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
- struct backing_dev_info *bdi);
-unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
+void nilfs_clear_folio_dirty(struct folio *folio);
+void nilfs_clear_dirty_pages(struct address_space *mapping);
+unsigned int nilfs_page_count_clean_buffers(struct folio *folio,
+ unsigned int from, unsigned int to);
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
sector_t start_blk,
sector_t *blkoff);
-#define NILFS_PAGE_BUG(page, m, a...) \
- do { nilfs_page_bug(page); BUG(); } while (0)
-
-static inline struct buffer_head *
-nilfs_page_get_nth_block(struct page *page, unsigned int count)
-{
- struct buffer_head *bh = page_buffers(page);
-
- while (count-- > 0)
- bh = bh->b_this_page;
- get_bh(bh);
- return bh;
-}
+#define NILFS_FOLIO_BUG(folio, m, a...) \
+ do { nilfs_folio_bug(folio); BUG(); } while (0)
#endif /* _NILFS_PAGE_H */
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ff00a0b7acb9..a9c61d0492cb 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * recovery.c - NILFS recovery logic
+ * NILFS recovery logic
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*/
#include <linux/buffer_head.h>
@@ -47,8 +34,10 @@ enum {
/* work structure for recovery */
struct nilfs_recovery_block {
- ino_t ino; /* Inode number of the file that this block
- belongs to */
+ ino_t ino; /*
+ * Inode number of the file that this block
+ * belongs to
+ */
sector_t blocknr; /* block number */
__u64 vblocknr; /* virtual block number */
unsigned long blkoff; /* File offset of the data block (per block) */
@@ -56,38 +45,37 @@ struct nilfs_recovery_block {
};
-static int nilfs_warn_segment_error(int err)
+static int nilfs_warn_segment_error(struct super_block *sb, int err)
{
+ const char *msg = NULL;
+
switch (err) {
case NILFS_SEG_FAIL_IO:
- printk(KERN_WARNING
- "NILFS warning: I/O error on loading last segment\n");
+ nilfs_err(sb, "I/O error reading segment");
return -EIO;
case NILFS_SEG_FAIL_MAGIC:
- printk(KERN_WARNING
- "NILFS warning: Segment magic number invalid\n");
+ msg = "Magic number mismatch";
break;
case NILFS_SEG_FAIL_SEQ:
- printk(KERN_WARNING
- "NILFS warning: Sequence number mismatch\n");
+ msg = "Sequence number mismatch";
break;
case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
- printk(KERN_WARNING
- "NILFS warning: Checksum error in super root\n");
+ msg = "Checksum error in super root";
break;
case NILFS_SEG_FAIL_CHECKSUM_FULL:
- printk(KERN_WARNING
- "NILFS warning: Checksum error in segment payload\n");
+ msg = "Checksum error in segment payload";
break;
case NILFS_SEG_FAIL_CONSISTENCY:
- printk(KERN_WARNING
- "NILFS warning: Inconsistent segment\n");
+ msg = "Inconsistency found";
break;
case NILFS_SEG_NO_SUPER_ROOT:
- printk(KERN_WARNING
- "NILFS warning: No super root in the last segment\n");
+ msg = "No super root in the last segment";
break;
+ default:
+ nilfs_err(sb, "unrecognized segment error %d", err);
+ return -EINVAL;
}
+ nilfs_warn(sb, "invalid segment: %s", msg);
return -EINVAL;
}
@@ -100,6 +88,8 @@ static int nilfs_warn_segment_error(int err)
* @check_bytes: number of bytes to be checked
* @start: DBN of start block
* @nblock: number of blocks to be checked
+ *
+ * Return: 0 on success, or %-EIO if an I/O error occurs.
*/
static int nilfs_compute_checksum(struct the_nilfs *nilfs,
struct buffer_head *bhs, u32 *sum,
@@ -138,6 +128,11 @@ static int nilfs_compute_checksum(struct the_nilfs *nilfs,
* @sr_block: disk block number of the super root block
* @pbh: address of a buffer_head pointer to return super root buffer
* @check: CRC check flag
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Super root block corrupted.
+ * * %-EIO - I/O error.
*/
int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
struct buffer_head **pbh, int check)
@@ -156,7 +151,7 @@ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
sr = (struct nilfs_super_root *)bh_sr->b_data;
if (check) {
- unsigned bytes = le16_to_cpu(sr->sr_bytes);
+ unsigned int bytes = le16_to_cpu(sr->sr_bytes);
if (bytes == 0 || bytes > nilfs->ns_blocksize) {
ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
@@ -180,7 +175,7 @@ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
brelse(bh_sr);
failed:
- return nilfs_warn_segment_error(ret);
+ return nilfs_warn_segment_error(nilfs->ns_sb, ret);
}
/**
@@ -188,6 +183,8 @@ int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
* @nilfs: nilfs object
* @start_blocknr: start block number of the log
* @sum: pointer to return segment summary structure
+ *
+ * Return: Buffer head pointer, or NULL if an I/O error occurs.
*/
static struct buffer_head *
nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr,
@@ -207,6 +204,13 @@ nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr,
* @seg_seq: sequence number of segment
* @bh_sum: buffer head of summary block
* @sum: segment summary struct
+ *
+ * Return: 0 on success, or one of the following internal codes on failure:
+ * * %NILFS_SEG_FAIL_MAGIC - Magic number mismatch.
+ * * %NILFS_SEG_FAIL_SEQ - Sequence number mismatch.
+ * * %NILFS_SEG_FAIL_CONSISTENCY - Block count out of range.
+ * * %NILFS_SEG_FAIL_IO - I/O error.
+ * * %NILFS_SEG_FAIL_CHECKSUM_FULL - Full log checksum verification failed.
*/
static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq,
struct buffer_head *bh_sum,
@@ -250,6 +254,9 @@ out:
* @pbh: the current buffer head on summary blocks [in, out]
* @offset: the current byte offset on summary blocks [in, out]
* @bytes: byte size of the item to be read
+ *
+ * Return: Kernel space address of current segment summary entry, or
+ * NULL if an I/O error occurs.
*/
static void *nilfs_read_summary_info(struct the_nilfs *nilfs,
struct buffer_head **pbh,
@@ -312,6 +319,11 @@ static void nilfs_skip_summary_info(struct the_nilfs *nilfs,
* @start_blocknr: start block number of the log
* @sum: log summary information
* @head: list head to add nilfs_recovery_block struct
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
struct nilfs_segment_summary *sum,
@@ -445,8 +457,17 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
* The next segment is invalidated by this recovery.
*/
err = nilfs_sufile_free(sufile, segnum[1]);
- if (unlikely(err))
+ if (unlikely(err)) {
+ if (err == -ENOENT) {
+ nilfs_err(sb,
+ "checkpoint log inconsistency at block %llu (segment %llu): next segment %llu is unallocated",
+ (unsigned long long)nilfs->ns_last_pseg,
+ (unsigned long long)nilfs->ns_segnum,
+ (unsigned long long)segnum[1]);
+ err = -EINVAL;
+ }
goto failed;
+ }
for (i = 1; i < 4; i++) {
err = nilfs_segment_list_add(head, segnum[i]);
@@ -484,18 +505,16 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
- struct page *page)
+ loff_t pos, struct folio *folio)
{
struct buffer_head *bh_org;
- void *kaddr;
+ size_t from = offset_in_folio(folio, pos);
bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
if (unlikely(!bh_org))
return -EIO;
- kaddr = kmap_atomic(page);
- memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
- kunmap_atomic(kaddr);
+ memcpy_to_folio(folio, from, bh_org->b_data, bh_org->b_size);
brelse(bh_org);
return 0;
}
@@ -508,8 +527,8 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
{
struct inode *inode;
struct nilfs_recovery_block *rb, *n;
- unsigned blocksize = nilfs->ns_blocksize;
- struct page *page;
+ unsigned int blocksize = nilfs->ns_blocksize;
+ struct folio *folio;
loff_t pos;
int err = 0, err2 = 0;
@@ -523,42 +542,41 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
pos = rb->blkoff << inode->i_blkbits;
err = block_write_begin(inode->i_mapping, pos, blocksize,
- 0, &page, nilfs_get_block);
+ &folio, nilfs_get_block);
if (unlikely(err)) {
loff_t isize = inode->i_size;
+
if (pos + blocksize > isize)
nilfs_write_failed(inode->i_mapping,
pos + blocksize);
goto failed_inode;
}
- err = nilfs_recovery_copy_block(nilfs, rb, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, folio);
if (unlikely(err))
- goto failed_page;
+ goto failed_folio;
err = nilfs_set_file_dirty(inode, 1);
if (unlikely(err))
- goto failed_page;
+ goto failed_folio;
- block_write_end(NULL, inode->i_mapping, pos, blocksize,
- blocksize, page, NULL);
+ block_write_end(pos, blocksize, blocksize, folio);
- unlock_page(page);
- page_cache_release(page);
+ folio_unlock(folio);
+ folio_put(folio);
(*nr_salvaged_blocks)++;
goto next;
- failed_page:
- unlock_page(page);
- page_cache_release(page);
+ failed_folio:
+ folio_unlock(folio);
+ folio_put(folio);
failed_inode:
- printk(KERN_WARNING
- "NILFS warning: error recovering data block "
- "(err=%d, ino=%lu, block-offset=%llu)\n",
- err, (unsigned long)rb->ino,
- (unsigned long long)rb->blkoff);
+ nilfs_warn(sb,
+ "error %d recovering data block (ino=%lu, block-offset=%llu)",
+ err, (unsigned long)rb->ino,
+ (unsigned long long)rb->blkoff);
if (!err2)
err2 = err;
next:
@@ -574,7 +592,14 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
* checkpoint
* @nilfs: nilfs object
* @sb: super block instance
+ * @root: NILFS root instance
* @ri: pointer to a nilfs_recovery_info
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Log format error.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
struct super_block *sb,
@@ -582,7 +607,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
struct nilfs_recovery_info *ri)
{
struct buffer_head *bh_sum = NULL;
- struct nilfs_segment_summary *sum;
+ struct nilfs_segment_summary *sum = NULL;
sector_t pseg_start;
sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */
unsigned long nsalvaged_blocks = 0;
@@ -638,7 +663,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
!(flags & NILFS_SS_SYNDT))
goto try_next_pseg;
state = RF_DSYNC_ST;
- /* Fall through */
+ fallthrough;
case RF_DSYNC_ST:
if (!(flags & NILFS_SS_SYNDT))
goto confused;
@@ -681,8 +706,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
}
if (nsalvaged_blocks) {
- printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
- sb->s_id, nsalvaged_blocks);
+ nilfs_info(sb, "salvaged %lu blocks", nsalvaged_blocks);
ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
}
out:
@@ -693,10 +717,9 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
confused:
err = -EINVAL;
failed:
- printk(KERN_ERR
- "NILFS (device %s): Error roll-forwarding "
- "(err=%d, pseg block=%llu). ",
- sb->s_id, err, (unsigned long long)pseg_start);
+ nilfs_err(sb,
+ "error %d roll-forwarding partial segment at blocknr = %llu",
+ err, (unsigned long long)pseg_start);
goto out;
}
@@ -711,35 +734,62 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
return;
bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize);
- BUG_ON(!bh);
+ if (WARN_ON(!bh))
+ return; /* should never happen */
+
+ lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
+ set_buffer_uptodate(bh);
set_buffer_dirty(bh);
+ unlock_buffer(bh);
+
err = sync_dirty_buffer(bh);
if (unlikely(err))
- printk(KERN_WARNING
- "NILFS warning: buffer sync write failed during "
- "post-cleaning of recovery.\n");
+ nilfs_warn(nilfs->ns_sb,
+ "buffer sync write failed during post-cleaning of recovery.");
brelse(bh);
}
/**
+ * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery
+ * @nilfs: nilfs object
+ */
+static void nilfs_abort_roll_forward(struct the_nilfs *nilfs)
+{
+ struct nilfs_inode_info *ii, *n;
+ LIST_HEAD(head);
+
+ /* Abandon inodes that have read recovery data */
+ spin_lock(&nilfs->ns_inode_lock);
+ list_splice_init(&nilfs->ns_dirty_files, &head);
+ spin_unlock(&nilfs->ns_inode_lock);
+ if (list_empty(&head))
+ return;
+
+ set_nilfs_purging(nilfs);
+ list_for_each_entry_safe(ii, n, &head, i_dirty) {
+ spin_lock(&nilfs->ns_inode_lock);
+ list_del_init(&ii->i_dirty);
+ spin_unlock(&nilfs->ns_inode_lock);
+
+ iput(&ii->vfs_inode);
+ }
+ clear_nilfs_purging(nilfs);
+}
+
+/**
* nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
* @nilfs: nilfs object
* @sb: super block instance
* @ri: pointer to a nilfs_recovery_info struct to store search results.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error code is returned.
- *
- * %-EINVAL - Inconsistent filesystem state.
- *
- * %-EIO - I/O error
- *
- * %-ENOSPC - No space left on device (only in a panic state).
- *
- * %-ERESTARTSYS - Interrupted.
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Inconsistent filesystem state.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No space left on device (only in a panic state).
+ * * %-ERESTARTSYS - Interrupted.
*/
int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
struct super_block *sb,
@@ -753,8 +803,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root);
if (unlikely(err)) {
- printk(KERN_ERR
- "NILFS: error loading the latest checkpoint.\n");
+ nilfs_err(sb, "error %d loading the latest checkpoint", err);
return err;
}
@@ -765,8 +814,8 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri);
if (unlikely(err)) {
- printk(KERN_ERR "NILFS: Error preparing segments for "
- "recovery.\n");
+ nilfs_err(sb, "error %d preparing segment for recovery",
+ err);
goto failed;
}
@@ -779,17 +828,21 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
nilfs_detach_log_writer(sb);
if (unlikely(err)) {
- printk(KERN_ERR "NILFS: Oops! recovery failed. "
- "(err=%d)\n", err);
- goto failed;
+ nilfs_err(sb, "error %d writing segment for recovery",
+ err);
+ goto put_root;
}
nilfs_finish_roll_forward(nilfs, ri);
}
- failed:
+put_root:
nilfs_put_root(root);
return err;
+
+failed:
+ nilfs_abort_roll_forward(nilfs);
+ goto put_root;
}
/**
@@ -801,20 +854,17 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
* segment pointed by the superblock. It sets up struct the_nilfs through
* this search. It fills nilfs_recovery_info (ri) required for recovery.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error code is returned.
- *
- * %-EINVAL - No valid segment found
- *
- * %-EIO - I/O error
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - No valid segment found.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_search_super_root(struct the_nilfs *nilfs,
struct nilfs_recovery_info *ri)
{
struct buffer_head *bh_sum = NULL;
- struct nilfs_segment_summary *sum;
+ struct nilfs_segment_summary *sum = NULL;
sector_t pseg_start, pseg_end, sr_pseg_start = 0;
sector_t seg_start, seg_end; /* range of full segment (block number) */
sector_t b, end;
@@ -872,9 +922,11 @@ int nilfs_search_super_root(struct the_nilfs *nilfs,
flags = le16_to_cpu(sum->ss_flags);
if (!(flags & NILFS_SS_SR) && !scan_newer) {
- /* This will never happen because a superblock
- (last_segment) always points to a pseg
- having a super root. */
+ /*
+ * This will never happen because a superblock
+ * (last_segment) always points to a pseg with
+ * a super root.
+ */
ret = NILFS_SEG_FAIL_CONSISTENCY;
goto failed;
}
@@ -960,5 +1012,5 @@ int nilfs_search_super_root(struct the_nilfs *nilfs,
failed:
brelse(bh_sum);
nilfs_dispose_segment_list(&segments);
- return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
+ return ret < 0 ? ret : nilfs_warn_segment_error(nilfs->ns_sb, ret);
}
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index dc9a913784ab..a8bdf3d318ea 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * segbuf.c - NILFS segment buffer
+ * NILFS segment buffer
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*
*/
@@ -114,6 +101,12 @@ int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf)
if (unlikely(!bh))
return -ENOMEM;
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ memset(bh->b_data, 0, bh->b_size);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
nilfs_segbuf_add_segsum_buffer(segbuf, bh);
return 0;
}
@@ -133,8 +126,8 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf,
return 0;
}
-int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
- time_t ctime, __u64 cno)
+int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned int flags,
+ time64_t ctime, __u64 cno)
{
int err;
@@ -212,7 +205,6 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
{
struct buffer_head *bh;
struct nilfs_segment_summary *raw_sum;
- void *kaddr;
u32 crc;
bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head,
@@ -227,9 +219,13 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
crc = crc32_le(crc, bh->b_data, bh->b_size);
}
list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
- kaddr = kmap_atomic(bh->b_page);
- crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size);
- kunmap_atomic(kaddr);
+ size_t offset = offset_in_folio(bh->b_folio, bh->b_data);
+ unsigned char *from;
+
+ /* Do not support block sizes larger than PAGE_SIZE */
+ from = kmap_local_folio(bh->b_folio, offset);
+ crc = crc32_le(crc, from, bh->b_size);
+ kunmap_local(from);
}
raw_sum->ss_datasum = cpu_to_le32(crc);
}
@@ -240,7 +236,7 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
{
struct nilfs_super_root *raw_sr;
struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info;
- unsigned srsize;
+ unsigned int srsize;
u32 crc;
raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
@@ -338,18 +334,11 @@ void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed)
/*
* BIO operations
*/
-static void nilfs_end_bio_write(struct bio *bio, int err)
+static void nilfs_end_bio_write(struct bio *bio)
{
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct nilfs_segment_buffer *segbuf = bio->bi_private;
- if (err == -EOPNOTSUPP) {
- set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
- bio_put(bio);
- /* to be detected by submit_seg_bio() */
- }
-
- if (!uptodate)
+ if (bio->bi_status)
atomic_inc(&segbuf->sb_err);
bio_put(bio);
@@ -357,69 +346,20 @@ static void nilfs_end_bio_write(struct bio *bio, int err)
}
static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
- struct nilfs_write_info *wi, int mode)
+ struct nilfs_write_info *wi)
{
struct bio *bio = wi->bio;
- int err;
-
- if (segbuf->sb_nbio > 0 &&
- bdi_write_congested(segbuf->sb_super->s_bdi)) {
- wait_for_completion(&segbuf->sb_bio_event);
- segbuf->sb_nbio--;
- if (unlikely(atomic_read(&segbuf->sb_err))) {
- bio_put(bio);
- err = -EIO;
- goto failed;
- }
- }
bio->bi_end_io = nilfs_end_bio_write;
bio->bi_private = segbuf;
- bio_get(bio);
- submit_bio(mode, bio);
- if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
- bio_put(bio);
- err = -EOPNOTSUPP;
- goto failed;
- }
+ submit_bio(bio);
segbuf->sb_nbio++;
- bio_put(bio);
wi->bio = NULL;
wi->rest_blocks -= wi->end - wi->start;
wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
wi->start = wi->end;
return 0;
-
- failed:
- wi->bio = NULL;
- return err;
-}
-
-/**
- * nilfs_alloc_seg_bio - allocate a new bio for writing log
- * @nilfs: nilfs object
- * @start: start block number of the bio
- * @nr_vecs: request size of page vector.
- *
- * Return Value: On success, pointer to the struct bio is returned.
- * On error, NULL is returned.
- */
-static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start,
- int nr_vecs)
-{
- struct bio *bio;
-
- bio = bio_alloc(GFP_NOIO, nr_vecs);
- if (bio == NULL) {
- while (!bio && (nr_vecs >>= 1))
- bio = bio_alloc(GFP_NOIO, nr_vecs);
- }
- if (likely(bio)) {
- bio->bi_bdev = nilfs->ns_bdev;
- bio->bi_sector = start << (nilfs->ns_blocksize_bits - 9);
- }
- return bio;
}
static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
@@ -427,7 +367,7 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
{
wi->bio = NULL;
wi->rest_blocks = segbuf->sb_sum.nblocks;
- wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev);
+ wi->max_pages = BIO_MAX_VECS;
wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
wi->start = wi->end = 0;
wi->blocknr = segbuf->sb_pseg_start;
@@ -435,26 +375,26 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
struct nilfs_write_info *wi,
- struct buffer_head *bh, int mode)
+ struct buffer_head *bh)
{
- int len, err;
+ int err;
BUG_ON(wi->nr_vecs <= 0);
repeat:
if (!wi->bio) {
- wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end,
- wi->nr_vecs);
- if (unlikely(!wi->bio))
- return -ENOMEM;
+ wi->bio = bio_alloc(wi->nilfs->ns_bdev, wi->nr_vecs,
+ REQ_OP_WRITE, GFP_NOIO);
+ wi->bio->bi_iter.bi_sector = (wi->blocknr + wi->end) <<
+ (wi->nilfs->ns_blocksize_bits - 9);
}
- len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh));
- if (len == bh->b_size) {
+ if (bio_add_folio(wi->bio, bh->b_folio, bh->b_size,
+ offset_in_folio(bh->b_folio, bh->b_data))) {
wi->end++;
return 0;
}
/* bio is FULL */
- err = nilfs_segbuf_submit_bio(segbuf, wi, mode);
+ err = nilfs_segbuf_submit_bio(segbuf, wi);
/* never submit current bh */
if (likely(!err))
goto repeat;
@@ -466,31 +406,26 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
* @segbuf: buffer storing a log to be written
* @nilfs: nilfs object
*
- * Return Value: On Success, 0 is returned. On Error, one of the following
- * negative error code is returned.
- *
- * %-EIO - I/O error
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: Always 0.
*/
static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
struct the_nilfs *nilfs)
{
struct nilfs_write_info wi;
struct buffer_head *bh;
- int res = 0, rw = WRITE;
+ int res = 0;
wi.nilfs = nilfs;
nilfs_segbuf_prepare_write(segbuf, &wi);
list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) {
- res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
+ res = nilfs_segbuf_submit_bh(segbuf, &wi, bh);
if (unlikely(res))
goto failed_bio;
}
list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
- res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
+ res = nilfs_segbuf_submit_bh(segbuf, &wi, bh);
if (unlikely(res))
goto failed_bio;
}
@@ -500,8 +435,8 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
* Last BIO is always sent through the following
* submission.
*/
- rw |= REQ_SYNC;
- res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
+ wi.bio->bi_opf |= REQ_SYNC;
+ res = nilfs_segbuf_submit_bio(segbuf, &wi);
}
failed_bio:
@@ -512,10 +447,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
* nilfs_segbuf_wait - wait for completion of requested BIOs
* @segbuf: segment buffer
*
- * Return Value: On Success, 0 is returned. On Error, one of the following
- * negative error code is returned.
- *
- * %-EIO - I/O error
+ * Return: 0 on success, or %-EIO if I/O error is detected.
*/
static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf)
{
@@ -529,7 +461,11 @@ static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf)
} while (--segbuf->sb_nbio > 0);
if (unlikely(atomic_read(&segbuf->sb_err) > 0)) {
- printk(KERN_ERR "NILFS: IO error writing segment\n");
+ nilfs_err(segbuf->sb_super,
+ "I/O error writing log (start-blocknr=%llu, block-count=%lu) in segment %llu",
+ (unsigned long long)segbuf->sb_pseg_start,
+ segbuf->sb_sum.nblocks,
+ (unsigned long long)segbuf->sb_segnum);
err = -EIO;
}
return err;
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index b04f08cc2397..e20091ededba 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * segbuf.h - NILFS Segment buffer prototypes and definitions
+ * NILFS Segment buffer prototypes and definitions
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*
*/
#ifndef _NILFS_SEGBUF_H
@@ -50,7 +37,7 @@ struct nilfs_segsum_info {
unsigned long nfileblk;
u64 seg_seq;
__u64 cno;
- time_t ctime;
+ time64_t ctime;
sector_t next;
};
@@ -82,7 +69,7 @@ struct nilfs_segment_buffer {
__u64 sb_nextnum;
sector_t sb_fseg_start, sb_fseg_end;
sector_t sb_pseg_start;
- unsigned sb_rest_blocks;
+ unsigned int sb_rest_blocks;
/* Buffers */
struct list_head sb_segsum_buffers;
@@ -124,7 +111,8 @@ void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
struct nilfs_segment_buffer *prev);
void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
struct the_nilfs *);
-int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t, __u64);
+int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned int, time64_t,
+ __u64);
int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *);
int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *,
struct buffer_head **);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bd88a7461063..deee16bc9d4e 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1,29 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * segment.c - NILFS segment constructor.
+ * NILFS segment constructor.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*
*/
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/bitops.h>
#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/blkdev.h>
@@ -33,6 +21,8 @@
#include <linux/crc32.h>
#include <linux/pagevec.h>
#include <linux/slab.h>
+#include <linux/sched/signal.h>
+
#include "nilfs.h"
#include "btnode.h"
#include "page.h"
@@ -48,18 +38,26 @@
*/
#define SC_N_INODEVEC 16 /* Size of locally allocated inode vector */
-#define SC_MAX_SEGDELTA 64 /* Upper limit of the number of segments
- appended in collection retry loop */
+#define SC_MAX_SEGDELTA 64 /*
+ * Upper limit of the number of segments
+ * appended in collection retry loop
+ */
/* Construction mode */
enum {
SC_LSEG_SR = 1, /* Make a logical segment having a super root */
- SC_LSEG_DSYNC, /* Flush data blocks of a given file and make
- a logical segment without a super root */
- SC_FLUSH_FILE, /* Flush data files, leads to segment writes without
- creating a checkpoint */
- SC_FLUSH_DAT, /* Flush DAT file. This also creates segments without
- a checkpoint */
+ SC_LSEG_DSYNC, /*
+ * Flush data blocks of a given file and make
+ * a logical segment without a super root.
+ */
+ SC_FLUSH_FILE, /*
+ * Flush data files, leads to segment writes without
+ * creating a checkpoint.
+ */
+ SC_FLUSH_DAT, /*
+ * Flush DAT file. This also creates segments
+ * without a checkpoint.
+ */
};
/* Stage numbers of dirty block collection */
@@ -76,6 +74,36 @@ enum {
NILFS_ST_DONE,
};
+#define CREATE_TRACE_POINTS
+#include <trace/events/nilfs2.h>
+
+/*
+ * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are
+ * wrapper functions of stage count (nilfs_sc_info->sc_stage.scnt). Users of
+ * the variable must use them because transition of stage count must involve
+ * trace events (trace_nilfs2_collection_stage_transition).
+ *
+ * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't
+ * produce tracepoint events. It is provided just for making the intention
+ * clear.
+ */
+static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci)
+{
+ sci->sc_stage.scnt++;
+ trace_nilfs2_collection_stage_transition(sci);
+}
+
+static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt)
+{
+ sci->sc_stage.scnt = next_scnt;
+ trace_nilfs2_collection_stage_transition(sci);
+}
+
+static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci)
+{
+ return sci->sc_stage.scnt;
+}
+
/* State flags of collection */
#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */
#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */
@@ -106,16 +134,12 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
-#define nilfs_cnt32_gt(a, b) \
- (typecheck(__u32, a) && typecheck(__u32, b) && \
- ((__s32)(b) - (__s32)(a) < 0))
#define nilfs_cnt32_ge(a, b) \
(typecheck(__u32, a) && typecheck(__u32, b) && \
- ((__s32)(a) - (__s32)(b) >= 0))
-#define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a)
-#define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a)
+ ((__s32)((a) - (b)) >= 0))
-static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
+static int nilfs_prepare_segment_lock(struct super_block *sb,
+ struct nilfs_transaction_info *ti)
{
struct nilfs_transaction_info *cur_ti = current->journal_info;
void *save = NULL;
@@ -123,17 +147,14 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
if (cur_ti) {
if (cur_ti->ti_magic == NILFS_TI_MAGIC)
return ++cur_ti->ti_count;
- else {
- /*
- * If journal_info field is occupied by other FS,
- * it is saved and will be restored on
- * nilfs_transaction_commit().
- */
- printk(KERN_WARNING
- "NILFS warning: journal info from a different "
- "FS\n");
- save = current->journal_info;
- }
+
+ /*
+ * If journal_info field is occupied by other FS,
+ * it is saved and will be restored on
+ * nilfs_transaction_commit().
+ */
+ nilfs_warn(sb, "journal info from a different FS");
+ save = current->journal_info;
}
if (!ti) {
ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
@@ -170,24 +191,29 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
* When @vacancy_check flag is set, this function will check the amount of
* free space, and will wait for the GC to reclaim disk space if low capacity.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
- *
- * %-ENOSPC - No space left on device
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No space left on device (if checking free space).
*/
int nilfs_transaction_begin(struct super_block *sb,
struct nilfs_transaction_info *ti,
int vacancy_check)
{
struct the_nilfs *nilfs;
- int ret = nilfs_prepare_segment_lock(ti);
+ int ret = nilfs_prepare_segment_lock(sb, ti);
+ struct nilfs_transaction_info *trace_ti;
if (unlikely(ret < 0))
return ret;
- if (ret > 0)
+ if (ret > 0) {
+ trace_ti = current->journal_info;
+
+ trace_nilfs2_transaction_transition(sb, trace_ti,
+ trace_ti->ti_count, trace_ti->ti_flags,
+ TRACE_NILFS2_TRANSACTION_BEGIN);
return 0;
+ }
sb_start_intwrite(sb);
@@ -198,6 +224,11 @@ int nilfs_transaction_begin(struct super_block *sb,
ret = -ENOSPC;
goto failed;
}
+
+ trace_ti = current->journal_info;
+ trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count,
+ trace_ti->ti_flags,
+ TRACE_NILFS2_TRANSACTION_BEGIN);
return 0;
failed:
@@ -219,6 +250,8 @@ int nilfs_transaction_begin(struct super_block *sb,
* nilfs_transaction_commit() sets a timer to start the segment
* constructor. If a sync flag is set, it starts construction
* directly.
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_transaction_commit(struct super_block *sb)
{
@@ -230,6 +263,8 @@ int nilfs_transaction_commit(struct super_block *sb)
ti->ti_flags |= NILFS_TI_COMMIT;
if (ti->ti_count > 0) {
ti->ti_count--;
+ trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
+ ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
return 0;
}
if (nilfs->ns_writer) {
@@ -241,6 +276,9 @@ int nilfs_transaction_commit(struct super_block *sb)
nilfs_segctor_do_flush(sci, 0);
}
up_read(&nilfs->ns_segctor_sem);
+ trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
+ ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
+
current->journal_info = ti->ti_save;
if (ti->ti_flags & NILFS_TI_SYNC)
@@ -259,10 +297,15 @@ void nilfs_transaction_abort(struct super_block *sb)
BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
if (ti->ti_count > 0) {
ti->ti_count--;
+ trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
+ ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
return;
}
up_read(&nilfs->ns_segctor_sem);
+ trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
+ ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
+
current->journal_info = ti->ti_save;
if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
kmem_cache_free(nilfs_transaction_cachep, ti);
@@ -274,7 +317,7 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb)
struct the_nilfs *nilfs = sb->s_fs_info;
struct nilfs_sc_info *sci = nilfs->ns_writer;
- if (!sci || !sci->sc_flush_request)
+ if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request)
return;
set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
@@ -305,10 +348,12 @@ static void nilfs_transaction_lock(struct super_block *sb,
ti->ti_count = 0;
ti->ti_save = cur_ti;
ti->ti_magic = NILFS_TI_MAGIC;
- INIT_LIST_HEAD(&ti->ti_garbage);
current->journal_info = ti;
for (;;) {
+ trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
+ ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK);
+
down_write(&nilfs->ns_segctor_sem);
if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
break;
@@ -316,10 +361,13 @@ static void nilfs_transaction_lock(struct super_block *sb,
nilfs_segctor_do_immediate_flush(sci);
up_write(&nilfs->ns_segctor_sem);
- yield();
+ cond_resched();
}
if (gcflag)
ti->ti_flags |= NILFS_TI_GC;
+
+ trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
+ ti->ti_flags, TRACE_NILFS2_TRANSACTION_LOCK);
}
static void nilfs_transaction_unlock(struct super_block *sb)
@@ -332,16 +380,17 @@ static void nilfs_transaction_unlock(struct super_block *sb)
up_write(&nilfs->ns_segctor_sem);
current->journal_info = ti->ti_save;
- if (!list_empty(&ti->ti_garbage))
- nilfs_dispose_list(nilfs, &ti->ti_garbage, 0);
+
+ trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
+ ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK);
}
static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
struct nilfs_segsum_pointer *ssp,
- unsigned bytes)
+ unsigned int bytes)
{
struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
- unsigned blocksize = sci->sc_super->s_blocksize;
+ unsigned int blocksize = sci->sc_super->s_blocksize;
void *p;
if (unlikely(ssp->offset + bytes > blocksize)) {
@@ -358,13 +407,15 @@ static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
/**
* nilfs_segctor_reset_segment_buffer - reset the current segment buffer
* @sci: nilfs_sc_info
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
{
struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
struct buffer_head *sumbh;
- unsigned sumbytes;
- unsigned flags = 0;
+ unsigned int sumbytes;
+ unsigned int flags = 0;
int err;
if (nilfs_doing_gc())
@@ -381,12 +432,32 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
return 0;
}
+/**
+ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
+ * @sci: segment constructor object
+ *
+ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
+ * the current segment summary block.
+ */
+static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
+{
+ struct nilfs_segsum_pointer *ssp;
+
+ ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
+ if (ssp->offset < ssp->bh->b_size)
+ memset(ssp->bh->b_data + ssp->offset, 0,
+ ssp->bh->b_size - ssp->offset);
+}
+
static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
- return -E2BIG; /* The current segment is filled up
- (internal code) */
+ return -E2BIG; /*
+ * The current segment is filled up
+ * (internal code)
+ */
+ nilfs_segctor_zeropad_segsum(sci);
sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
return nilfs_segctor_reset_segment_buffer(sci);
}
@@ -413,9 +484,9 @@ static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
*/
static int nilfs_segctor_segsum_block_required(
struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
- unsigned binfo_size)
+ unsigned int binfo_size)
{
- unsigned blocksize = sci->sc_super->s_blocksize;
+ unsigned int blocksize = sci->sc_super->s_blocksize;
/* Size of finfo and binfo is enough small against blocksize */
return ssp->offset + binfo_size +
@@ -450,7 +521,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
ii = NILFS_I(inode);
- if (test_bit(NILFS_I_GCINODE, &ii->i_state))
+ if (ii->i_type & NILFS_I_TYPE_GC)
cno = ii->i_cno;
else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
cno = 0;
@@ -474,7 +545,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
struct buffer_head *bh,
struct inode *inode,
- unsigned binfo_size)
+ unsigned int binfo_size)
{
struct nilfs_segment_buffer *segbuf;
int required, err = 0;
@@ -491,6 +562,7 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
goto retry;
}
if (unlikely(required)) {
+ nilfs_segctor_zeropad_segsum(sci);
err = nilfs_segbuf_extend_segsum(segbuf);
if (unlikely(err))
goto failed;
@@ -558,7 +630,7 @@ static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
*vblocknr = binfo->bi_v.bi_vblocknr;
}
-static struct nilfs_sc_operations nilfs_sc_file_ops = {
+static const struct nilfs_sc_operations nilfs_sc_file_ops = {
.collect_data = nilfs_collect_file_data,
.collect_node = nilfs_collect_file_node,
.collect_bmap = nilfs_collect_file_bmap,
@@ -607,7 +679,7 @@ static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
*binfo_dat = binfo->bi_dat;
}
-static struct nilfs_sc_operations nilfs_sc_dat_ops = {
+static const struct nilfs_sc_operations nilfs_sc_dat_ops = {
.collect_data = nilfs_collect_dat_data,
.collect_node = nilfs_collect_file_node,
.collect_bmap = nilfs_collect_dat_bmap,
@@ -615,7 +687,7 @@ static struct nilfs_sc_operations nilfs_sc_dat_ops = {
.write_node_binfo = nilfs_write_dat_node_binfo,
};
-static struct nilfs_sc_operations nilfs_sc_dsync_ops = {
+static const struct nilfs_sc_operations nilfs_sc_dsync_ops = {
.collect_data = nilfs_collect_file_data,
.collect_node = NULL,
.collect_bmap = NULL,
@@ -629,7 +701,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
loff_t start, loff_t end)
{
struct address_space *mapping = inode->i_mapping;
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t index = 0, last = ULONG_MAX;
size_t ndirties = 0;
int i;
@@ -643,41 +715,46 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
index = start >> PAGE_SHIFT;
last = end >> PAGE_SHIFT;
}
- pagevec_init(&pvec, 0);
+ folio_batch_init(&fbatch);
repeat:
if (unlikely(index > last) ||
- !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
- min_t(pgoff_t, last - index,
- PAGEVEC_SIZE - 1) + 1))
+ !filemap_get_folios_tag(mapping, &index, last,
+ PAGECACHE_TAG_DIRTY, &fbatch))
return ndirties;
- for (i = 0; i < pagevec_count(&pvec); i++) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct buffer_head *bh, *head;
- struct page *page = pvec.pages[i];
-
- if (unlikely(page->index > last))
- break;
+ struct folio *folio = fbatch.folios[i];
- lock_page(page);
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
- unlock_page(page);
+ folio_lock(folio);
+ if (unlikely(folio->mapping != mapping)) {
+ /* Exclude folios removed from the address space */
+ folio_unlock(folio);
+ continue;
+ }
+ head = folio_buffers(folio);
+ if (!head)
+ head = create_empty_buffers(folio,
+ i_blocksize(inode), 0);
- bh = head = page_buffers(page);
+ bh = head;
do {
- if (!buffer_dirty(bh))
+ if (!buffer_dirty(bh) || buffer_async_write(bh))
continue;
get_bh(bh);
list_add_tail(&bh->b_assoc_buffers, listp);
ndirties++;
if (unlikely(ndirties >= nlimit)) {
- pagevec_release(&pvec);
+ folio_unlock(folio);
+ folio_batch_release(&fbatch);
cond_resched();
return ndirties;
}
} while (bh = bh->b_this_page, bh != head);
+
+ folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
goto repeat;
}
@@ -686,20 +763,23 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
struct list_head *listp)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
- struct address_space *mapping = &ii->i_btnode_cache;
- struct pagevec pvec;
+ struct inode *btnc_inode = ii->i_assoc_inode;
+ struct folio_batch fbatch;
struct buffer_head *bh, *head;
unsigned int i;
pgoff_t index = 0;
- pagevec_init(&pvec, 0);
+ if (!btnc_inode)
+ return;
+ folio_batch_init(&fbatch);
- while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
- PAGEVEC_SIZE)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- bh = head = page_buffers(pvec.pages[i]);
+ while (filemap_get_folios_tag(btnc_inode->i_mapping, &index,
+ (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ bh = head = folio_buffers(fbatch.folios[i]);
do {
- if (buffer_dirty(bh)) {
+ if (buffer_dirty(bh) &&
+ !buffer_async_write(bh)) {
get_bh(bh);
list_add_tail(&bh->b_assoc_buffers,
listp);
@@ -707,7 +787,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
bh = bh->b_this_page;
} while (bh != head);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
}
@@ -717,7 +797,7 @@ static void nilfs_dispose_list(struct the_nilfs *nilfs,
{
struct nilfs_inode_info *ii, *n;
struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
- unsigned nv = 0;
+ unsigned int nv = 0;
while (!list_empty(head)) {
spin_lock(&nilfs->ns_inode_lock);
@@ -745,6 +825,15 @@ static void nilfs_dispose_list(struct the_nilfs *nilfs,
}
}
+static void nilfs_iput_work_func(struct work_struct *work)
+{
+ struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info,
+ sc_iput_work);
+ struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
+
+ nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0);
+}
+
static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
struct nilfs_root *root)
{
@@ -795,68 +884,6 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
nilfs_mdt_clear_dirty(nilfs->ns_dat);
}
-static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
-{
- struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
- struct buffer_head *bh_cp;
- struct nilfs_checkpoint *raw_cp;
- int err;
-
- /* XXX: this interface will be changed */
- err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
- &raw_cp, &bh_cp);
- if (likely(!err)) {
- /* The following code is duplicated with cpfile. But, it is
- needed to collect the checkpoint even if it was not newly
- created */
- mark_buffer_dirty(bh_cp);
- nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
- nilfs_cpfile_put_checkpoint(
- nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
- } else
- WARN_ON(err == -EINVAL || err == -ENOENT);
-
- return err;
-}
-
-static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
-{
- struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
- struct buffer_head *bh_cp;
- struct nilfs_checkpoint *raw_cp;
- int err;
-
- err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
- &raw_cp, &bh_cp);
- if (unlikely(err)) {
- WARN_ON(err == -EINVAL || err == -ENOENT);
- goto failed_ibh;
- }
- raw_cp->cp_snapshot_list.ssl_next = 0;
- raw_cp->cp_snapshot_list.ssl_prev = 0;
- raw_cp->cp_inodes_count =
- cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
- raw_cp->cp_blocks_count =
- cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
- raw_cp->cp_nblk_inc =
- cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
- raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
- raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);
-
- if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
- nilfs_checkpoint_clear_minor(raw_cp);
- else
- nilfs_checkpoint_set_minor(raw_cp);
-
- nilfs_write_inode_common(sci->sc_root->ifile,
- &raw_cp->cp_ifile_inode, 1);
- nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
- return 0;
-
- failed_ibh:
- return err;
-}
-
static void nilfs_fill_in_file_bmap(struct inode *ifile,
struct nilfs_inode_info *ii)
@@ -870,7 +897,7 @@ static void nilfs_fill_in_file_bmap(struct inode *ifile,
raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
ibh);
nilfs_bmap_write(ii->i_bmap, raw_inode);
- nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
+ nilfs_ifile_unmap_inode(raw_inode);
}
}
@@ -884,31 +911,64 @@ static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
}
}
+/**
+ * nilfs_write_root_mdt_inode - export root metadata inode information to
+ * the on-disk inode
+ * @inode: inode object of the root metadata file
+ * @raw_inode: on-disk inode
+ *
+ * nilfs_write_root_mdt_inode() writes inode information and bmap data of
+ * @inode to the inode area of the metadata file allocated on the super root
+ * block created to finalize the log. Since super root blocks are configured
+ * each time, this function zero-fills the unused area of @raw_inode.
+ */
+static void nilfs_write_root_mdt_inode(struct inode *inode,
+ struct nilfs_inode *raw_inode)
+{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+
+ nilfs_write_inode_common(inode, raw_inode);
+
+ /* zero-fill unused portion of raw_inode */
+ raw_inode->i_xattr = 0;
+ raw_inode->i_pad = 0;
+ memset((void *)raw_inode + sizeof(*raw_inode), 0,
+ nilfs->ns_inode_size - sizeof(*raw_inode));
+
+ nilfs_bmap_write(NILFS_I(inode)->i_bmap, raw_inode);
+}
+
static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs)
{
struct buffer_head *bh_sr;
struct nilfs_super_root *raw_sr;
- unsigned isz, srsz;
+ unsigned int isz, srsz;
bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
+
+ lock_buffer(bh_sr);
raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
isz = nilfs->ns_inode_size;
srsz = NILFS_SR_BYTES(isz);
+ raw_sr->sr_sum = 0; /* Ensure initialization within this update */
raw_sr->sr_bytes = cpu_to_le16(srsz);
raw_sr->sr_nongc_ctime
= cpu_to_le64(nilfs_doing_gc() ?
nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
raw_sr->sr_flags = 0;
- nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
- NILFS_SR_DAT_OFFSET(isz), 1);
- nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
- NILFS_SR_CPFILE_OFFSET(isz), 1);
- nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
- NILFS_SR_SUFILE_OFFSET(isz), 1);
+ nilfs_write_root_mdt_inode(nilfs->ns_dat, (void *)raw_sr +
+ NILFS_SR_DAT_OFFSET(isz));
+ nilfs_write_root_mdt_inode(nilfs->ns_cpfile, (void *)raw_sr +
+ NILFS_SR_CPFILE_OFFSET(isz));
+ nilfs_write_root_mdt_inode(nilfs->ns_sufile, (void *)raw_sr +
+ NILFS_SR_SUFILE_OFFSET(isz));
+
memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
+ set_buffer_uptodate(bh_sr);
+ unlock_buffer(bh_sr);
}
static void nilfs_redirty_inodes(struct list_head *head)
@@ -929,7 +989,7 @@ static void nilfs_drop_collected_inodes(struct list_head *head)
if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
continue;
- clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
+ clear_bit(NILFS_I_INODE_SYNC, &ii->i_state);
set_bit(NILFS_I_UPDATED, &ii->i_state);
}
}
@@ -974,7 +1034,7 @@ static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci)
static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
struct inode *inode,
- struct nilfs_sc_operations *sc_ops)
+ const struct nilfs_sc_operations *sc_ops)
{
LIST_HEAD(data_buffers);
LIST_HEAD(node_buffers);
@@ -1046,15 +1106,68 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
return err;
}
+/**
+ * nilfs_free_segments - free the segments given by an array of segment numbers
+ * @nilfs: nilfs object
+ * @segnumv: array of segment numbers to be freed
+ * @nsegs: number of segments to be freed in @segnumv
+ *
+ * nilfs_free_segments() wraps nilfs_sufile_freev() and
+ * nilfs_sufile_cancel_freev(), and edits the segment usage metadata file
+ * (sufile) to free all segments given by @segnumv and @nsegs at once. If
+ * it fails midway, it cancels the changes so that none of the segments are
+ * freed. If @nsegs is 0, this function does nothing.
+ *
+ * The freeing of segments is not finalized until the writing of a log with
+ * a super root block containing this sufile change is complete, and it can
+ * be canceled with nilfs_sufile_cancel_freev() until then.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid segment number.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ */
+static int nilfs_free_segments(struct the_nilfs *nilfs, __u64 *segnumv,
+ size_t nsegs)
+{
+ size_t ndone;
+ int ret;
+
+ if (!nsegs)
+ return 0;
+
+ ret = nilfs_sufile_freev(nilfs->ns_sufile, segnumv, nsegs, &ndone);
+ if (unlikely(ret)) {
+ nilfs_sufile_cancel_freev(nilfs->ns_sufile, segnumv, ndone,
+ NULL);
+ /*
+ * If a segment usage of the segments to be freed is in a
+ * hole block, nilfs_sufile_freev() will return -ENOENT.
+ * In this case, -EINVAL should be returned to the caller
+ * since there is something wrong with the given segment
+ * number array. This error can only occur during GC, so
+ * there is no need to worry about it propagating to other
+ * callers (such as fsync).
+ */
+ if (ret == -ENOENT) {
+ nilfs_err(nilfs->ns_sb,
+ "The segment usage entry %llu to be freed is invalid (in a hole)",
+ (unsigned long long)segnumv[ndone]);
+ ret = -EINVAL;
+ }
+ }
+ return ret;
+}
+
static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
{
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
struct list_head *head;
struct nilfs_inode_info *ii;
- size_t ndone;
int err = 0;
- switch (sci->sc_stage.scnt) {
+ switch (nilfs_sc_cstage_get(sci)) {
case NILFS_ST_INIT:
/* Pre-processes */
sci->sc_stage.flags = 0;
@@ -1063,7 +1176,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
sci->sc_nblk_inc = 0;
sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
if (mode == SC_LSEG_DSYNC) {
- sci->sc_stage.scnt = NILFS_ST_DSYNC;
+ nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC);
goto dsync_mode;
}
}
@@ -1071,10 +1184,11 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
sci->sc_stage.dirty_file_ptr = NULL;
sci->sc_stage.gc_inode_ptr = NULL;
if (mode == SC_FLUSH_DAT) {
- sci->sc_stage.scnt = NILFS_ST_DAT;
+ nilfs_sc_cstage_set(sci, NILFS_ST_DAT);
goto dat_stage;
}
- sci->sc_stage.scnt++; /* Fall through */
+ nilfs_sc_cstage_inc(sci);
+ fallthrough;
case NILFS_ST_GC:
if (nilfs_doing_gc()) {
head = &sci->sc_gc_inodes;
@@ -1095,7 +1209,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
}
sci->sc_stage.gc_inode_ptr = NULL;
}
- sci->sc_stage.scnt++; /* Fall through */
+ nilfs_sc_cstage_inc(sci);
+ fallthrough;
case NILFS_ST_FILE:
head = &sci->sc_dirty_files;
ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
@@ -1117,45 +1232,44 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
}
sci->sc_stage.dirty_file_ptr = NULL;
if (mode == SC_FLUSH_FILE) {
- sci->sc_stage.scnt = NILFS_ST_DONE;
+ nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
return 0;
}
- sci->sc_stage.scnt++;
+ nilfs_sc_cstage_inc(sci);
sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
- /* Fall through */
+ fallthrough;
case NILFS_ST_IFILE:
err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile,
&nilfs_sc_file_ops);
if (unlikely(err))
break;
- sci->sc_stage.scnt++;
+ nilfs_sc_cstage_inc(sci);
/* Creating a checkpoint */
- err = nilfs_segctor_create_checkpoint(sci);
+ err = nilfs_cpfile_create_checkpoint(nilfs->ns_cpfile,
+ nilfs->ns_cno);
if (unlikely(err))
break;
- /* Fall through */
+ fallthrough;
case NILFS_ST_CPFILE:
err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
&nilfs_sc_file_ops);
if (unlikely(err))
break;
- sci->sc_stage.scnt++; /* Fall through */
+ nilfs_sc_cstage_inc(sci);
+ fallthrough;
case NILFS_ST_SUFILE:
- err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
- sci->sc_nfreesegs, &ndone);
- if (unlikely(err)) {
- nilfs_sufile_cancel_freev(nilfs->ns_sufile,
- sci->sc_freesegs, ndone,
- NULL);
+ err = nilfs_free_segments(nilfs, sci->sc_freesegs,
+ sci->sc_nfreesegs);
+ if (unlikely(err))
break;
- }
sci->sc_stage.flags |= NILFS_CF_SUFREED;
err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
&nilfs_sc_file_ops);
if (unlikely(err))
break;
- sci->sc_stage.scnt++; /* Fall through */
+ nilfs_sc_cstage_inc(sci);
+ fallthrough;
case NILFS_ST_DAT:
dat_stage:
err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
@@ -1163,10 +1277,11 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
if (unlikely(err))
break;
if (mode == SC_FLUSH_DAT) {
- sci->sc_stage.scnt = NILFS_ST_DONE;
+ nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
return 0;
}
- sci->sc_stage.scnt++; /* Fall through */
+ nilfs_sc_cstage_inc(sci);
+ fallthrough;
case NILFS_ST_SR:
if (mode == SC_LSEG_SR) {
/* Appending a super root */
@@ -1176,7 +1291,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
}
/* End of a logical segment */
sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
- sci->sc_stage.scnt = NILFS_ST_DONE;
+ nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
return 0;
case NILFS_ST_DSYNC:
dsync_mode:
@@ -1189,7 +1304,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
if (unlikely(err))
break;
sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
- sci->sc_stage.scnt = NILFS_ST_DONE;
+ nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
return 0;
case NILFS_ST_DONE:
return 0;
@@ -1205,6 +1320,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
* nilfs_segctor_begin_construction - setup segment buffer to make a new log
* @sci: nilfs_sc_info
* @nilfs: nilfs object
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs)
@@ -1337,8 +1454,10 @@ static void nilfs_free_incomplete_logs(struct list_head *logs,
if (atomic_read(&segbuf->sb_err)) {
/* Case 1: The first segment failed */
if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
- /* Case 1a: Partial segment appended into an existing
- segment */
+ /*
+ * Case 1a: Partial segment appended into an existing
+ * segment
+ */
nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
segbuf->sb_fseg_end);
else /* Case 1b: New full segment */
@@ -1434,25 +1553,29 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
goto failed;
/* The current segment is filled up */
- if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
+ if (mode != SC_LSEG_SR ||
+ nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE)
break;
nilfs_clear_logs(&sci->sc_segbufs);
- err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
- if (unlikely(err))
- return err;
-
if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
sci->sc_freesegs,
sci->sc_nfreesegs,
NULL);
WARN_ON(err); /* do not happen */
+ sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
}
+
+ err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
+ if (unlikely(err))
+ return err;
+
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
+ nilfs_segctor_zeropad_segsum(sci);
nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
return 0;
@@ -1478,7 +1601,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
sector_t blocknr;
unsigned long nfinfo = segbuf->sb_sum.nfinfo;
unsigned long nblocks = 0, ndatablk = 0;
- struct nilfs_sc_operations *sc_op = NULL;
+ const struct nilfs_sc_operations *sc_op = NULL;
struct nilfs_segsum_pointer ssp;
struct nilfs_finfo *finfo = NULL;
union nilfs_binfo binfo;
@@ -1503,7 +1626,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
nblocks = le32_to_cpu(finfo->fi_nblocks);
ndatablk = le32_to_cpu(finfo->fi_ndatablk);
- inode = bh->b_page->mapping->host;
+ inode = bh->b_folio->mapping->host;
if (mode == SC_LSEG_DSYNC)
sc_op = &nilfs_sc_dsync_ops;
@@ -1556,65 +1679,95 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
return 0;
}
-static void nilfs_begin_page_io(struct page *page)
+static void nilfs_begin_folio_io(struct folio *folio)
{
- if (!page || PageWriteback(page))
- /* For split b-tree node pages, this function may be called
- twice. We ignore the 2nd or later calls by this check. */
+ if (!folio || folio_test_writeback(folio))
+ /*
+ * For split b-tree node pages, this function may be called
+ * twice. We ignore the 2nd or later calls by this check.
+ */
return;
- lock_page(page);
- clear_page_dirty_for_io(page);
- set_page_writeback(page);
- unlock_page(page);
+ folio_lock(folio);
+ folio_clear_dirty_for_io(folio);
+ folio_start_writeback(folio);
+ folio_unlock(folio);
}
-static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
+/**
+ * nilfs_prepare_write_logs - prepare to write logs
+ * @logs: logs to prepare for writing
+ * @seed: checksum seed value
+ *
+ * nilfs_prepare_write_logs() adds checksums and prepares the block
+ * buffers/folios for writing logs. In order to stabilize folios of
+ * memory-mapped file blocks by putting them in writeback state before
+ * calculating the checksums, first prepare to write payload blocks other
+ * than segment summary and super root blocks in which the checksums will
+ * be embedded.
+ */
+static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed)
{
struct nilfs_segment_buffer *segbuf;
- struct page *bd_page = NULL, *fs_page = NULL;
-
- list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
- struct buffer_head *bh;
+ struct folio *bd_folio = NULL, *fs_folio = NULL;
+ struct buffer_head *bh;
- list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
+ /* Prepare to write payload blocks */
+ list_for_each_entry(segbuf, logs, sb_list) {
+ list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- if (bh->b_page != bd_page) {
- if (bd_page) {
- lock_page(bd_page);
- clear_page_dirty_for_io(bd_page);
- set_page_writeback(bd_page);
- unlock_page(bd_page);
- }
- bd_page = bh->b_page;
+ if (bh == segbuf->sb_super_root)
+ break;
+ set_buffer_async_write(bh);
+ if (bh->b_folio != fs_folio) {
+ nilfs_begin_folio_io(fs_folio);
+ fs_folio = bh->b_folio;
}
}
+ }
+ nilfs_begin_folio_io(fs_folio);
- list_for_each_entry(bh, &segbuf->sb_payload_buffers,
+ nilfs_add_checksums_on_logs(logs, seed);
+
+ /* Prepare to write segment summary blocks */
+ list_for_each_entry(segbuf, logs, sb_list) {
+ list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
- if (bh == segbuf->sb_super_root) {
- if (bh->b_page != bd_page) {
- lock_page(bd_page);
- clear_page_dirty_for_io(bd_page);
- set_page_writeback(bd_page);
- unlock_page(bd_page);
- bd_page = bh->b_page;
- }
- break;
- }
- if (bh->b_page != fs_page) {
- nilfs_begin_page_io(fs_page);
- fs_page = bh->b_page;
+ mark_buffer_dirty(bh);
+ if (bh->b_folio == bd_folio)
+ continue;
+ if (bd_folio) {
+ folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
}
+ bd_folio = bh->b_folio;
}
}
- if (bd_page) {
- lock_page(bd_page);
- clear_page_dirty_for_io(bd_page);
- set_page_writeback(bd_page);
- unlock_page(bd_page);
+
+ /* Prepare to write super root block */
+ bh = NILFS_LAST_SEGBUF(logs)->sb_super_root;
+ if (bh) {
+ mark_buffer_dirty(bh);
+ if (bh->b_folio != bd_folio) {
+ folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
+ bd_folio = bh->b_folio;
+ }
+ }
+
+ if (bd_folio) {
+ folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
}
- nilfs_begin_page_io(fs_page);
}
static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -1627,17 +1780,18 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
return ret;
}
-static void nilfs_end_page_io(struct page *page, int err)
+static void nilfs_end_folio_io(struct folio *folio, int err)
{
- if (!page)
+ if (!folio)
return;
- if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
+ if (buffer_nilfs_node(folio_buffers(folio)) &&
+ !folio_test_writeback(folio)) {
/*
* For b-tree node pages, this function may be called twice
* or more because they might be split in a segment.
*/
- if (PageDirty(page)) {
+ if (folio_test_dirty(folio)) {
/*
* For pages holding split b-tree node buffers, dirty
* flag on the buffers may be cleared discretely.
@@ -1645,30 +1799,24 @@ static void nilfs_end_page_io(struct page *page, int err)
* remaining buffers, and it must be cancelled if
* all the buffers get cleaned later.
*/
- lock_page(page);
- if (nilfs_page_buffers_clean(page))
- __nilfs_clear_page_dirty(page);
- unlock_page(page);
+ folio_lock(folio);
+ if (nilfs_folio_buffers_clean(folio))
+ __nilfs_clear_folio_dirty(folio);
+ folio_unlock(folio);
}
return;
}
- if (!err) {
- if (!nilfs_page_buffers_clean(page))
- __set_page_dirty_nobuffers(page);
- ClearPageError(page);
- } else {
- __set_page_dirty_nobuffers(page);
- SetPageError(page);
- }
+ if (err || !nilfs_folio_buffers_clean(folio))
+ filemap_dirty_folio(folio->mapping, folio);
- end_page_writeback(page);
+ folio_end_writeback(folio);
}
static void nilfs_abort_logs(struct list_head *logs, int err)
{
struct nilfs_segment_buffer *segbuf;
- struct page *bd_page = NULL, *fs_page = NULL;
+ struct folio *bd_folio = NULL, *fs_folio = NULL;
struct buffer_head *bh;
if (list_empty(logs))
@@ -1677,32 +1825,35 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
- if (bh->b_page != bd_page) {
- if (bd_page)
- end_page_writeback(bd_page);
- bd_page = bh->b_page;
+ clear_buffer_uptodate(bh);
+ if (bh->b_folio != bd_folio) {
+ if (bd_folio)
+ folio_end_writeback(bd_folio);
+ bd_folio = bh->b_folio;
}
}
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
if (bh == segbuf->sb_super_root) {
- if (bh->b_page != bd_page) {
- end_page_writeback(bd_page);
- bd_page = bh->b_page;
+ clear_buffer_uptodate(bh);
+ if (bh->b_folio != bd_folio) {
+ folio_end_writeback(bd_folio);
+ bd_folio = bh->b_folio;
}
break;
}
- if (bh->b_page != fs_page) {
- nilfs_end_page_io(fs_page, err);
- fs_page = bh->b_page;
+ clear_buffer_async_write(bh);
+ if (bh->b_folio != fs_folio) {
+ nilfs_end_folio_io(fs_folio, err);
+ fs_folio = bh->b_folio;
}
}
}
- if (bd_page)
- end_page_writeback(bd_page);
+ if (bd_folio)
+ folio_end_writeback(bd_folio);
- nilfs_end_page_io(fs_page, err);
+ nilfs_end_folio_io(fs_folio, err);
}
static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
@@ -1716,6 +1867,9 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
nilfs_abort_logs(&logs, ret ? : err);
list_splice_tail_init(&sci->sc_segbufs, &logs);
+ if (list_empty(&logs))
+ return; /* if the first segment buffer preparation failed */
+
nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
nilfs_free_incomplete_logs(&logs, nilfs);
@@ -1744,7 +1898,7 @@ static void nilfs_set_next_segment(struct the_nilfs *nilfs,
static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
{
struct nilfs_segment_buffer *segbuf;
- struct page *bd_page = NULL, *fs_page = NULL;
+ struct folio *bd_folio = NULL, *fs_folio = NULL;
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
int update_sr = false;
@@ -1755,41 +1909,45 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
- if (bh->b_page != bd_page) {
- if (bd_page)
- end_page_writeback(bd_page);
- bd_page = bh->b_page;
+ if (bh->b_folio != bd_folio) {
+ if (bd_folio)
+ folio_end_writeback(bd_folio);
+ bd_folio = bh->b_folio;
}
}
/*
- * We assume that the buffers which belong to the same page
+ * We assume that the buffers which belong to the same folio
* continue over the buffer list.
- * Under this assumption, the last BHs of pages is
- * identifiable by the discontinuity of bh->b_page
- * (page != fs_page).
+ * Under this assumption, the last BHs of folios is
+ * identifiable by the discontinuity of bh->b_folio
+ * (folio != fs_folio).
*
* For B-tree node blocks, however, this assumption is not
- * guaranteed. The cleanup code of B-tree node pages needs
+ * guaranteed. The cleanup code of B-tree node folios needs
* special care.
*/
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
- clear_buffer_delay(bh);
- clear_buffer_nilfs_volatile(bh);
- clear_buffer_nilfs_redirected(bh);
+ const unsigned long set_bits = BIT(BH_Uptodate);
+ const unsigned long clear_bits =
+ (BIT(BH_Dirty) | BIT(BH_Async_Write) |
+ BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
+ BIT(BH_NILFS_Redirected));
+
if (bh == segbuf->sb_super_root) {
- if (bh->b_page != bd_page) {
- end_page_writeback(bd_page);
- bd_page = bh->b_page;
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
+ if (bh->b_folio != bd_folio) {
+ folio_end_writeback(bd_folio);
+ bd_folio = bh->b_folio;
}
update_sr = true;
break;
}
- if (bh->b_page != fs_page) {
- nilfs_end_page_io(fs_page, 0);
- fs_page = bh->b_page;
+ set_mask_bits(&bh->b_state, clear_bits, set_bits);
+ if (bh->b_folio != fs_folio) {
+ nilfs_end_folio_io(fs_folio, 0);
+ fs_folio = bh->b_folio;
}
}
@@ -1803,13 +1961,13 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
}
}
/*
- * Since pages may continue over multiple segment buffers,
- * end of the last page must be checked outside of the loop.
+ * Since folios may continue over multiple segment buffers,
+ * end of the last folio must be checked outside of the loop.
*/
- if (bd_page)
- end_page_writeback(bd_page);
+ if (bd_folio)
+ folio_end_writeback(bd_folio);
- nilfs_end_page_io(fs_page, 0);
+ nilfs_end_folio_io(fs_folio, 0);
nilfs_drop_collected_inodes(&sci->sc_dirty_files);
@@ -1824,6 +1982,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
nilfs_set_next_segment(nilfs, segbuf);
if (update_sr) {
+ nilfs->ns_flushed_device = 0;
nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
segbuf->sb_sum.seg_seq, nilfs->ns_cno++);
@@ -1864,12 +2023,11 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
err = nilfs_ifile_get_inode_block(
ifile, ii->vfs_inode.i_ino, &ibh);
if (unlikely(err)) {
- nilfs_warning(sci->sc_super, __func__,
- "failed to get inode block.\n");
+ nilfs_warn(sci->sc_super,
+ "log writer: error %d getting inode block (ino=%lu)",
+ err, ii->vfs_inode.i_ino);
return err;
}
- mark_buffer_dirty(ibh);
- nilfs_mdt_mark_dirty(ifile);
spin_lock(&nilfs->ns_inode_lock);
if (likely(!ii->i_bh))
ii->i_bh = ibh;
@@ -1878,6 +2036,10 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
goto retry;
}
+ // Always redirty the buffer to avoid race condition
+ mark_buffer_dirty(ii->i_bh);
+ nilfs_mdt_mark_dirty(ifile);
+
clear_bit(NILFS_I_QUEUED, &ii->i_state);
set_bit(NILFS_I_BUSY, &ii->i_state);
list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
@@ -1890,8 +2052,9 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs)
{
- struct nilfs_transaction_info *ti = current->journal_info;
struct nilfs_inode_info *ii, *n;
+ int during_mount = !(sci->sc_super->s_flags & SB_ACTIVE);
+ int defer_iput = false;
spin_lock(&nilfs->ns_inode_lock);
list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
@@ -1902,9 +2065,24 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
clear_bit(NILFS_I_BUSY, &ii->i_state);
brelse(ii->i_bh);
ii->i_bh = NULL;
- list_move_tail(&ii->i_dirty, &ti->ti_garbage);
+ list_del_init(&ii->i_dirty);
+ if (!ii->vfs_inode.i_nlink || during_mount) {
+ /*
+ * Defer calling iput() to avoid deadlocks if
+ * i_nlink == 0 or mount is not yet finished.
+ */
+ list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
+ defer_iput = true;
+ } else {
+ spin_unlock(&nilfs->ns_inode_lock);
+ iput(&ii->vfs_inode);
+ spin_lock(&nilfs->ns_inode_lock);
+ }
}
spin_unlock(&nilfs->ns_inode_lock);
+
+ if (defer_iput)
+ schedule_work(&sci->sc_iput_work);
}
/*
@@ -1915,7 +2093,10 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
int err;
- sci->sc_stage.scnt = NILFS_ST_INIT;
+ if (sb_rdonly(sci->sc_super))
+ return -EROFS;
+
+ nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
sci->sc_cno = nilfs->ns_cno;
err = nilfs_segctor_collect_dirty_files(sci, nilfs);
@@ -1933,17 +2114,17 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
err = nilfs_segctor_begin_construction(sci, nilfs);
if (unlikely(err))
- goto out;
+ goto failed;
/* Update time stamp */
- sci->sc_seg_ctime = get_seconds();
+ sci->sc_seg_ctime = ktime_get_real_seconds();
err = nilfs_segctor_collect(sci, nilfs, mode);
if (unlikely(err))
goto failed;
/* Avoid empty segment */
- if (sci->sc_stage.scnt == NILFS_ST_DONE &&
+ if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE &&
nilfs_segbuf_empty(sci->sc_curseg)) {
nilfs_segctor_abort_construction(sci, nilfs, 1);
goto out;
@@ -1957,8 +2138,12 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
nilfs_segctor_fill_in_file_bmap(sci);
if (mode == SC_LSEG_SR &&
- sci->sc_stage.scnt >= NILFS_ST_CPFILE) {
- err = nilfs_segctor_fill_in_checkpoint(sci);
+ nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) {
+ err = nilfs_cpfile_finalize_checkpoint(
+ nilfs->ns_cpfile, nilfs->ns_cno, sci->sc_root,
+ sci->sc_nblk_inc + sci->sc_nblk_this_inc,
+ sci->sc_seg_ctime,
+ !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags));
if (unlikely(err))
goto failed_to_write;
@@ -1967,17 +2152,14 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
/* Write partial segments */
- nilfs_segctor_prepare_write(sci);
-
- nilfs_add_checksums_on_logs(&sci->sc_segbufs,
- nilfs->ns_crc_seed);
+ nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed);
err = nilfs_segctor_write(sci, nilfs);
if (unlikely(err))
goto failed_to_write;
- if (sci->sc_stage.scnt == NILFS_ST_DONE ||
- nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
+ if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE ||
+ nilfs->ns_blocksize_bits != PAGE_SHIFT) {
/*
* At this point, we avoid double buffering
* for blocksize < pagesize because page dirty
@@ -1989,17 +2171,16 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
if (err)
goto failed_to_write;
}
- } while (sci->sc_stage.scnt != NILFS_ST_DONE);
+ } while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE);
out:
nilfs_segctor_drop_written_files(sci, nilfs);
return err;
failed_to_write:
- if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
- nilfs_redirty_inodes(&sci->sc_dirty_files);
-
failed:
+ if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE)
+ nilfs_redirty_inodes(&sci->sc_dirty_files);
if (nilfs_doing_gc())
nilfs_redirty_inodes(&sci->sc_gc_inodes);
nilfs_segctor_abort_construction(sci, nilfs, err);
@@ -2018,8 +2199,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
{
spin_lock(&sci->sc_state_lock);
if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
- sci->sc_timer.expires = jiffies + sci->sc_interval;
- add_timer(&sci->sc_timer);
+ if (sci->sc_task) {
+ sci->sc_timer.expires = jiffies + sci->sc_interval;
+ add_timer(&sci->sc_timer);
+ }
sci->sc_state |= NILFS_SEGCTOR_COMMIT;
}
spin_unlock(&sci->sc_state_lock);
@@ -2028,34 +2211,18 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
{
spin_lock(&sci->sc_state_lock);
- if (!(sci->sc_flush_request & (1 << bn))) {
+ if (!(sci->sc_flush_request & BIT(bn))) {
unsigned long prev_req = sci->sc_flush_request;
- sci->sc_flush_request |= (1 << bn);
+ sci->sc_flush_request |= BIT(bn);
if (!prev_req)
wake_up(&sci->sc_wait_daemon);
}
spin_unlock(&sci->sc_state_lock);
}
-/**
- * nilfs_flush_segment - trigger a segment construction for resource control
- * @sb: super block
- * @ino: inode number of the file to be flushed out.
- */
-void nilfs_flush_segment(struct super_block *sb, ino_t ino)
-{
- struct the_nilfs *nilfs = sb->s_fs_info;
- struct nilfs_sc_info *sci = nilfs->ns_writer;
-
- if (!sci || nilfs_doing_construction())
- return;
- nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
- /* assign bit 0 to data files */
-}
-
struct nilfs_segctor_wait_request {
- wait_queue_t wq;
+ wait_queue_entry_t wq;
__u32 seq;
int err;
atomic_t done;
@@ -2066,19 +2233,36 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
struct nilfs_segctor_wait_request wait_req;
int err = 0;
- spin_lock(&sci->sc_state_lock);
init_wait(&wait_req.wq);
wait_req.err = 0;
atomic_set(&wait_req.done, 0);
+ init_waitqueue_entry(&wait_req.wq, current);
+
+ /*
+ * To prevent a race issue where completion notifications from the
+ * log writer thread are missed, increment the request sequence count
+ * "sc_seq_request" and insert a wait queue entry using the current
+ * sequence number into the "sc_wait_request" queue at the same time
+ * within the lock section of "sc_state_lock".
+ */
+ spin_lock(&sci->sc_state_lock);
wait_req.seq = ++sci->sc_seq_request;
+ add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
spin_unlock(&sci->sc_state_lock);
- init_waitqueue_entry(&wait_req.wq, current);
- add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
- set_current_state(TASK_INTERRUPTIBLE);
wake_up(&sci->sc_wait_daemon);
for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /*
+ * Synchronize only while the log writer thread is alive.
+ * Leave flushing out after the log writer thread exits to
+ * the cleanup work in nilfs_segctor_destroy().
+ */
+ if (!sci->sc_task)
+ break;
+
if (atomic_read(&wait_req.done)) {
err = wait_req.err;
break;
@@ -2094,16 +2278,15 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
return err;
}
-static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
+static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force)
{
struct nilfs_segctor_wait_request *wrq, *n;
unsigned long flags;
spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
- list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
- wq.task_list) {
+ list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
if (!atomic_read(&wrq->done) &&
- nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
+ (force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) {
wrq->err = err;
atomic_set(&wrq->done, 1);
}
@@ -2120,34 +2303,27 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
* nilfs_construct_segment - construct a logical segment
* @sb: super block
*
- * Return Value: On success, 0 is retured. On errors, one of the following
- * negative error code is returned.
- *
- * %-EROFS - Read only filesystem.
- *
- * %-EIO - I/O error
- *
- * %-ENOSPC - No space left on device (only in a panic state).
- *
- * %-ERESTARTSYS - Interrupted.
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No space left on device (only in a panic state).
+ * * %-ERESTARTSYS - Interrupted.
+ * * %-EROFS - Read only filesystem.
*/
int nilfs_construct_segment(struct super_block *sb)
{
struct the_nilfs *nilfs = sb->s_fs_info;
struct nilfs_sc_info *sci = nilfs->ns_writer;
struct nilfs_transaction_info *ti;
- int err;
- if (!sci)
+ if (sb_rdonly(sb) || unlikely(!sci))
return -EROFS;
/* A call inside transactions causes a deadlock. */
BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);
- err = nilfs_segctor_sync(sci);
- return err;
+ return nilfs_segctor_sync(sci);
}
/**
@@ -2157,18 +2333,13 @@ int nilfs_construct_segment(struct super_block *sb)
* @start: start byte offset
* @end: end byte offset (inclusive)
*
- * Return Value: On success, 0 is retured. On errors, one of the following
- * negative error code is returned.
- *
- * %-EROFS - Read only filesystem.
- *
- * %-EIO - I/O error
- *
- * %-ENOSPC - No space left on device (only in a panic state).
- *
- * %-ERESTARTSYS - Interrupted.
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No space left on device (only in a panic state).
+ * * %-ERESTARTSYS - Interrupted.
+ * * %-EROFS - Read only filesystem.
*/
int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
loff_t start, loff_t end)
@@ -2179,13 +2350,13 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
struct nilfs_transaction_info ti;
int err = 0;
- if (!sci)
+ if (sb_rdonly(sb) || unlikely(!sci))
return -EROFS;
nilfs_transaction_lock(sb, &ti, 0);
ii = NILFS_I(inode);
- if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) ||
+ if (test_bit(NILFS_I_INODE_SYNC, &ii->i_state) ||
nilfs_test_opt(nilfs, STRICT_ORDER) ||
test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
nilfs_discontinued(nilfs)) {
@@ -2207,13 +2378,15 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
sci->sc_dsync_end = end;
err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
+ if (!err)
+ nilfs->ns_flushed_device = 0;
nilfs_transaction_unlock(sb);
return err;
}
#define FLUSH_FILE_BIT (0x1) /* data file only */
-#define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */
+#define FLUSH_DAT_BIT BIT(NILFS_DAT_INO) /* DAT only */
/**
* nilfs_segctor_accept - record accepted sequence count of log-write requests
@@ -2221,10 +2394,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
*/
static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
{
+ bool thread_is_alive;
+
spin_lock(&sci->sc_state_lock);
sci->sc_seq_accepted = sci->sc_seq_request;
+ thread_is_alive = (bool)sci->sc_task;
spin_unlock(&sci->sc_state_lock);
- del_timer_sync(&sci->sc_timer);
+
+ /*
+ * This function does not race with the log writer thread's
+ * termination. Therefore, deleting sc_timer, which should not be
+ * done after the log writer thread exits, can be done safely outside
+ * the area protected by sc_state_lock.
+ */
+ if (thread_is_alive)
+ timer_delete_sync(&sci->sc_timer);
}
/**
@@ -2241,7 +2425,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
if (mode == SC_LSEG_SR) {
sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
sci->sc_seq_done = sci->sc_seq_accepted;
- nilfs_segctor_wakeup(sci, err);
+ nilfs_segctor_wakeup(sci, err, false);
sci->sc_flush_request = 0;
} else {
if (mode == SC_FLUSH_FILE)
@@ -2250,7 +2434,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
sci->sc_flush_request &= ~FLUSH_DAT_BIT;
/* re-enable timer if checkpoint creation was not done */
- if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
+ if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task &&
time_before(jiffies, sci->sc_timer.expires))
add_timer(&sci->sc_timer);
}
@@ -2261,6 +2445,8 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
* nilfs_segctor_construct - form logs and write them to disk
* @sci: segment constructor object
* @mode: mode of log forming
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
{
@@ -2297,10 +2483,11 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
return err;
}
-static void nilfs_construction_timeout(unsigned long data)
+static void nilfs_construction_timeout(struct timer_list *t)
{
- struct task_struct *p = (struct task_struct *)data;
- wake_up_process(p);
+ struct nilfs_sc_info *sci = timer_container_of(sci, t, sc_timer);
+
+ wake_up_process(sci->sc_task);
}
static void
@@ -2313,7 +2500,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
continue;
list_del_init(&ii->i_dirty);
truncate_inode_pages(&ii->vfs_inode.i_data, 0);
- nilfs_btnode_cache_clear(&ii->i_btnode_cache);
+ nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
iput(&ii->vfs_inode);
}
}
@@ -2352,8 +2539,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
if (likely(!err))
break;
- nilfs_warning(sb, __func__,
- "segment construction failed. (err=%d)", err);
+ nilfs_warn(sb, "error %d cleaning segments", err);
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(sci->sc_interval);
}
@@ -2361,9 +2547,9 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
sci->sc_nfreesegs);
if (ret) {
- printk(KERN_WARNING
- "NILFS warning: error %d on discard request, "
- "turning discards off for the device\n", ret);
+ nilfs_warn(sb,
+ "error %d on discard request, turning discards off for the device",
+ ret);
nilfs_clear_opt(nilfs, DISCARD);
}
}
@@ -2397,7 +2583,6 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
{
int mode = 0;
- int err;
spin_lock(&sci->sc_state_lock);
mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
@@ -2405,7 +2590,7 @@ static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
spin_unlock(&sci->sc_state_lock);
if (mode) {
- err = nilfs_segctor_do_construct(sci, mode);
+ nilfs_segctor_do_construct(sci, mode);
spin_lock(&sci->sc_state_lock);
sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
@@ -2428,123 +2613,85 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
}
/**
- * nilfs_segctor_thread - main loop of the segment constructor thread.
+ * nilfs_log_write_required - determine whether log writing is required
+ * @sci: nilfs_sc_info struct
+ * @modep: location for storing log writing mode
+ *
+ * Return: true if log writing is required, false otherwise. If log writing
+ * is required, the mode is stored in the location pointed to by @modep.
+ */
+static bool nilfs_log_write_required(struct nilfs_sc_info *sci, int *modep)
+{
+ bool timedout, ret = true;
+
+ spin_lock(&sci->sc_state_lock);
+ timedout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
+ time_after_eq(jiffies, sci->sc_timer.expires));
+ if (timedout || sci->sc_seq_request != sci->sc_seq_done)
+ *modep = SC_LSEG_SR;
+ else if (sci->sc_flush_request)
+ *modep = nilfs_segctor_flush_mode(sci);
+ else
+ ret = false;
+
+ spin_unlock(&sci->sc_state_lock);
+ return ret;
+}
+
+/**
+ * nilfs_segctor_thread - main loop of the log writer thread
* @arg: pointer to a struct nilfs_sc_info.
*
- * nilfs_segctor_thread() initializes a timer and serves as a daemon
- * to execute segment constructions.
+ * nilfs_segctor_thread() is the main loop function of the log writer kernel
+ * thread, which determines whether log writing is necessary, and if so,
+ * performs the log write in the background, or waits if not. It is also
+ * used to decide the background writeback of the superblock.
+ *
+ * Return: Always 0.
*/
static int nilfs_segctor_thread(void *arg)
{
struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
- int timeout = 0;
- sci->sc_timer.data = (unsigned long)current;
- sci->sc_timer.function = nilfs_construction_timeout;
+ nilfs_info(sci->sc_super,
+ "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds",
+ sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);
- /* start sync. */
- sci->sc_task = current;
- wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
- printk(KERN_INFO
- "segctord starting. Construction interval = %lu seconds, "
- "CP frequency < %lu seconds\n",
- sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);
+ set_freezable();
- spin_lock(&sci->sc_state_lock);
- loop:
- for (;;) {
+ while (!kthread_should_stop()) {
+ DEFINE_WAIT(wait);
+ bool should_write;
int mode;
- if (sci->sc_state & NILFS_SEGCTOR_QUIT)
- goto end_thread;
-
- if (timeout || sci->sc_seq_request != sci->sc_seq_done)
- mode = SC_LSEG_SR;
- else if (!sci->sc_flush_request)
- break;
- else
- mode = nilfs_segctor_flush_mode(sci);
-
- spin_unlock(&sci->sc_state_lock);
- nilfs_segctor_thread_construct(sci, mode);
- spin_lock(&sci->sc_state_lock);
- timeout = 0;
- }
-
-
- if (freezing(current)) {
- spin_unlock(&sci->sc_state_lock);
- try_to_freeze();
- spin_lock(&sci->sc_state_lock);
- } else {
- DEFINE_WAIT(wait);
- int should_sleep = 1;
+ if (freezing(current)) {
+ try_to_freeze();
+ continue;
+ }
prepare_to_wait(&sci->sc_wait_daemon, &wait,
TASK_INTERRUPTIBLE);
-
- if (sci->sc_seq_request != sci->sc_seq_done)
- should_sleep = 0;
- else if (sci->sc_flush_request)
- should_sleep = 0;
- else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
- should_sleep = time_before(jiffies,
- sci->sc_timer.expires);
-
- if (should_sleep) {
- spin_unlock(&sci->sc_state_lock);
+ should_write = nilfs_log_write_required(sci, &mode);
+ if (!should_write)
schedule();
- spin_lock(&sci->sc_state_lock);
- }
finish_wait(&sci->sc_wait_daemon, &wait);
- timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
- time_after_eq(jiffies, sci->sc_timer.expires));
if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs))
set_nilfs_discontinued(nilfs);
- }
- goto loop;
- end_thread:
- spin_unlock(&sci->sc_state_lock);
+ if (should_write)
+ nilfs_segctor_thread_construct(sci, mode);
+ }
/* end sync. */
+ spin_lock(&sci->sc_state_lock);
sci->sc_task = NULL;
- wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
- return 0;
-}
-
-static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
-{
- struct task_struct *t;
-
- t = kthread_run(nilfs_segctor_thread, sci, "segctord");
- if (IS_ERR(t)) {
- int err = PTR_ERR(t);
-
- printk(KERN_ERR "NILFS: error %d creating segctord thread\n",
- err);
- return err;
- }
- wait_event(sci->sc_wait_task, sci->sc_task != NULL);
+ timer_shutdown_sync(&sci->sc_timer);
+ spin_unlock(&sci->sc_state_lock);
return 0;
}
-static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
- __acquires(&sci->sc_state_lock)
- __releases(&sci->sc_state_lock)
-{
- sci->sc_state |= NILFS_SEGCTOR_QUIT;
-
- while (sci->sc_task) {
- wake_up(&sci->sc_wait_daemon);
- spin_unlock(&sci->sc_state_lock);
- wait_event(sci->sc_wait_task, sci->sc_task == NULL);
- spin_lock(&sci->sc_state_lock);
- }
-}
-
/*
* Setup & clean-up functions
*/
@@ -2565,13 +2712,13 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
init_waitqueue_head(&sci->sc_wait_request);
init_waitqueue_head(&sci->sc_wait_daemon);
- init_waitqueue_head(&sci->sc_wait_task);
spin_lock_init(&sci->sc_state_lock);
INIT_LIST_HEAD(&sci->sc_dirty_files);
INIT_LIST_HEAD(&sci->sc_segbufs);
INIT_LIST_HEAD(&sci->sc_write_logs);
INIT_LIST_HEAD(&sci->sc_gc_inodes);
- init_timer(&sci->sc_timer);
+ INIT_LIST_HEAD(&sci->sc_iput_queue);
+ INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
@@ -2588,8 +2735,10 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
{
int ret, retrycount = NILFS_SC_CLEANUP_RETRY;
- /* The segctord thread was stopped and its timer was removed.
- But some tasks remain. */
+ /*
+ * The segctord thread was stopped and its timer was removed.
+ * But some tasks remain.
+ */
do {
struct nilfs_transaction_info ti;
@@ -2597,7 +2746,9 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
nilfs_transaction_unlock(sci->sc_super);
- } while (ret && retrycount-- > 0);
+ flush_work(&sci->sc_iput_work);
+
+ } while (ret && ret != -EROFS && retrycount-- > 0);
}
/**
@@ -2615,21 +2766,46 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
up_write(&nilfs->ns_segctor_sem);
+ if (sci->sc_task) {
+ wake_up(&sci->sc_wait_daemon);
+ if (kthread_stop(sci->sc_task)) {
+ spin_lock(&sci->sc_state_lock);
+ sci->sc_task = NULL;
+ timer_shutdown_sync(&sci->sc_timer);
+ spin_unlock(&sci->sc_state_lock);
+ }
+ }
+
spin_lock(&sci->sc_state_lock);
- nilfs_segctor_kill_thread(sci);
flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
|| sci->sc_seq_request != sci->sc_seq_done);
spin_unlock(&sci->sc_state_lock);
+ /*
+ * Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can
+ * be called from delayed iput() via nilfs_evict_inode() and can race
+ * with the above log writer thread termination.
+ */
+ nilfs_segctor_wakeup(sci, 0, true);
+
+ if (flush_work(&sci->sc_iput_work))
+ flag = true;
+
if (flag || !nilfs_segctor_confirm(sci))
nilfs_segctor_write_out(sci);
if (!list_empty(&sci->sc_dirty_files)) {
- nilfs_warning(sci->sc_super, __func__,
- "dirty file(s) after the final construction\n");
+ nilfs_warn(sci->sc_super,
+ "disposed unprocessed dirty file(s) when stopping log writer");
nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
}
+ if (!list_empty(&sci->sc_iput_queue)) {
+ nilfs_warn(sci->sc_super,
+ "disposed unprocessed inode(s) in iput queue when stopping log writer");
+ nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1);
+ }
+
WARN_ON(!list_empty(&sci->sc_segbufs));
WARN_ON(!list_empty(&sci->sc_write_logs));
@@ -2637,7 +2813,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
down_write(&nilfs->ns_segctor_sem);
- del_timer_sync(&sci->sc_timer);
kfree(sci);
}
@@ -2649,35 +2824,45 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
* This allocates a log writer object, initializes it, and starts the
* log writer.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error code is returned.
- *
- * %-ENOMEM - Insufficient memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINTR - Log writer thread creation failed due to interruption.
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
{
struct the_nilfs *nilfs = sb->s_fs_info;
+ struct nilfs_sc_info *sci;
+ struct task_struct *t;
int err;
if (nilfs->ns_writer) {
/*
- * This happens if the filesystem was remounted
- * read/write after nilfs_error degenerated it into a
- * read-only mount.
+ * This happens if the filesystem is made read-only by
+ * __nilfs_error or nilfs_remount and then remounted
+ * read/write. In these cases, reuse the existing
+ * writer.
*/
- nilfs_detach_log_writer(sb);
+ return 0;
}
- nilfs->ns_writer = nilfs_segctor_new(sb, root);
- if (!nilfs->ns_writer)
+ sci = nilfs_segctor_new(sb, root);
+ if (unlikely(!sci))
return -ENOMEM;
- err = nilfs_segctor_start_thread(nilfs->ns_writer);
- if (err) {
- kfree(nilfs->ns_writer);
- nilfs->ns_writer = NULL;
+ nilfs->ns_writer = sci;
+ t = kthread_create(nilfs_segctor_thread, sci, "segctord");
+ if (IS_ERR(t)) {
+ err = PTR_ERR(t);
+ nilfs_err(sb, "error %d creating segctord thread", err);
+ nilfs_detach_log_writer(sb);
+ return err;
}
- return err;
+ sci->sc_task = t;
+ timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
+
+ wake_up_process(sci->sc_task);
+ return 0;
}
/**
@@ -2697,16 +2882,18 @@ void nilfs_detach_log_writer(struct super_block *sb)
nilfs_segctor_destroy(nilfs->ns_writer);
nilfs->ns_writer = NULL;
}
+ set_nilfs_purging(nilfs);
/* Force to free the list of dirty files */
spin_lock(&nilfs->ns_inode_lock);
if (!list_empty(&nilfs->ns_dirty_files)) {
list_splice_init(&nilfs->ns_dirty_files, &garbage_list);
- nilfs_warning(sb, __func__,
- "Hit dirty file after stopped log writer\n");
+ nilfs_warn(sb,
+ "disposed unprocessed dirty file(s) when detaching log writer");
}
spin_unlock(&nilfs->ns_inode_lock);
up_write(&nilfs->ns_segctor_sem);
nilfs_dispose_list(nilfs, &garbage_list, 1);
+ clear_nilfs_purging(nilfs);
}
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 38a1d0013314..4b39ed43ae72 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * segment.h - NILFS Segment constructor prototypes and definitions
+ * NILFS Segment constructor prototypes and definitions
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*
*/
#ifndef _NILFS_SEGMENT_H
@@ -26,7 +13,7 @@
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
-#include <linux/nilfs2_fs.h>
+#include <linux/workqueue.h>
#include "nilfs.h"
struct nilfs_root;
@@ -35,10 +22,10 @@ struct nilfs_root;
* struct nilfs_recovery_info - Recovery information
* @ri_need_recovery: Recovery status
* @ri_super_root: Block number of the last super root
- * @ri_ri_cno: Number of the last checkpoint
+ * @ri_cno: Number of the last checkpoint
* @ri_lsegs_start: Region for roll-forwarding (start block number)
* @ri_lsegs_end: Region for roll-forwarding (end block number)
- * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start
+ * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start
* @ri_used_segments: List of segments to be mark active
* @ri_pseg_start: Block number of the last partial segment
* @ri_seq: Sequence number on the last partial segment
@@ -66,14 +53,15 @@ struct nilfs_recovery_info {
/**
* struct nilfs_cstage - Context of collection stage
- * @scnt: Stage count
+ * @scnt: Stage count, must be accessed via wrappers:
+ * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get()
* @flags: State flags
* @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file
* @gc_inode_ptr: Pointer on the list of gc-inodes
*/
struct nilfs_cstage {
int scnt;
- unsigned flags;
+ unsigned int flags;
struct nilfs_inode_info *dirty_file_ptr;
struct nilfs_inode_info *gc_inode_ptr;
};
@@ -82,7 +70,7 @@ struct nilfs_segment_buffer;
struct nilfs_segsum_pointer {
struct buffer_head *bh;
- unsigned offset; /* offset in bytes */
+ unsigned int offset; /* offset in bytes */
};
/**
@@ -92,6 +80,8 @@ struct nilfs_segsum_pointer {
* @sc_nblk_inc: Block count of current generation
* @sc_dirty_files: List of files to be written
* @sc_gc_inodes: List of GC inodes having blocks to be written
+ * @sc_iput_queue: list of inodes for which iput should be done
+ * @sc_iput_work: work struct to defer iput call
* @sc_freesegs: array of segment numbers to be freed
* @sc_nfreesegs: number of segments on @sc_freesegs
* @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -115,9 +105,8 @@ struct nilfs_segsum_pointer {
* @sc_flush_request: inode bitmap of metadata files to be flushed
* @sc_wait_request: Client request queue
* @sc_wait_daemon: Daemon wait queue
- * @sc_wait_task: Start/end wait queue to control segctord task
* @sc_seq_request: Request counter
- * @sc_seq_accept: Accepted request count
+ * @sc_seq_accepted: Accepted request count
* @sc_seq_done: Completion counter
* @sc_sync: Request of explicit sync operation
* @sc_interval: Timeout value of background construction
@@ -135,6 +124,8 @@ struct nilfs_sc_info {
struct list_head sc_dirty_files;
struct list_head sc_gc_inodes;
+ struct list_head sc_iput_queue;
+ struct work_struct sc_iput_work;
__u64 *sc_freesegs;
size_t sc_nfreesegs;
@@ -156,7 +147,7 @@ struct nilfs_sc_info {
unsigned long sc_blk_cnt;
unsigned long sc_datablk_cnt;
unsigned long sc_nblk_this_inc;
- time_t sc_seg_ctime;
+ time64_t sc_seg_ctime;
__u64 sc_cno;
unsigned long sc_flags;
@@ -166,7 +157,6 @@ struct nilfs_sc_info {
wait_queue_head_t sc_wait_request;
wait_queue_head_t sc_wait_daemon;
- wait_queue_head_t sc_wait_task;
__u32 sc_seq_request;
__u32 sc_seq_accepted;
@@ -187,31 +177,40 @@ enum {
NILFS_SC_DIRTY, /* One or more dirty meta-data blocks exist */
NILFS_SC_UNCLOSED, /* Logical segment is not closed */
NILFS_SC_SUPER_ROOT, /* The latest segment has a super root */
- NILFS_SC_PRIOR_FLUSH, /* Requesting immediate flush without making a
- checkpoint */
- NILFS_SC_HAVE_DELTA, /* Next checkpoint will have update of files
- other than DAT, cpfile, sufile, or files
- moved by GC */
+ NILFS_SC_PRIOR_FLUSH, /*
+ * Requesting immediate flush without making a
+ * checkpoint
+ */
+ NILFS_SC_HAVE_DELTA, /*
+ * Next checkpoint will have update of files
+ * other than DAT, cpfile, sufile, or files
+ * moved by GC.
+ */
};
/* sc_state */
-#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */
#define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */
/*
* Constant parameters
*/
-#define NILFS_SC_CLEANUP_RETRY 3 /* Retry count of construction when
- destroying segctord */
+#define NILFS_SC_CLEANUP_RETRY 3 /*
+ * Retry count of construction when
+ * destroying segctord
+ */
/*
* Default values of timeout, in seconds.
*/
-#define NILFS_SC_DEFAULT_TIMEOUT 5 /* Timeout value of dirty blocks.
- It triggers construction of a
- logical segment with a super root */
-#define NILFS_SC_DEFAULT_SR_FREQ 30 /* Maximum frequency of super root
- creation */
+#define NILFS_SC_DEFAULT_TIMEOUT 5 /*
+ * Timeout value of dirty blocks.
+ * It triggers construction of a
+ * logical segment with a super root.
+ */
+#define NILFS_SC_DEFAULT_SR_FREQ 30 /*
+ * Maximum frequency of super root
+ * creation
+ */
/*
* The default threshold amount of data, in block counts.
@@ -227,7 +226,6 @@ extern void nilfs_relax_pressure_in_lock(struct super_block *);
extern int nilfs_construct_segment(struct super_block *);
extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *,
loff_t, loff_t);
-extern void nilfs_flush_segment(struct super_block *, ino_t);
extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
void **);
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 3127e9f438a7..83f93337c01b 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -1,24 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * sufile.c - NILFS segment usage file.
+ * NILFS segment usage file.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
- * Revised by Ryusuke Konishi <ryusuke@osrg.net>.
+ * Written by Koji Sato.
+ * Revised by Ryusuke Konishi.
*/
#include <linux/kernel.h>
@@ -26,10 +13,11 @@
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/errno.h>
-#include <linux/nilfs2_fs.h>
#include "mdt.h"
#include "sufile.h"
+#include <trace/events/nilfs2.h>
+
/**
* struct nilfs_sufile_info - on-memory private data of sufile
* @mi: on-memory private data of metadata file
@@ -59,7 +47,8 @@ static unsigned long
nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum)
{
__u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset;
- do_div(t, nilfs_sufile_segment_usages_per_block(sufile));
+
+ t = div64_ul(t, nilfs_sufile_segment_usages_per_block(sufile));
return (unsigned long)t;
}
@@ -67,6 +56,7 @@ static unsigned long
nilfs_sufile_get_offset(const struct inode *sufile, __u64 segnum)
{
__u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset;
+
return do_div(t, nilfs_sufile_segment_usages_per_block(sufile));
}
@@ -80,19 +70,35 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr,
max - curr + 1);
}
-static struct nilfs_segment_usage *
-nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum,
- struct buffer_head *bh, void *kaddr)
+/**
+ * nilfs_sufile_segment_usage_offset - calculate the byte offset of a segment
+ * usage entry in the folio containing it
+ * @sufile: segment usage file inode
+ * @segnum: number of segment usage
+ * @bh: buffer head of block containing segment usage indexed by @segnum
+ *
+ * Return: Byte offset in the folio of the segment usage entry.
+ */
+static size_t nilfs_sufile_segment_usage_offset(const struct inode *sufile,
+ __u64 segnum,
+ struct buffer_head *bh)
{
- return kaddr + bh_offset(bh) +
+ return offset_in_folio(bh->b_folio, bh->b_data) +
nilfs_sufile_get_offset(sufile, segnum) *
NILFS_MDT(sufile)->mi_entry_size;
}
-static inline int nilfs_sufile_get_header_block(struct inode *sufile,
- struct buffer_head **bhp)
+static int nilfs_sufile_get_header_block(struct inode *sufile,
+ struct buffer_head **bhp)
{
- return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp);
+ int err = nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp);
+
+ if (unlikely(err == -ENOENT)) {
+ nilfs_error(sufile->i_sb,
+ "missing header block in segment usage metadata");
+ err = -EIO;
+ }
+ return err;
}
static inline int
@@ -115,13 +121,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
u64 ncleanadd, u64 ndirtyadd)
{
struct nilfs_sufile_header *header;
- void *kaddr;
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->sh_ncleansegs, ncleanadd);
le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
mark_buffer_dirty(header_bh);
}
@@ -129,6 +133,8 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
/**
* nilfs_sufile_get_ncleansegs - return the number of clean segments
* @sufile: inode of segment usage file
+ *
+ * Return: Number of clean segments.
*/
unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile)
{
@@ -151,17 +157,13 @@ unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile)
* of successfully modified segments from the head is stored in the
* place @ndone points to.
*
- * Return Value: On success, zero is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - Given segment usage is in hole block (may be returned if
- * @create is zero)
- *
- * %-EINVAL - Invalid segment usage number
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid segment usage number
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOENT - Given segment usage is in hole block (may be returned if
+ * @create is zero)
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs,
int create, size_t *ndone,
@@ -181,9 +183,9 @@ int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs,
down_write(&NILFS_MDT(sufile)->mi_sem);
for (seg = segnumv; seg < segnumv + nsegs; seg++) {
if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) {
- printk(KERN_WARNING
- "%s: invalid segment number: %llu\n", __func__,
- (unsigned long long)*seg);
+ nilfs_warn(sufile->i_sb,
+ "%s: invalid segment number: %llu",
+ __func__, (unsigned long long)*seg);
nerr++;
}
}
@@ -240,8 +242,8 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
int ret;
if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) {
- printk(KERN_WARNING "%s: invalid segment number: %llu\n",
- __func__, (unsigned long long)segnum);
+ nilfs_warn(sufile->i_sb, "%s: invalid segment number: %llu",
+ __func__, (unsigned long long)segnum);
return -EINVAL;
}
down_write(&NILFS_MDT(sufile)->mi_sem);
@@ -268,10 +270,7 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
* @start: minimum segment number of allocatable region (inclusive)
* @end: maximum segment number of allocatable region (inclusive)
*
- * Return Value: On success, 0 is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-ERANGE - invalid segment region
+ * Return: 0 on success, or %-ERANGE if segment range is invalid.
*/
int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
{
@@ -296,17 +295,14 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
* @sufile: inode of segment usage file
* @segnump: pointer to segment number
*
- * Description: nilfs_sufile_alloc() allocates a clean segment.
- *
- * Return Value: On success, 0 is returned and the segment number of the
- * allocated segment is stored in the place pointed by @segnump. On error, one
- * of the following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Description: nilfs_sufile_alloc() allocates a clean segment, and stores
+ * its segment number in the place pointed to by @segnump.
*
- * %-ENOSPC - No clean segment left.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - No clean segment left.
*/
int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
{
@@ -316,8 +312,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
size_t susz = NILFS_MDT(sufile)->mi_entry_size;
__u64 segnum, maxsegnum, last_alloc;
+ size_t offset;
void *kaddr;
- unsigned long nsegments, ncleansegs, nsus, cnt;
+ unsigned long nsegments, nsus, cnt;
int ret, j;
down_write(&NILFS_MDT(sufile)->mi_sem);
@@ -325,11 +322,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
ret = nilfs_sufile_get_header_block(sufile, &header_bh);
if (ret < 0)
goto out_sem;
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
- ncleansegs = le64_to_cpu(header->sh_ncleansegs);
+ header = kmap_local_folio(header_bh->b_folio, 0);
last_alloc = le64_to_cpu(header->sh_last_alloc);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
nsegments = nilfs_sufile_get_nsegments(sufile);
maxsegnum = sui->allocmax;
@@ -358,13 +353,15 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
break; /* never happens */
}
}
+ trace_nilfs2_segment_usage_check(sufile, segnum, cnt);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
&su_bh);
if (ret < 0)
goto out_header;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(
- sufile, segnum, su_bh, kaddr);
+
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum,
+ su_bh);
+ su = kaddr = kmap_local_folio(su_bh->b_folio, offset);
nsus = nilfs_sufile_segment_usages_in_block(
sufile, segnum, maxsegnum);
@@ -373,14 +370,13 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
continue;
/* found a clean segment */
nilfs_segment_usage_set_dirty(su);
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
le64_add_cpu(&header->sh_ncleansegs, -1);
le64_add_cpu(&header->sh_ndirtysegs, 1);
header->sh_last_alloc = cpu_to_le64(segnum);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
sui->ncleansegs--;
mark_buffer_dirty(header_bh);
@@ -388,10 +384,13 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
nilfs_mdt_mark_dirty(sufile);
brelse(su_bh);
*segnump = segnum;
+
+ trace_nilfs2_segment_usage_allocated(sufile, segnum);
+
goto out_header;
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
brelse(su_bh);
}
@@ -411,18 +410,18 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
struct buffer_head *su_bh)
{
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh);
+ su = kmap_local_folio(su_bh->b_folio, offset);
if (unlikely(!nilfs_segment_usage_clean(su))) {
- printk(KERN_WARNING "%s: segment %llu must be clean\n",
- __func__, (unsigned long long)segnum);
- kunmap_atomic(kaddr);
+ nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean",
+ __func__, (unsigned long long)segnum);
+ kunmap_local(su);
return;
}
nilfs_segment_usage_set_dirty(su);
- kunmap_atomic(kaddr);
+ kunmap_local(su);
nilfs_sufile_mod_counter(header_bh, -1, 1);
NILFS_SUI(sufile)->ncleansegs--;
@@ -436,14 +435,14 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
struct buffer_head *su_bh)
{
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
int clean, dirty;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
- if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) &&
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh);
+ su = kmap_local_folio(su_bh->b_folio, offset);
+ if (su->su_flags == cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)) &&
su->su_nblocks == cpu_to_le32(0)) {
- kunmap_atomic(kaddr);
+ kunmap_local(su);
return;
}
clean = nilfs_segment_usage_clean(su);
@@ -452,8 +451,8 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
/* make the segment garbage */
su->su_lastmod = cpu_to_le64(0);
su->su_nblocks = cpu_to_le32(0);
- su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY);
- kunmap_atomic(kaddr);
+ su->su_flags = cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY));
+ kunmap_local(su);
nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
NILFS_SUI(sufile)->ncleansegs -= clean;
@@ -467,47 +466,94 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
struct buffer_head *su_bh)
{
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
int sudirty;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh);
+ su = kmap_local_folio(su_bh->b_folio, offset);
if (nilfs_segment_usage_clean(su)) {
- printk(KERN_WARNING "%s: segment %llu is already clean\n",
- __func__, (unsigned long long)segnum);
- kunmap_atomic(kaddr);
+ nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean",
+ __func__, (unsigned long long)segnum);
+ kunmap_local(su);
return;
}
- WARN_ON(nilfs_segment_usage_error(su));
- WARN_ON(!nilfs_segment_usage_dirty(su));
+ if (unlikely(nilfs_segment_usage_error(su)))
+ nilfs_warn(sufile->i_sb, "free segment %llu marked in error",
+ (unsigned long long)segnum);
sudirty = nilfs_segment_usage_dirty(su);
+ if (unlikely(!sudirty))
+ nilfs_warn(sufile->i_sb, "free unallocated segment %llu",
+ (unsigned long long)segnum);
+
nilfs_segment_usage_set_clean(su);
- kunmap_atomic(kaddr);
+ kunmap_local(su);
mark_buffer_dirty(su_bh);
nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
NILFS_SUI(sufile)->ncleansegs++;
nilfs_mdt_mark_dirty(sufile);
+
+ trace_nilfs2_segment_usage_freed(sufile, segnum);
}
/**
* nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty
* @sufile: inode of segment usage file
* @segnum: segment number
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
{
struct buffer_head *bh;
+ size_t offset;
+ struct nilfs_segment_usage *su;
int ret;
+ down_write(&NILFS_MDT(sufile)->mi_sem);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
- if (!ret) {
+ if (unlikely(ret)) {
+ if (ret == -ENOENT) {
+ nilfs_error(sufile->i_sb,
+ "segment usage for segment %llu is unreadable due to a hole block",
+ (unsigned long long)segnum);
+ ret = -EIO;
+ }
+ goto out_sem;
+ }
+
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh);
+ su = kmap_local_folio(bh->b_folio, offset);
+ if (unlikely(nilfs_segment_usage_error(su))) {
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+
+ kunmap_local(su);
+ brelse(bh);
+ if (nilfs_segment_is_active(nilfs, segnum)) {
+ nilfs_error(sufile->i_sb,
+ "active segment %llu is erroneous",
+ (unsigned long long)segnum);
+ } else {
+ /*
+ * Segments marked erroneous are never allocated by
+ * nilfs_sufile_alloc(); only active segments, ie,
+ * the segments indexed by ns_segnum or ns_nextnum,
+ * can be erroneous here.
+ */
+ WARN_ON_ONCE(1);
+ }
+ ret = -EIO;
+ } else {
+ nilfs_segment_usage_set_dirty(su);
+ kunmap_local(su);
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
brelse(bh);
}
+out_sem:
+ up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
@@ -517,13 +563,15 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
* @segnum: segment number
* @nblocks: number of live blocks in the segment
* @modtime: modification time (option)
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
- unsigned long nblocks, time_t modtime)
+ unsigned long nblocks, time64_t modtime)
{
struct buffer_head *bh;
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
int ret;
down_write(&NILFS_MDT(sufile)->mi_sem);
@@ -531,13 +579,18 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
if (ret < 0)
goto out_sem;
- kaddr = kmap_atomic(bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
- WARN_ON(nilfs_segment_usage_error(su));
- if (modtime)
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh);
+ su = kmap_local_folio(bh->b_folio, offset);
+ if (modtime) {
+ /*
+ * Check segusage error and set su_lastmod only when updating
+ * this entry with a valid timestamp, not for cancellation.
+ */
+ WARN_ON_ONCE(nilfs_segment_usage_error(su));
su->su_lastmod = cpu_to_le64(modtime);
+ }
su->su_nblocks = cpu_to_le32(nblocks);
- kunmap_atomic(kaddr);
+ kunmap_local(su);
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
@@ -551,25 +604,21 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
/**
* nilfs_sufile_get_stat - get segment usage statistics
* @sufile: inode of segment usage file
- * @stat: pointer to a structure of segment usage statistics
- *
- * Description: nilfs_sufile_get_stat() returns information about segment
- * usage.
- *
- * Return Value: On success, 0 is returned, and segment usage information is
- * stored in the place pointed by @stat. On error, one of the following
- * negative error codes is returned.
+ * @sustat: pointer to a structure of segment usage statistics
*
- * %-EIO - I/O error.
+ * Description: nilfs_sufile_get_stat() retrieves segment usage statistics
+ * and stores them in the location pointed to by @sustat.
*
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
{
struct buffer_head *header_bh;
struct nilfs_sufile_header *header;
struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
- void *kaddr;
int ret;
down_read(&NILFS_MDT(sufile)->mi_sem);
@@ -578,8 +627,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
if (ret < 0)
goto out_sem;
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs);
@@ -588,7 +636,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
spin_lock(&nilfs->ns_last_segment_lock);
sustat->ss_prot_seq = nilfs->ns_prot_seq;
spin_unlock(&nilfs->ns_last_segment_lock);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
brelse(header_bh);
out_sem:
@@ -601,18 +649,18 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
struct buffer_head *su_bh)
{
struct nilfs_segment_usage *su;
- void *kaddr;
+ size_t offset;
int suclean;
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh);
+ su = kmap_local_folio(su_bh->b_folio, offset);
if (nilfs_segment_usage_error(su)) {
- kunmap_atomic(kaddr);
+ kunmap_local(su);
return;
}
suclean = nilfs_segment_usage_clean(su);
nilfs_segment_usage_set_error(su);
- kunmap_atomic(kaddr);
+ kunmap_local(su);
if (suclean) {
nilfs_sufile_mod_counter(header_bh, -1, 0);
@@ -623,22 +671,18 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
}
/**
- * nilfs_sufile_truncate_range - truncate range of segment array
- * @sufile: inode of segment usage file
- * @start: start segment number (inclusive)
- * @end: end segment number (inclusive)
- *
- * Return Value: On success, 0 is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - Invalid number of segments specified
- *
- * %-EBUSY - Dirty or active segments are present in the range
- */
+ * nilfs_sufile_truncate_range - truncate range of segment array
+ * @sufile: inode of segment usage file
+ * @start: start segment number (inclusive)
+ * @end: end segment number (inclusive)
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EBUSY - Dirty or active segments are present in the range.
+ * * %-EINVAL - Invalid number of segments specified.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ */
static int nilfs_sufile_truncate_range(struct inode *sufile,
__u64 start, __u64 end)
{
@@ -650,7 +694,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
unsigned long segusages_per_block;
unsigned long nsegs, ncleaned;
__u64 segnum;
- void *kaddr;
+ size_t offset;
ssize_t n, nc;
int ret;
int j;
@@ -681,16 +725,16 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
/* hole */
continue;
}
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(
- sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum,
+ su_bh);
+ su = kmap_local_folio(su_bh->b_folio, offset);
su2 = su;
for (j = 0; j < n; j++, su = (void *)su + susz) {
if ((le32_to_cpu(su->su_flags) &
- ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
+ ~BIT(NILFS_SEGMENT_USAGE_ERROR)) ||
nilfs_segment_is_active(nilfs, segnum + j)) {
ret = -EBUSY;
- kunmap_atomic(kaddr);
+ kunmap_local(su2);
brelse(su_bh);
goto out_header;
}
@@ -702,7 +746,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
nc++;
}
}
- kunmap_atomic(kaddr);
+ kunmap_local(su2);
if (nc > 0) {
mark_buffer_dirty(su_bh);
ncleaned += nc;
@@ -732,16 +776,12 @@ out:
* @sufile: inode of segment usage file
* @newnsegs: new number of segments
*
- * Return Value: On success, 0 is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOSPC - Enough free space is not left for shrinking
- *
- * %-EBUSY - Dirty or active segments exist in the region to be truncated
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EBUSY - Dirty or active segments exist in the region to be truncated.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-ENOSPC - Enough free space is not left for shrinking.
*/
int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
{
@@ -749,7 +789,6 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
struct buffer_head *header_bh;
struct nilfs_sufile_header *header;
struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
- void *kaddr;
unsigned long nsegs, nrsvsegs;
int ret = 0;
@@ -776,12 +815,20 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
goto out_header;
sui->ncleansegs -= nsegs - newnsegs;
+
+ /*
+ * If the sufile is successfully truncated, immediately adjust
+ * the segment allocation space while locking the semaphore
+ * "mi_sem" so that nilfs_sufile_alloc() never allocates
+ * segments in the truncated space.
+ */
+ sui->allocmax = newnsegs - 1;
+ sui->allocmin = 0;
}
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(sufile);
@@ -795,30 +842,27 @@ out:
}
/**
- * nilfs_sufile_get_suinfo -
+ * nilfs_sufile_get_suinfo - get segment usage information
* @sufile: inode of segment usage file
* @segnum: segment number to start looking
- * @buf: array of suinfo
- * @sisz: byte size of suinfo
- * @nsi: size of suinfo array
- *
- * Description:
- *
- * Return Value: On success, 0 is returned and .... On error, one of the
- * following negative error codes is returned.
+ * @buf: array of suinfo
+ * @sisz: byte size of suinfo
+ * @nsi: size of suinfo array
*
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: Count of segment usage info items stored in the output buffer on
+ * success, or one of the following negative error codes on failure:
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
- unsigned sisz, size_t nsi)
+ unsigned int sisz, size_t nsi)
{
struct buffer_head *su_bh;
struct nilfs_segment_usage *su;
struct nilfs_suinfo *si = buf;
size_t susz = NILFS_MDT(sufile)->mi_entry_size;
struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+ size_t offset;
void *kaddr;
unsigned long nsegs, segusages_per_block;
ssize_t n;
@@ -846,20 +890,20 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
continue;
}
- kaddr = kmap_atomic(su_bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(
- sufile, segnum, su_bh, kaddr);
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum,
+ su_bh);
+ su = kaddr = kmap_local_folio(su_bh->b_folio, offset);
for (j = 0; j < n;
j++, su = (void *)su + susz, si = (void *)si + sisz) {
si->sui_lastmod = le64_to_cpu(su->su_lastmod);
si->sui_nblocks = le32_to_cpu(su->su_nblocks);
si->sui_flags = le32_to_cpu(su->su_flags) &
- ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
+ ~BIT(NILFS_SEGMENT_USAGE_ACTIVE);
if (nilfs_segment_is_active(nilfs, segnum + j))
si->sui_flags |=
- (1UL << NILFS_SEGMENT_USAGE_ACTIVE);
+ BIT(NILFS_SEGMENT_USAGE_ACTIVE);
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
brelse(su_bh);
}
ret = nsegs;
@@ -870,11 +914,295 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
}
/**
+ * nilfs_sufile_set_suinfo - sets segment usage info
+ * @sufile: inode of segment usage file
+ * @buf: array of suinfo_update
+ * @supsz: byte size of suinfo_update
+ * @nsup: size of suinfo_update array
+ *
+ * Description: Takes an array of nilfs_suinfo_update structs and updates
+ * segment usage accordingly. Only the fields indicated by the sup_flags
+ * are updated.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid values in input (segment number, flags or nblocks).
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
+ */
+ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
+ unsigned int supsz, size_t nsup)
+{
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+ struct buffer_head *header_bh, *bh;
+ struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup;
+ struct nilfs_segment_usage *su;
+ size_t offset;
+ unsigned long blkoff, prev_blkoff;
+ int cleansi, cleansu, dirtysi, dirtysu;
+ long ncleaned = 0, ndirtied = 0;
+ int ret = 0;
+
+ if (unlikely(nsup == 0))
+ return ret;
+
+ for (sup = buf; sup < supend; sup = (void *)sup + supsz) {
+ if (sup->sup_segnum >= nilfs->ns_nsegments
+ || (sup->sup_flags &
+ (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS))
+ || (nilfs_suinfo_update_nblocks(sup) &&
+ sup->sup_sui.sui_nblocks >
+ nilfs->ns_blocks_per_segment))
+ return -EINVAL; /* reject the whole request before taking mi_sem */
+ }
+
+ down_write(&NILFS_MDT(sufile)->mi_sem);
+
+ ret = nilfs_sufile_get_header_block(sufile, &header_bh);
+ if (ret < 0)
+ goto out_sem;
+
+ sup = buf; /* restart from the first entry for the update pass */
+ blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum);
+ ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh);
+ if (ret < 0)
+ goto out_header;
+
+ for (;;) {
+ offset = nilfs_sufile_segment_usage_offset(
+ sufile, sup->sup_segnum, bh);
+ su = kmap_local_folio(bh->b_folio, offset);
+
+ if (nilfs_suinfo_update_lastmod(sup))
+ su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod);
+
+ if (nilfs_suinfo_update_nblocks(sup))
+ su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks);
+
+ if (nilfs_suinfo_update_flags(sup)) {
+ /*
+ * Active flag is a virtual flag projected by running
+ * nilfs kernel code - drop it not to write it to
+ * disk.
+ */
+ sup->sup_sui.sui_flags &=
+ ~BIT(NILFS_SEGMENT_USAGE_ACTIVE);
+
+ cleansi = nilfs_suinfo_clean(&sup->sup_sui);
+ cleansu = nilfs_segment_usage_clean(su);
+ dirtysi = nilfs_suinfo_dirty(&sup->sup_sui);
+ dirtysu = nilfs_segment_usage_dirty(su);
+
+ if (cleansi && !cleansu)
+ ++ncleaned;
+ else if (!cleansi && cleansu)
+ --ncleaned;
+
+ if (dirtysi && !dirtysu)
+ ++ndirtied;
+ else if (!dirtysi && dirtysu)
+ --ndirtied;
+
+ su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags);
+ }
+
+ kunmap_local(su);
+
+ sup = (void *)sup + supsz;
+ if (sup >= supend)
+ break;
+
+ prev_blkoff = blkoff;
+ blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum);
+ if (blkoff == prev_blkoff)
+ continue; /* same sufile block; keep using bh */
+
+ /* get different block: dirty and release the current one first */
+ mark_buffer_dirty(bh);
+ put_bh(bh);
+ ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh);
+ if (unlikely(ret < 0))
+ goto out_mark; /* bh was already released above */
+ }
+ mark_buffer_dirty(bh);
+ put_bh(bh);
+
+ out_mark:
+ if (ncleaned || ndirtied) {
+ nilfs_sufile_mod_counter(header_bh, (u64)ncleaned,
+ (u64)ndirtied);
+ NILFS_SUI(sufile)->ncleansegs += ncleaned; /* in-memory count of clean segments */
+ }
+ nilfs_mdt_mark_dirty(sufile);
+ out_header:
+ put_bh(header_bh);
+ out_sem:
+ up_write(&NILFS_MDT(sufile)->mi_sem);
+ return ret;
+}
+
+/**
+ * nilfs_sufile_trim_fs() - trim ioctl handle function
+ * @sufile: inode of segment usage file
+ * @range: fstrim_range structure
+ *
+ * start: First Byte to trim
+ * len: number of Bytes to trim from start
+ * minlen: minimum extent length in Bytes
+ *
+ * Description: nilfs_sufile_trim_fs goes through all segments containing bytes
+ * from start to start+len. start is rounded up to the next block boundary
+ * and start+len is rounded down. For each clean segment blkdev_issue_discard
+ * function is invoked.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
+{
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+ struct buffer_head *su_bh;
+ struct nilfs_segment_usage *su;
+ size_t offset;
+ void *kaddr;
+ size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size;
+ sector_t seg_start, seg_end, start_block, end_block;
+ sector_t start = 0, nblocks = 0;
+ u64 segnum, segnum_end, minlen, len, max_blocks, ndiscarded = 0;
+ int ret = 0;
+ unsigned int sects_per_block;
+
+ sects_per_block = (1 << nilfs->ns_blocksize_bits) /
+ bdev_logical_block_size(nilfs->ns_bdev); /* factor applied to block numbers passed to blkdev_issue_discard() */
+ len = range->len >> nilfs->ns_blocksize_bits; /* bytes -> blocks */
+ minlen = range->minlen >> nilfs->ns_blocksize_bits; /* bytes -> blocks */
+ max_blocks = ((u64)nilfs->ns_nsegments * nilfs->ns_blocks_per_segment);
+
+ if (!len || range->start >= max_blocks << nilfs->ns_blocksize_bits)
+ return -EINVAL;
+
+ start_block = (range->start + nilfs->ns_blocksize - 1) >>
+ nilfs->ns_blocksize_bits; /* round up to a block boundary */
+
+ /*
+ * range->len can be very large (actually, it is set to
+ * ULLONG_MAX by default) - truncate upper end of the range
+ * carefully so as not to overflow.
+ */
+ if (max_blocks - start_block < len)
+ end_block = max_blocks - 1;
+ else
+ end_block = start_block + len - 1;
+
+ segnum = nilfs_get_segnum_of_block(nilfs, start_block);
+ segnum_end = nilfs_get_segnum_of_block(nilfs, end_block);
+
+ down_read(&NILFS_MDT(sufile)->mi_sem);
+
+ while (segnum <= segnum_end) {
+ n = nilfs_sufile_segment_usages_in_block(sufile, segnum,
+ segnum_end);
+
+ ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
+ &su_bh);
+ if (ret < 0) {
+ if (ret != -ENOENT)
+ goto out_sem;
+ /* hole */
+ segnum += n; /* skip the segment usages that block would have held */
+ continue;
+ }
+
+ offset = nilfs_sufile_segment_usage_offset(sufile, segnum,
+ su_bh);
+ su = kaddr = kmap_local_folio(su_bh->b_folio, offset);
+ for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) {
+ if (!nilfs_segment_usage_clean(su))
+ continue;
+
+ nilfs_get_segment_range(nilfs, segnum, &seg_start,
+ &seg_end);
+
+ if (!nblocks) {
+ /* start new extent */
+ start = seg_start;
+ nblocks = seg_end - seg_start + 1;
+ continue;
+ }
+
+ if (start + nblocks == seg_start) {
+ /* add to previous extent */
+ nblocks += seg_end - seg_start + 1;
+ continue;
+ }
+
+ /* discard previous extent */
+ if (start < start_block) {
+ nblocks -= start_block - start;
+ start = start_block;
+ }
+
+ if (nblocks >= minlen) {
+ kunmap_local(kaddr); /* unmap across the discard call; remapped below */
+
+ ret = blkdev_issue_discard(nilfs->ns_bdev,
+ start * sects_per_block,
+ nblocks * sects_per_block,
+ GFP_NOFS);
+ if (ret < 0) {
+ put_bh(su_bh);
+ goto out_sem;
+ }
+
+ ndiscarded += nblocks;
+ offset = nilfs_sufile_segment_usage_offset(
+ sufile, segnum, su_bh);
+ su = kaddr = kmap_local_folio(su_bh->b_folio,
+ offset);
+ }
+
+ /* start new extent */
+ start = seg_start;
+ nblocks = seg_end - seg_start + 1;
+ }
+ kunmap_local(kaddr);
+ put_bh(su_bh);
+ }
+
+
+ if (nblocks) {
+ /* discard last extent */
+ if (start < start_block) {
+ nblocks -= start_block - start;
+ start = start_block;
+ }
+ if (start + nblocks > end_block + 1)
+ nblocks = end_block - start + 1;
+
+ if (nblocks >= minlen) {
+ ret = blkdev_issue_discard(nilfs->ns_bdev,
+ start * sects_per_block,
+ nblocks * sects_per_block,
+ GFP_NOFS);
+ if (!ret)
+ ndiscarded += nblocks;
+ }
+ }
+
+out_sem:
+ up_read(&NILFS_MDT(sufile)->mi_sem);
+
+ range->len = ndiscarded << nilfs->ns_blocksize_bits; /* report the number of bytes discarded back to the caller */
+ return ret;
+}
+
+/**
* nilfs_sufile_read - read or get sufile inode
* @sb: super block instance
* @susize: size of a segment usage entry
* @raw_inode: on-disk sufile inode
* @inodep: buffer to store the inode
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_sufile_read(struct super_block *sb, size_t susize,
struct nilfs_inode *raw_inode, struct inode **inodep)
@@ -883,13 +1211,22 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
struct nilfs_sufile_info *sui;
struct buffer_head *header_bh;
struct nilfs_sufile_header *header;
- void *kaddr;
int err;
+ if (susize > sb->s_blocksize) {
+ nilfs_err(sb, "too large segment usage size: %zu bytes",
+ susize);
+ return -EINVAL;
+ } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) {
+ nilfs_err(sb, "too small segment usage size: %zu bytes",
+ susize);
+ return -EINVAL;
+ }
+
sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO);
if (unlikely(!sufile))
return -ENOMEM;
- if (!(sufile->i_state & I_NEW))
+ if (!(inode_state_read_once(sufile) & I_NEW))
goto out;
err = nilfs_mdt_init(sufile, NILFS_MDT_GFP, sizeof(*sui));
@@ -903,15 +1240,20 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
if (err)
goto failed;
- err = nilfs_sufile_get_header_block(sufile, &header_bh);
- if (err)
+ err = nilfs_mdt_get_block(sufile, 0, 0, NULL, &header_bh);
+ if (unlikely(err)) {
+ if (err == -ENOENT) {
+ nilfs_err(sb,
+ "missing header block in segment usage metadata");
+ err = -EINVAL;
+ }
goto failed;
+ }
sui = NILFS_SUI(sufile);
- kaddr = kmap_atomic(header_bh->b_page);
- header = kaddr + bh_offset(header_bh);
+ header = kmap_local_folio(header_bh->b_folio, 0);
sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs);
- kunmap_atomic(kaddr);
+ kunmap_local(header);
brelse(header_bh);
sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index e84bc5b51fc1..cd6f28ab3521 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * sufile.h - NILFS segment usage file.
+ * NILFS segment usage file.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Koji Sato <koji@osrg.net>.
+ * Written by Koji Sato.
*/
#ifndef _NILFS_SUFILE_H
@@ -25,7 +12,6 @@
#include <linux/fs.h>
#include <linux/buffer_head.h>
-#include <linux/nilfs2_fs.h>
#include "mdt.h"
@@ -40,10 +26,11 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
int nilfs_sufile_alloc(struct inode *, __u64 *);
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
- unsigned long nblocks, time_t modtime);
+ unsigned long nblocks, time64_t modtime);
int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
-ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned,
+ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned int,
size_t);
+ssize_t nilfs_sufile_set_suinfo(struct inode *, void *, unsigned int, size_t);
int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *,
void (*dofunc)(struct inode *, __u64,
@@ -65,11 +52,14 @@ void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
int nilfs_sufile_read(struct super_block *sb, size_t susize,
struct nilfs_inode *raw_inode, struct inode **inodep);
+int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range);
/**
* nilfs_sufile_scrap - make a segment garbage
* @sufile: inode of segment usage file
* @segnum: segment number to be freed
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum)
{
@@ -80,6 +70,8 @@ static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum)
* nilfs_sufile_free - free segment
* @sufile: inode of segment usage file
* @segnum: segment number to be freed
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
{
@@ -92,6 +84,8 @@ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
* @segnumv: array of segment numbers
* @nsegs: size of @segnumv array
* @ndone: place to store the number of freed segments
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv,
size_t nsegs, size_t *ndone)
@@ -107,8 +101,7 @@ static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv,
* @nsegs: size of @segnumv array
* @ndone: place to store the number of cancelled segments
*
- * Return Value: On success, 0 is returned. On error, a negative error codes
- * is returned.
+ * Return: 0 on success, or a negative error code on failure.
*/
static inline int nilfs_sufile_cancel_freev(struct inode *sufile,
__u64 *segnumv, size_t nsegs,
@@ -126,14 +119,11 @@ static inline int nilfs_sufile_cancel_freev(struct inode *sufile,
* Description: nilfs_sufile_set_error() marks the segment specified by
* @segnum as erroneous. The error segment will never be used again.
*
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - Invalid segment usage number.
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - Invalid segment usage number.
+ * * %-EIO - I/O error (including metadata corruption).
+ * * %-ENOMEM - Insufficient memory available.
*/
static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum)
{
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index af3ba0478cdf..badc2cbc895e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * super.c - NILFS module and super block management.
+ * NILFS module and super block management.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*/
/*
* linux/fs/ext2/super.c
@@ -42,12 +29,13 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
-#include <linux/parser.h>
#include <linux/crc32.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include "nilfs.h"
#include "export.h"
#include "mdt.h"
@@ -73,7 +61,28 @@ struct kmem_cache *nilfs_segbuf_cachep;
struct kmem_cache *nilfs_btree_path_cache;
static int nilfs_setup_super(struct super_block *sb, int is_mount);
-static int nilfs_remount(struct super_block *sb, int *flags, char *data);
+
+void __nilfs_msg(struct super_block *sb, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+ int level;
+
+ va_start(args, fmt);
+
+ level = printk_get_level(fmt); /* level taken from @fmt; re-emitted after KERN_SOH_ASCII below */
+ vaf.fmt = printk_skip_level(fmt); /* presumably @fmt without its level prefix - confirm against printk.h */
+ vaf.va = &args;
+
+ if (sb) /* include the device id (sb->s_id) when a super block is given */
+ printk("%c%cNILFS (%s): %pV\n",
+ KERN_SOH_ASCII, level, sb->s_id, &vaf);
+ else
+ printk("%c%cNILFS: %pV\n",
+ KERN_SOH_ASCII, level, &vaf);
+
+ va_end(args);
+}
static void nilfs_set_error(struct super_block *sb)
{
@@ -95,19 +104,24 @@ static void nilfs_set_error(struct super_block *sb)
}
/**
- * nilfs_error() - report failure condition on a filesystem
+ * __nilfs_error() - report failure condition on a filesystem
+ * @sb: super block instance
+ * @function: name of calling function
+ * @fmt: format string for message to be output
+ * @...: optional arguments to @fmt
+ *
+ * __nilfs_error() sets an ERROR_FS flag on the superblock as well as
+ * reporting an error message. This function should be called when
+ * NILFS detects inconsistencies or defects in metadata on disk.
*
- * nilfs_error() sets an ERROR_FS flag on the superblock as well as
- * reporting an error message. It should be called when NILFS detects
- * incoherences or defects of meta data on disk. As for sustainable
- * errors such as a single-shot I/O error, nilfs_warning() or the printk()
- * function should be used instead.
+ * This implements the body of nilfs_error() macro. Normally,
+ * nilfs_error() should be used. As for sustainable errors such as a
+ * single-shot I/O error, nilfs_err() should be used instead.
*
- * The segment constructor must not call this function because it can
- * kill itself.
+ * Callers should not add a trailing newline since this will do it.
*/
-void nilfs_error(struct super_block *sb, const char *function,
- const char *fmt, ...)
+void __nilfs_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
struct the_nilfs *nilfs = sb->s_fs_info;
struct va_format vaf;
@@ -123,12 +137,12 @@ void nilfs_error(struct super_block *sb, const char *function,
va_end(args);
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!sb_rdonly(sb)) {
nilfs_set_error(sb);
if (nilfs_test_opt(nilfs, ERRORS_RO)) {
printk(KERN_CRIT "Remounting filesystem read-only\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
}
@@ -137,56 +151,30 @@ void nilfs_error(struct super_block *sb, const char *function,
sb->s_id);
}
-void nilfs_warning(struct super_block *sb, const char *function,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n",
- sb->s_id, function, &vaf);
-
- va_end(args);
-}
-
-
struct inode *nilfs_alloc_inode(struct super_block *sb)
{
struct nilfs_inode_info *ii;
- ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS);
+ ii = alloc_inode_sb(sb, nilfs_inode_cachep, GFP_NOFS);
if (!ii)
return NULL;
ii->i_bh = NULL;
ii->i_state = 0;
+ ii->i_type = 0;
ii->i_cno = 0;
- ii->vfs_inode.i_version = 1;
- nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi);
+ ii->i_assoc_inode = NULL;
+ ii->i_bmap = &ii->i_bmap_data;
return &ii->vfs_inode;
}
-static void nilfs_i_callback(struct rcu_head *head)
+static void nilfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
+ if (nilfs_is_metadata_file_inode(inode))
+ nilfs_mdt_destroy(inode);
- if (mdi) {
- kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
- kfree(mdi);
- }
kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
}
-void nilfs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, nilfs_i_callback);
-}
-
static int nilfs_sync_super(struct super_block *sb, int flag)
{
struct the_nilfs *nilfs = sb->s_fs_info;
@@ -196,14 +184,13 @@ static int nilfs_sync_super(struct super_block *sb, int flag)
set_buffer_dirty(nilfs->ns_sbh[0]);
if (nilfs_test_opt(nilfs, BARRIER)) {
err = __sync_dirty_buffer(nilfs->ns_sbh[0],
- WRITE_SYNC | WRITE_FLUSH_FUA);
+ REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
} else {
err = sync_dirty_buffer(nilfs->ns_sbh[0]);
}
if (unlikely(err)) {
- printk(KERN_ERR
- "NILFS: unable to write superblock (err=%d)\n", err);
+ nilfs_err(sb, "unable to write superblock: err=%d", err);
if (err == -EIO && nilfs->ns_sbh[1]) {
/*
* sbp[0] points to newer log than sbp[1],
@@ -273,13 +260,12 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) {
memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
} else {
- printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
- sb->s_id);
+ nilfs_crit(sb, "superblock broke");
return NULL;
}
} else if (sbp[1] &&
sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
- memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
+ memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
}
if (flip && sbp[1])
@@ -292,10 +278,10 @@ int nilfs_commit_super(struct super_block *sb, int flag)
{
struct the_nilfs *nilfs = sb->s_fs_info;
struct nilfs_super_block **sbp = nilfs->ns_sbp;
- time_t t;
+ time64_t t;
/* nilfs->ns_sem must be locked by the caller. */
- t = get_seconds();
+ t = ktime_get_real_seconds();
nilfs->ns_sbwtime = t;
sbp[0]->s_wtime = cpu_to_le64(t);
sbp[0]->s_sum = 0;
@@ -310,6 +296,9 @@ int nilfs_commit_super(struct super_block *sb, int flag)
nilfs->ns_sbsize));
}
clear_nilfs_sb_dirty(nilfs);
+ nilfs->ns_flushed_device = 1;
+ /* make sure store to ns_flushed_device cannot be reordered */
+ smp_wmb();
return nilfs_sync_super(sb, flag);
}
@@ -320,6 +309,8 @@ int nilfs_commit_super(struct super_block *sb, int flag)
* This function restores state flags in the on-disk super block.
* This will set "clean" flag (i.e. NILFS_VALID_FS) unless the
* filesystem was not clean previously.
+ *
+ * Return: 0 on success, %-EIO if I/O error or superblock is corrupted.
*/
int nilfs_cleanup_super(struct super_block *sb)
{
@@ -350,6 +341,8 @@ int nilfs_cleanup_super(struct super_block *sb)
* nilfs_move_2nd_super - relocate secondary super block
* @sb: super block instance
* @sb2off: new offset of the secondary super block (in bytes)
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
{
@@ -358,7 +351,7 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
struct nilfs_super_block *nsbp;
sector_t blocknr, newblocknr;
unsigned long offset;
- int sb2i = -1; /* array index of the secondary superblock */
+ int sb2i; /* array index of the secondary superblock */
int ret = 0;
/* nilfs->ns_sem must be locked by the caller. */
@@ -369,6 +362,9 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
} else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
sb2i = 0;
blocknr = nilfs->ns_sbh[0]->b_blocknr;
+ } else {
+ sb2i = -1;
+ blocknr = 0;
}
if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
goto out; /* super block location is unchanged */
@@ -378,17 +374,38 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
offset = sb2off & (nilfs->ns_blocksize - 1);
nsbh = sb_getblk(sb, newblocknr);
if (!nsbh) {
- printk(KERN_WARNING
- "NILFS warning: unable to move secondary superblock "
- "to block %llu\n", (unsigned long long)newblocknr);
+ nilfs_warn(sb,
+ "unable to move secondary superblock to block %llu",
+ (unsigned long long)newblocknr);
ret = -EIO;
goto out;
}
nsbp = (void *)nsbh->b_data + offset;
- memset(nsbp, 0, nilfs->ns_blocksize);
+ lock_buffer(nsbh);
if (sb2i >= 0) {
+ /*
+ * The position of the second superblock only changes by 4KiB,
+ * which is larger than the maximum superblock data size
+ * (= 1KiB), so there is no need to use memmove() to allow
+ * overlap between source and destination.
+ */
memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
+
+ /*
+ * Zero fill after copy to avoid overwriting in case of move
+ * within the same block.
+ */
+ memset(nsbh->b_data, 0, offset);
+ memset((void *)nsbp + nilfs->ns_sbsize, 0,
+ nsbh->b_size - offset - nilfs->ns_sbsize);
+ } else {
+ memset(nsbh->b_data, 0, nsbh->b_size);
+ }
+ set_buffer_uptodate(nsbh);
+ unlock_buffer(nsbh);
+
+ if (sb2i >= 0) {
brelse(nilfs->ns_sbh[sb2i]);
nilfs->ns_sbh[sb2i] = nsbh;
nilfs->ns_sbp[sb2i] = nsbp;
@@ -407,6 +424,8 @@ out:
* nilfs_resize_fs - resize the filesystem
* @sb: super block instance
* @newsize: new size of the filesystem (in bytes)
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
{
@@ -417,11 +436,20 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
int ret;
ret = -ERANGE;
- devsize = i_size_read(sb->s_bdev->bd_inode);
+ devsize = bdev_nr_bytes(sb->s_bdev);
if (newsize > devsize)
goto out;
/*
+ * Prevent underflow in second superblock position calculation.
+ * The exact minimum size check is done in nilfs_sufile_resize().
+ */
+ if (newsize < 4096) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ /*
* Write lock is required to protect some functions depending
* on the number of segments, the number of reserved segments,
* and so forth.
@@ -430,7 +458,7 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
newnsegs = sb2off >> nilfs->ns_blocksize_bits;
- do_div(newnsegs, nilfs->ns_blocks_per_segment);
+ newnsegs = div64_ul(newnsegs, nilfs->ns_blocks_per_segment);
ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
up_write(&nilfs->ns_segctor_sem);
@@ -480,12 +508,13 @@ static void nilfs_put_super(struct super_block *sb)
nilfs_detach_log_writer(sb);
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!sb_rdonly(sb)) {
down_write(&nilfs->ns_sem);
nilfs_cleanup_super(sb);
up_write(&nilfs->ns_sem);
}
+ nilfs_sysfs_delete_device_group(nilfs);
iput(nilfs->ns_sufile);
iput(nilfs->ns_cpfile);
iput(nilfs->ns_dat);
@@ -514,6 +543,9 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
}
up_write(&nilfs->ns_sem);
+ if (!err)
+ err = nilfs_flush_device(nilfs);
+
return err;
}
@@ -522,8 +554,6 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
{
struct the_nilfs *nilfs = sb->s_fs_info;
struct nilfs_root *root;
- struct nilfs_checkpoint *raw_cp;
- struct buffer_head *bh_cp;
int err = -ENOMEM;
root = nilfs_find_or_create_root(
@@ -535,39 +565,19 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
goto reuse; /* already attached checkpoint */
down_read(&nilfs->ns_segctor_sem);
- err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
- &bh_cp);
+ err = nilfs_ifile_read(sb, root, cno, nilfs->ns_inode_size);
up_read(&nilfs->ns_segctor_sem);
- if (unlikely(err)) {
- if (err == -ENOENT || err == -EINVAL) {
- printk(KERN_ERR
- "NILFS: Invalid checkpoint "
- "(checkpoint number=%llu)\n",
- (unsigned long long)cno);
- err = -EINVAL;
- }
+ if (unlikely(err))
goto failed;
- }
-
- err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size,
- &raw_cp->cp_ifile_inode, &root->ifile);
- if (err)
- goto failed_bh;
-
- atomic64_set(&root->inodes_count,
- le64_to_cpu(raw_cp->cp_inodes_count));
- atomic64_set(&root->blocks_count,
- le64_to_cpu(raw_cp->cp_blocks_count));
-
- nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
reuse:
*rootp = root;
return 0;
- failed_bh:
- nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
failed:
+ if (err == -EINVAL)
+ nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)",
+ (unsigned long long)cno);
nilfs_put_root(root);
return err;
@@ -578,7 +588,7 @@ static int nilfs_freeze(struct super_block *sb)
struct the_nilfs *nilfs = sb->s_fs_info;
int err;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return 0;
/* Mark super block clean */
@@ -592,7 +602,7 @@ static int nilfs_unfreeze(struct super_block *sb)
{
struct the_nilfs *nilfs = sb->s_fs_info;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return 0;
down_write(&nilfs->ns_sem);
@@ -604,7 +614,7 @@ static int nilfs_unfreeze(struct super_block *sb)
static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
- struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
+ struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root;
struct the_nilfs *nilfs = root->nilfs;
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
unsigned long long blocks;
@@ -639,9 +649,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
err = nilfs_ifile_count_free_inodes(root->ifile,
&nmaxinodes, &nfreeinodes);
if (unlikely(err)) {
- printk(KERN_WARNING
- "NILFS warning: fail to count free inodes: err %d.\n",
- err);
+ nilfs_warn(sb, "failed to count free inodes: err=%d", err);
if (err == -ERANGE) {
/*
* If nilfs_palloc_count_max_entries() returns
@@ -665,8 +673,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = nmaxinodes;
buf->f_ffree = nfreeinodes;
buf->f_namelen = NILFS_NAME_LEN;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
@@ -675,7 +682,7 @@ static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry)
{
struct super_block *sb = dentry->d_sb;
struct the_nilfs *nilfs = sb->s_fs_info;
- struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
+ struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root;
if (!nilfs_test_opt(nilfs, BARRIER))
seq_puts(seq, ",nobarrier");
@@ -697,7 +704,7 @@ static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry)
static const struct super_operations nilfs_sops = {
.alloc_inode = nilfs_alloc_inode,
- .destroy_inode = nilfs_destroy_inode,
+ .free_inode = nilfs_free_inode,
.dirty_inode = nilfs_dirty_inode,
.evict_inode = nilfs_evict_inode,
.put_super = nilfs_put_super,
@@ -705,105 +712,98 @@ static const struct super_operations nilfs_sops = {
.freeze_fs = nilfs_freeze,
.unfreeze_fs = nilfs_unfreeze,
.statfs = nilfs_statfs,
- .remount_fs = nilfs_remount,
.show_options = nilfs_show_options
};
enum {
- Opt_err_cont, Opt_err_panic, Opt_err_ro,
- Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
- Opt_discard, Opt_nodiscard, Opt_err,
+ Opt_err, Opt_barrier, Opt_snapshot, Opt_order, Opt_norecovery,
+ Opt_discard,
};
-static match_table_t tokens = {
- {Opt_err_cont, "errors=continue"},
- {Opt_err_panic, "errors=panic"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_snapshot, "cp=%u"},
- {Opt_order, "order=%s"},
- {Opt_norecovery, "norecovery"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_err, NULL}
+static const struct constant_table nilfs_param_err[] = {
+ {"continue", NILFS_MOUNT_ERRORS_CONT},
+ {"panic", NILFS_MOUNT_ERRORS_PANIC},
+ {"remount-ro", NILFS_MOUNT_ERRORS_RO},
+ {}
};
-static int parse_options(char *options, struct super_block *sb, int is_remount)
-{
- struct the_nilfs *nilfs = sb->s_fs_info;
- char *p;
- substring_t args[MAX_OPT_ARGS];
-
- if (!options)
- return 1;
+static const struct fs_parameter_spec nilfs_param_spec[] = {
+ fsparam_enum ("errors", Opt_err, nilfs_param_err),
+ fsparam_flag_no ("barrier", Opt_barrier),
+ fsparam_u64 ("cp", Opt_snapshot),
+ fsparam_string ("order", Opt_order),
+ fsparam_flag ("norecovery", Opt_norecovery),
+ fsparam_flag_no ("discard", Opt_discard),
+ {}
+};
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
+struct nilfs_fs_context {
+ unsigned long ns_mount_opt;
+ __u64 cno;
+};
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_barrier:
- nilfs_set_opt(nilfs, BARRIER);
- break;
- case Opt_nobarrier:
+static int nilfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct nilfs_fs_context *nilfs = fc->fs_private;
+ int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, nilfs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_barrier:
+ if (result.negated)
nilfs_clear_opt(nilfs, BARRIER);
- break;
- case Opt_order:
- if (strcmp(args[0].from, "relaxed") == 0)
- /* Ordered data semantics */
- nilfs_clear_opt(nilfs, STRICT_ORDER);
- else if (strcmp(args[0].from, "strict") == 0)
- /* Strict in-order semantics */
- nilfs_set_opt(nilfs, STRICT_ORDER);
- else
- return 0;
- break;
- case Opt_err_panic:
- nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC);
- break;
- case Opt_err_ro:
- nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO);
- break;
- case Opt_err_cont:
- nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT);
- break;
- case Opt_snapshot:
- if (is_remount) {
- printk(KERN_ERR
- "NILFS: \"%s\" option is invalid "
- "for remount.\n", p);
- return 0;
- }
- break;
- case Opt_norecovery:
- nilfs_set_opt(nilfs, NORECOVERY);
- break;
- case Opt_discard:
- nilfs_set_opt(nilfs, DISCARD);
- break;
- case Opt_nodiscard:
- nilfs_clear_opt(nilfs, DISCARD);
- break;
- default:
- printk(KERN_ERR
- "NILFS: Unrecognized mount option \"%s\"\n", p);
- return 0;
+ else
+ nilfs_set_opt(nilfs, BARRIER);
+ break;
+ case Opt_order:
+ if (strcmp(param->string, "relaxed") == 0)
+ /* Ordered data semantics */
+ nilfs_clear_opt(nilfs, STRICT_ORDER);
+ else if (strcmp(param->string, "strict") == 0)
+ /* Strict in-order semantics */
+ nilfs_set_opt(nilfs, STRICT_ORDER);
+ else
+ return -EINVAL;
+ break;
+ case Opt_err:
+ nilfs->ns_mount_opt &= ~NILFS_MOUNT_ERROR_MODE;
+ nilfs->ns_mount_opt |= result.uint_32;
+ break;
+ case Opt_snapshot:
+ if (is_remount) {
+ struct super_block *sb = fc->root->d_sb;
+
+ nilfs_err(sb,
+ "\"%s\" option is invalid for remount",
+ param->key);
+ return -EINVAL;
+ }
+ if (result.uint_64 == 0) {
+ nilfs_err(NULL,
+ "invalid option \"cp=0\": invalid checkpoint number 0");
+ return -EINVAL;
}
+ nilfs->cno = result.uint_64;
+ break;
+ case Opt_norecovery:
+ nilfs_set_opt(nilfs, NORECOVERY);
+ break;
+ case Opt_discard:
+ if (result.negated)
+ nilfs_clear_opt(nilfs, DISCARD);
+ else
+ nilfs_set_opt(nilfs, DISCARD);
+ break;
+ default:
+ return -EINVAL;
}
- return 1;
-}
-static inline void
-nilfs_set_default_options(struct super_block *sb,
- struct nilfs_super_block *sbp)
-{
- struct the_nilfs *nilfs = sb->s_fs_info;
-
- nilfs->ns_mount_opt =
- NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
+ return 0;
}
static int nilfs_setup_super(struct super_block *sb, int is_mount)
@@ -825,19 +825,17 @@ static int nilfs_setup_super(struct super_block *sb, int is_mount)
mnt_count = le16_to_cpu(sbp[0]->s_mnt_count);
if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
- printk(KERN_WARNING
- "NILFS warning: mounting fs with errors\n");
+ nilfs_warn(sb, "mounting fs with errors");
#if 0
} else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) {
- printk(KERN_WARNING
- "NILFS warning: maximal mount count reached\n");
+ nilfs_warn(sb, "maximal mount count reached");
#endif
}
if (!max_mnt_count)
sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);
sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1);
- sbp[0]->s_mtime = cpu_to_le64(get_seconds());
+ sbp[0]->s_mtime = cpu_to_le64(ktime_get_real_seconds());
skip_mount_setup:
sbp[0]->s_state =
@@ -862,9 +860,8 @@ struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset);
}
-int nilfs_store_magic_and_option(struct super_block *sb,
- struct nilfs_super_block *sbp,
- char *data)
+int nilfs_store_magic(struct super_block *sb,
+ struct nilfs_super_block *sbp)
{
struct the_nilfs *nilfs = sb->s_fs_info;
@@ -872,17 +869,15 @@ int nilfs_store_magic_and_option(struct super_block *sb,
/* FS independent flags */
#ifdef NILFS_ATIME_DISABLE
- sb->s_flags |= MS_NOATIME;
+ sb->s_flags |= SB_NOATIME;
#endif
- nilfs_set_default_options(sb, sbp);
-
nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid);
nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid);
nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval);
nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max);
- return !parse_options(data, sb, 0) ? -EINVAL : 0 ;
+ return 0;
}
int nilfs_check_feature_compatibility(struct super_block *sb,
@@ -893,17 +888,17 @@ int nilfs_check_feature_compatibility(struct super_block *sb,
features = le64_to_cpu(sbp->s_feature_incompat) &
~NILFS_FEATURE_INCOMPAT_SUPP;
if (features) {
- printk(KERN_ERR "NILFS: couldn't mount because of unsupported "
- "optional features (%llx)\n",
- (unsigned long long)features);
+ nilfs_err(sb,
+ "couldn't mount because of unsupported optional features (%llx)",
+ (unsigned long long)features);
return -EINVAL;
}
features = le64_to_cpu(sbp->s_feature_compat_ro) &
~NILFS_FEATURE_COMPAT_RO_SUPP;
- if (!(sb->s_flags & MS_RDONLY) && features) {
- printk(KERN_ERR "NILFS: couldn't mount RDWR because of "
- "unsupported optional features (%llx)\n",
- (unsigned long long)features);
+ if (!sb_rdonly(sb) && features) {
+ nilfs_err(sb,
+ "couldn't mount RDWR because of unsupported optional features (%llx)",
+ (unsigned long long)features);
return -EINVAL;
}
return 0;
@@ -919,13 +914,13 @@ static int nilfs_get_root_dentry(struct super_block *sb,
inode = nilfs_iget(sb, root, NILFS_ROOT_INO);
if (IS_ERR(inode)) {
- printk(KERN_ERR "NILFS: get root inode failed\n");
ret = PTR_ERR(inode);
+ nilfs_err(sb, "error %d getting root inode", ret);
goto out;
}
if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) {
iput(inode);
- printk(KERN_ERR "NILFS: corrupt root inode.\n");
+ nilfs_err(sb, "corrupt root inode");
ret = -EINVAL;
goto out;
}
@@ -942,7 +937,7 @@ static int nilfs_get_root_dentry(struct super_block *sb,
iput(inode);
}
} else {
- dentry = d_obtain_alias(inode);
+ dentry = d_obtain_root(inode);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto failed_dentry;
@@ -953,7 +948,7 @@ static int nilfs_get_root_dentry(struct super_block *sb,
return ret;
failed_dentry:
- printk(KERN_ERR "NILFS: get root dentry failed\n");
+ nilfs_err(sb, "error %d getting root dentry", ret);
goto out;
}
@@ -973,18 +968,18 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
ret = (ret == -ENOENT) ? -EINVAL : ret;
goto out;
} else if (!ret) {
- printk(KERN_ERR "NILFS: The specified checkpoint is "
- "not a snapshot (checkpoint number=%llu).\n",
- (unsigned long long)cno);
+ nilfs_err(s,
+ "The specified checkpoint is not a snapshot (checkpoint number=%llu)",
+ (unsigned long long)cno);
ret = -EINVAL;
goto out;
}
ret = nilfs_attach_checkpoint(s, cno, false, &root);
if (ret) {
- printk(KERN_ERR "NILFS: error loading snapshot "
- "(checkpoint number=%llu).\n",
- (unsigned long long)cno);
+ nilfs_err(s,
+ "error %d while loading snapshot (checkpoint number=%llu)",
+ ret, (unsigned long long)cno);
goto out;
}
ret = nilfs_get_root_dentry(s, root, root_dentry);
@@ -994,23 +989,16 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
return ret;
}
-static int nilfs_tree_was_touched(struct dentry *root_dentry)
-{
- return d_count(root_dentry) > 1;
-}
-
/**
- * nilfs_try_to_shrink_tree() - try to shrink dentries of a checkpoint
+ * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint
* @root_dentry: root dentry of the tree to be shrunk
*
- * This function returns true if the tree was in-use.
+ * Return: true if the tree was in-use, false otherwise.
*/
-static int nilfs_try_to_shrink_tree(struct dentry *root_dentry)
+static bool nilfs_tree_is_busy(struct dentry *root_dentry)
{
- if (have_submounts(root_dentry))
- return true;
shrink_dcache_parent(root_dentry);
- return nilfs_tree_was_touched(root_dentry);
+ return d_count(root_dentry) > 1;
}
int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
@@ -1021,7 +1009,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
struct dentry *dentry;
int ret;
- if (cno < 0 || cno > nilfs->ns_cno)
+ if (cno > nilfs->ns_cno)
return false;
if (cno >= nilfs_last_cno(nilfs))
@@ -1034,8 +1022,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
if (inode) {
dentry = d_find_alias(inode);
if (dentry) {
- if (nilfs_tree_was_touched(dentry))
- ret = nilfs_try_to_shrink_tree(dentry);
+ ret = nilfs_tree_is_busy(dentry);
dput(dentry);
}
iput(inode);
@@ -1048,53 +1035,61 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
/**
* nilfs_fill_super() - initialize a super block instance
* @sb: super_block
- * @data: mount options
- * @silent: silent mode flag
+ * @fc: filesystem context
*
* This function is called exclusively by nilfs->ns_mount_mutex.
* So, the recovery process is protected from other simultaneous mounts.
+ *
+ * Return: 0 on success, or a negative error code on failure.
*/
static int
-nilfs_fill_super(struct super_block *sb, void *data, int silent)
+nilfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct the_nilfs *nilfs;
struct nilfs_root *fsroot;
- struct backing_dev_info *bdi;
+ struct nilfs_fs_context *ctx = fc->fs_private;
__u64 cno;
int err;
- nilfs = alloc_nilfs(sb->s_bdev);
+ nilfs = alloc_nilfs(sb);
if (!nilfs)
return -ENOMEM;
sb->s_fs_info = nilfs;
- err = init_nilfs(nilfs, sb, (char *)data);
+ err = init_nilfs(nilfs, sb);
if (err)
goto failed_nilfs;
+ /* Copy in parsed mount options */
+ nilfs->ns_mount_opt = ctx->ns_mount_opt;
+
sb->s_op = &nilfs_sops;
sb->s_export_op = &nilfs_export_ops;
sb->s_root = NULL;
sb->s_time_gran = 1;
sb->s_max_links = NILFS_LINK_MAX;
- bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
- sb->s_bdi = bdi ? : &default_backing_dev_info;
+ sb->s_bdi = bdi_get(sb->s_bdev->bd_disk->bdi);
err = load_nilfs(nilfs, sb);
if (err)
goto failed_nilfs;
+ super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid,
+ sizeof(nilfs->ns_sbp[0]->s_uuid));
+ super_set_sysfs_name_bdev(sb);
+
cno = nilfs_last_cno(nilfs);
err = nilfs_attach_checkpoint(sb, cno, true, &fsroot);
if (err) {
- printk(KERN_ERR "NILFS: error loading last checkpoint "
- "(checkpoint number=%llu).\n", (unsigned long long)cno);
+ nilfs_err(sb,
+ "error %d while loading last checkpoint (checkpoint number=%llu)",
+ err, (unsigned long long)cno);
goto failed_unload;
}
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!sb_rdonly(sb)) {
err = nilfs_attach_log_writer(sb, fsroot);
if (err)
goto failed_checkpoint;
@@ -1106,7 +1101,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
nilfs_put_root(fsroot);
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!sb_rdonly(sb)) {
down_write(&nilfs->ns_sem);
nilfs_setup_super(sb, true);
up_write(&nilfs->ns_sem);
@@ -1121,6 +1116,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
nilfs_put_root(fsroot);
failed_unload:
+ nilfs_sysfs_delete_device_group(nilfs);
iput(nilfs->ns_sufile);
iput(nilfs->ns_cpfile);
iput(nilfs->ns_dat);
@@ -1130,37 +1126,26 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
return err;
}
-static int nilfs_remount(struct super_block *sb, int *flags, char *data)
+static int nilfs_reconfigure(struct fs_context *fc)
{
+ struct nilfs_fs_context *ctx = fc->fs_private;
+ struct super_block *sb = fc->root->d_sb;
struct the_nilfs *nilfs = sb->s_fs_info;
- unsigned long old_sb_flags;
- unsigned long old_mount_opt;
int err;
- old_sb_flags = sb->s_flags;
- old_mount_opt = nilfs->ns_mount_opt;
-
- if (!parse_options(data, sb, 1)) {
- err = -EINVAL;
- goto restore_opts;
- }
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL);
+ sync_filesystem(sb);
err = -EINVAL;
if (!nilfs_valid_fs(nilfs)) {
- printk(KERN_WARNING "NILFS (device %s): couldn't "
- "remount because the filesystem is in an "
- "incomplete recovery state.\n", sb->s_id);
- goto restore_opts;
+ nilfs_warn(sb,
+ "couldn't remount because the filesystem is in an incomplete recovery state");
+ goto ignore_opts;
}
-
- if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+ if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb))
goto out;
- if (*flags & MS_RDONLY) {
- /* Shutting down log writer */
- nilfs_detach_log_writer(sb);
- sb->s_flags |= MS_RDONLY;
+ if (fc->sb_flags & SB_RDONLY) {
+ sb->s_flags |= SB_RDONLY;
/*
* Remounting a valid RW partition RDONLY, so set
@@ -1183,206 +1168,148 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
~NILFS_FEATURE_COMPAT_RO_SUPP;
up_read(&nilfs->ns_sem);
if (features) {
- printk(KERN_WARNING "NILFS (device %s): couldn't "
- "remount RDWR because of unsupported optional "
- "features (%llx)\n",
- sb->s_id, (unsigned long long)features);
+ nilfs_warn(sb,
+ "couldn't remount RDWR because of unsupported optional features (%llx)",
+ (unsigned long long)features);
err = -EROFS;
- goto restore_opts;
+ goto ignore_opts;
}
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
- root = NILFS_I(sb->s_root->d_inode)->i_root;
+ root = NILFS_I(d_inode(sb->s_root))->i_root;
err = nilfs_attach_log_writer(sb, root);
- if (err)
- goto restore_opts;
+ if (err) {
+ sb->s_flags |= SB_RDONLY;
+ goto ignore_opts;
+ }
down_write(&nilfs->ns_sem);
nilfs_setup_super(sb, true);
up_write(&nilfs->ns_sem);
}
out:
- return 0;
-
- restore_opts:
- sb->s_flags = old_sb_flags;
- nilfs->ns_mount_opt = old_mount_opt;
- return err;
-}
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL);
+ /* Copy over parsed remount options */
+ nilfs->ns_mount_opt = ctx->ns_mount_opt;
-struct nilfs_super_data {
- struct block_device *bdev;
- __u64 cno;
- int flags;
-};
-
-/**
- * nilfs_identify - pre-read mount options needed to identify mount instance
- * @data: mount options
- * @sd: nilfs_super_data
- */
-static int nilfs_identify(char *data, struct nilfs_super_data *sd)
-{
- char *p, *options = data;
- substring_t args[MAX_OPT_ARGS];
- int token;
- int ret = 0;
-
- do {
- p = strsep(&options, ",");
- if (p != NULL && *p) {
- token = match_token(p, tokens, args);
- if (token == Opt_snapshot) {
- if (!(sd->flags & MS_RDONLY)) {
- ret++;
- } else {
- sd->cno = simple_strtoull(args[0].from,
- NULL, 0);
- /*
- * No need to see the end pointer;
- * match_token() has done syntax
- * checking.
- */
- if (sd->cno == 0)
- ret++;
- }
- }
- if (ret)
- printk(KERN_ERR
- "NILFS: invalid mount option: %s\n", p);
- }
- if (!options)
- break;
- BUG_ON(options == data);
- *(options - 1) = ',';
- } while (!ret);
- return ret;
-}
-
-static int nilfs_set_bdev_super(struct super_block *s, void *data)
-{
- s->s_bdev = data;
- s->s_dev = s->s_bdev->bd_dev;
return 0;
-}
-static int nilfs_test_bdev_super(struct super_block *s, void *data)
-{
- return (void *)s->s_bdev == data;
+ ignore_opts:
+ return err;
}
-static struct dentry *
-nilfs_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
+static int
+nilfs_get_tree(struct fs_context *fc)
{
- struct nilfs_super_data sd;
+ struct nilfs_fs_context *ctx = fc->fs_private;
struct super_block *s;
- fmode_t mode = FMODE_READ | FMODE_EXCL;
- struct dentry *root_dentry;
- int err, s_new = false;
-
- if (!(flags & MS_RDONLY))
- mode |= FMODE_WRITE;
-
- sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
- if (IS_ERR(sd.bdev))
- return ERR_CAST(sd.bdev);
-
- sd.cno = 0;
- sd.flags = flags;
- if (nilfs_identify((char *)data, &sd)) {
- err = -EINVAL;
- goto failed;
- }
+ dev_t dev;
+ int err;
- /*
- * once the super is inserted into the list by sget, s_umount
- * will protect the lockfs code from trying to start a snapshot
- * while we are mounting
- */
- mutex_lock(&sd.bdev->bd_fsfreeze_mutex);
- if (sd.bdev->bd_fsfreeze_count > 0) {
- mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
- err = -EBUSY;
- goto failed;
- }
- s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
- sd.bdev);
- mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
- if (IS_ERR(s)) {
- err = PTR_ERR(s);
- goto failed;
+ if (ctx->cno && !(fc->sb_flags & SB_RDONLY)) {
+ nilfs_err(NULL,
+ "invalid option \"cp=%llu\": read-only option is not specified",
+ ctx->cno);
+ return -EINVAL;
}
- if (!s->s_root) {
- char b[BDEVNAME_SIZE];
-
- s_new = true;
+ err = lookup_bdev(fc->source, &dev);
+ if (err)
+ return err;
- /* New superblock instance created */
- s->s_mode = mode;
- strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
- sb_set_blocksize(s, block_size(sd.bdev));
+ s = sget_dev(fc, dev);
+ if (IS_ERR(s))
+ return PTR_ERR(s);
- err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
+ if (!s->s_root) {
+ err = setup_bdev_super(s, fc->sb_flags, fc);
+ if (!err)
+ err = nilfs_fill_super(s, fc);
if (err)
goto failed_super;
- s->s_flags |= MS_ACTIVE;
- } else if (!sd.cno) {
- int busy = false;
-
- if (nilfs_tree_was_touched(s->s_root)) {
- busy = nilfs_try_to_shrink_tree(s->s_root);
- if (busy && (flags ^ s->s_flags) & MS_RDONLY) {
- printk(KERN_ERR "NILFS: the device already "
- "has a %s mount.\n",
- (s->s_flags & MS_RDONLY) ?
- "read-only" : "read/write");
+ s->s_flags |= SB_ACTIVE;
+ } else if (!ctx->cno) {
+ if (nilfs_tree_is_busy(s->s_root)) {
+ if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
+ nilfs_err(s,
+ "the device already has a %s mount.",
+ sb_rdonly(s) ? "read-only" : "read/write");
err = -EBUSY;
goto failed_super;
}
- }
- if (!busy) {
+ } else {
/*
- * Try remount to setup mount states if the current
+ * Try reconfigure to setup mount states if the current
* tree is not mounted and only snapshots use this sb.
+ *
+ * Since nilfs_reconfigure() requires fc->root to be
+ * set, set it first and release it on failure.
*/
- err = nilfs_remount(s, &flags, data);
- if (err)
+ fc->root = dget(s->s_root);
+ err = nilfs_reconfigure(fc);
+ if (err) {
+ dput(fc->root);
+ fc->root = NULL; /* prevent double release */
goto failed_super;
+ }
+ return 0;
}
}
- if (sd.cno) {
- err = nilfs_attach_snapshot(s, sd.cno, &root_dentry);
+ if (ctx->cno) {
+ struct dentry *root_dentry;
+
+ err = nilfs_attach_snapshot(s, ctx->cno, &root_dentry);
if (err)
goto failed_super;
- } else {
- root_dentry = dget(s->s_root);
+ fc->root = root_dentry;
+ return 0;
}
- if (!s_new)
- blkdev_put(sd.bdev, mode);
-
- return root_dentry;
+ fc->root = dget(s->s_root);
+ return 0;
failed_super:
deactivate_locked_super(s);
+ return err;
+}
- failed:
- if (!s_new)
- blkdev_put(sd.bdev, mode);
- return ERR_PTR(err);
+static void nilfs_free_fc(struct fs_context *fc)
+{
+ kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations nilfs_context_ops = {
+ .parse_param = nilfs_parse_param,
+ .get_tree = nilfs_get_tree,
+ .reconfigure = nilfs_reconfigure,
+ .free = nilfs_free_fc,
+};
+
+static int nilfs_init_fs_context(struct fs_context *fc)
+{
+ struct nilfs_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
+ fc->fs_private = ctx;
+ fc->ops = &nilfs_context_ops;
+
+ return 0;
}
struct file_system_type nilfs_fs_type = {
.owner = THIS_MODULE,
.name = "nilfs2",
- .mount = nilfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = nilfs_init_fs_context,
+ .parameters = nilfs_param_spec,
};
MODULE_ALIAS_FS("nilfs2");
@@ -1394,8 +1321,6 @@ static void nilfs_inode_init_once(void *obj)
#ifdef CONFIG_NILFS_XATTR
init_rwsem(&ii->xattr_sem);
#endif
- address_space_init_once(&ii->i_btnode_cache);
- ii->i_bmap = &ii->i_bmap_data;
inode_init_once(&ii->vfs_inode);
}
@@ -1412,21 +1337,18 @@ static void nilfs_destroy_cachep(void)
*/
rcu_barrier();
- if (nilfs_inode_cachep)
- kmem_cache_destroy(nilfs_inode_cachep);
- if (nilfs_transaction_cachep)
- kmem_cache_destroy(nilfs_transaction_cachep);
- if (nilfs_segbuf_cachep)
- kmem_cache_destroy(nilfs_segbuf_cachep);
- if (nilfs_btree_path_cache)
- kmem_cache_destroy(nilfs_btree_path_cache);
+ kmem_cache_destroy(nilfs_inode_cachep);
+ kmem_cache_destroy(nilfs_transaction_cachep);
+ kmem_cache_destroy(nilfs_segbuf_cachep);
+ kmem_cache_destroy(nilfs_btree_path_cache);
}
static int __init nilfs_init_cachep(void)
{
nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache",
sizeof(struct nilfs_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT, nilfs_inode_init_once);
+ SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT,
+ nilfs_inode_init_once);
if (!nilfs_inode_cachep)
goto fail;
@@ -1463,13 +1385,19 @@ static int __init init_nilfs_fs(void)
if (err)
goto fail;
- err = register_filesystem(&nilfs_fs_type);
+ err = nilfs_sysfs_init();
if (err)
goto free_cachep;
+ err = register_filesystem(&nilfs_fs_type);
+ if (err)
+ goto deinit_sysfs_entry;
+
printk(KERN_INFO "NILFS version 2 loaded\n");
return 0;
+deinit_sysfs_entry:
+ nilfs_sysfs_exit();
free_cachep:
nilfs_destroy_cachep();
fail:
@@ -1479,6 +1407,7 @@ fail:
static void __exit exit_nilfs_fs(void)
{
nilfs_destroy_cachep();
+ nilfs_sysfs_exit();
unregister_filesystem(&nilfs_fs_type);
}
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
new file mode 100644
index 000000000000..bc52afbfc5c7
--- /dev/null
+++ b/fs/nilfs2/sysfs.c
@@ -0,0 +1,1140 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Sysfs support implementation.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#include <linux/kobject.h>
+
+#include "nilfs.h"
+#include "mdt.h"
+#include "sufile.h"
+#include "cpfile.h"
+#include "sysfs.h"
+
+/* /sys/fs/<nilfs>/ */
+static struct kset *nilfs_kset;
+
+#define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \
+static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \
+ struct attribute *attr, char *buf) \
+{ \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ struct nilfs_##name##_attr *a = container_of(attr, \
+ struct nilfs_##name##_attr, \
+ attr); \
+ return a->show ? a->show(a, nilfs, buf) : 0; \
+} \
+static ssize_t nilfs_##name##_attr_store(struct kobject *kobj, \
+ struct attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ struct nilfs_##name##_attr *a = container_of(attr, \
+ struct nilfs_##name##_attr, \
+ attr); \
+ return a->store ? a->store(a, nilfs, buf, len) : 0; \
+} \
+static const struct sysfs_ops nilfs_##name##_attr_ops = { \
+ .show = nilfs_##name##_attr_show, \
+ .store = nilfs_##name##_attr_store, \
+}
+
+#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
+static void nilfs_##name##_attr_release(struct kobject *kobj) \
+{ \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \
+ struct nilfs_sysfs_##parent_name##_subgroups, \
+ sg_##name##_kobj); \
+ complete(&subgroups->sg_##name##_kobj_unregister); \
+} \
+static const struct kobj_type nilfs_##name##_ktype = { \
+ .default_groups = nilfs_##name##_groups, \
+ .sysfs_ops = &nilfs_##name##_attr_ops, \
+ .release = nilfs_##name##_attr_release, \
+}
+
+#define NILFS_DEV_INT_GROUP_FNS(name, parent_name) \
+static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
+{ \
+ struct kobject *parent; \
+ struct kobject *kobj; \
+ struct completion *kobj_unregister; \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+ int err; \
+ subgroups = nilfs->ns_##parent_name##_subgroups; \
+ kobj = &subgroups->sg_##name##_kobj; \
+ kobj_unregister = &subgroups->sg_##name##_kobj_unregister; \
+ parent = &nilfs->ns_##parent_name##_kobj; \
+ kobj->kset = nilfs_kset; \
+ init_completion(kobj_unregister); \
+ err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
+ #name); \
+ if (err) \
+ kobject_put(kobj); \
+ return err; \
+} \
+static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
+{ \
+ kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
+}
+
+/************************************************************************
+ * NILFS snapshot attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_snapshot_inodes_count_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return sysfs_emit(buf, "%llu\n",
+ (unsigned long long)atomic64_read(&root->inodes_count));
+}
+
+static ssize_t
+nilfs_snapshot_blocks_count_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return sysfs_emit(buf, "%llu\n",
+ (unsigned long long)atomic64_read(&root->blocks_count));
+}
+
+static const char snapshot_readme_str[] =
+ "The group contains details about mounted snapshot.\n\n"
+ "(1) inodes_count\n\tshow number of inodes for snapshot.\n\n"
+ "(2) blocks_count\n\tshow number of blocks for snapshot.\n\n";
+
+static ssize_t
+nilfs_snapshot_README_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return sysfs_emit(buf, snapshot_readme_str);
+}
+
+NILFS_SNAPSHOT_RO_ATTR(inodes_count);
+NILFS_SNAPSHOT_RO_ATTR(blocks_count);
+NILFS_SNAPSHOT_RO_ATTR(README);
+
+static struct attribute *nilfs_snapshot_attrs[] = {
+ NILFS_SNAPSHOT_ATTR_LIST(inodes_count),
+ NILFS_SNAPSHOT_ATTR_LIST(blocks_count),
+ NILFS_SNAPSHOT_ATTR_LIST(README),
+ NULL,
+};
+ATTRIBUTE_GROUPS(nilfs_snapshot);
+
+static ssize_t nilfs_snapshot_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct nilfs_root *root =
+ container_of(kobj, struct nilfs_root, snapshot_kobj);
+ struct nilfs_snapshot_attr *a =
+ container_of(attr, struct nilfs_snapshot_attr, attr);
+
+ return a->show ? a->show(a, root, buf) : 0;
+}
+
+static ssize_t nilfs_snapshot_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct nilfs_root *root =
+ container_of(kobj, struct nilfs_root, snapshot_kobj);
+ struct nilfs_snapshot_attr *a =
+ container_of(attr, struct nilfs_snapshot_attr, attr);
+
+ return a->store ? a->store(a, root, buf, len) : 0;
+}
+
+static void nilfs_snapshot_attr_release(struct kobject *kobj)
+{
+ struct nilfs_root *root = container_of(kobj, struct nilfs_root,
+ snapshot_kobj);
+ complete(&root->snapshot_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_snapshot_attr_ops = {
+ .show = nilfs_snapshot_attr_show,
+ .store = nilfs_snapshot_attr_store,
+};
+
+static const struct kobj_type nilfs_snapshot_ktype = {
+ .default_groups = nilfs_snapshot_groups,
+ .sysfs_ops = &nilfs_snapshot_attr_ops,
+ .release = nilfs_snapshot_attr_release,
+};
+
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
+{
+ struct the_nilfs *nilfs;
+ struct kobject *parent;
+ int err;
+
+ nilfs = root->nilfs;
+ parent = &nilfs->ns_dev_subgroups->sg_mounted_snapshots_kobj;
+ root->snapshot_kobj.kset = nilfs_kset;
+ init_completion(&root->snapshot_kobj_unregister);
+
+ if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
+ err = kobject_init_and_add(&root->snapshot_kobj,
+ &nilfs_snapshot_ktype,
+ &nilfs->ns_dev_kobj,
+ "current_checkpoint");
+ } else {
+ err = kobject_init_and_add(&root->snapshot_kobj,
+ &nilfs_snapshot_ktype,
+ parent,
+ "%llu", root->cno);
+ }
+
+ if (err)
+ kobject_put(&root->snapshot_kobj);
+
+ return err;
+}
+
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
+{
+ kobject_put(&root->snapshot_kobj);
+}
+
+/************************************************************************
+ * NILFS mounted snapshots attrs *
+ ************************************************************************/
+
+static const char mounted_snapshots_readme_str[] =
+ "The mounted_snapshots group contains group for\n"
+ "every mounted snapshot.\n";
+
+static ssize_t
+nilfs_mounted_snapshots_README_show(struct nilfs_mounted_snapshots_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return sysfs_emit(buf, mounted_snapshots_readme_str);
+}
+
+/* Read-only (0444) attribute object backed by the README show handler */
+NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(README);
+
+/* NULL-terminated attribute list of the mounted_snapshots group */
+static struct attribute *nilfs_mounted_snapshots_attrs[] = {
+ NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(README),
+ NULL,
+};
+ATTRIBUTE_GROUPS(nilfs_mounted_snapshots);
+
+/*
+ * The NILFS_DEV_INT_GROUP_* macros are defined outside this excerpt;
+ * presumably they generate the sysfs ops, the kobj_type and the
+ * nilfs_sysfs_{create,delete}_mounted_snapshots_group() helpers.
+ */
+NILFS_DEV_INT_GROUP_OPS(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_TYPE(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_FNS(mounted_snapshots, dev);
+
+/************************************************************************
+ * NILFS checkpoints attrs *
+ ************************************************************************/
+
+/*
+ * Show the checkpoint count (cs_ncps) obtained from
+ * nilfs_cpfile_get_stat().  The stat is read with ns_segctor_sem held for
+ * reading; a negative result is logged and returned unchanged.
+ */
+static ssize_t
+nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr,
+					  struct the_nilfs *nilfs,
+					  char *buf)
+{
+	struct nilfs_cpstat stat;
+	__u64 count;
+	int ret;
+
+	down_read(&nilfs->ns_segctor_sem);
+	ret = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &stat);
+	up_read(&nilfs->ns_segctor_sem);
+
+	if (ret >= 0) {
+		count = stat.cs_ncps;
+		return sysfs_emit(buf, "%llu\n", count);
+	}
+
+	nilfs_err(nilfs->ns_sb, "unable to get checkpoint stat: err=%d", ret);
+	return ret;
+}
+
+/*
+ * Show the snapshot count (cs_nsss) obtained from
+ * nilfs_cpfile_get_stat().  The stat is read with ns_segctor_sem held for
+ * reading; a negative result is logged and returned unchanged.
+ */
+static ssize_t
+nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr,
+					struct the_nilfs *nilfs,
+					char *buf)
+{
+	struct nilfs_cpstat stat;
+	__u64 count;
+	int ret;
+
+	down_read(&nilfs->ns_segctor_sem);
+	ret = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &stat);
+	up_read(&nilfs->ns_segctor_sem);
+
+	if (ret >= 0) {
+		count = stat.cs_nsss;
+		return sysfs_emit(buf, "%llu\n", count);
+	}
+
+	nilfs_err(nilfs->ns_sb, "unable to get checkpoint stat: err=%d", ret);
+	return ret;
+}
+
+/* Show ns_last_cno, sampled under ns_last_segment_lock. */
+static ssize_t
+nilfs_checkpoints_last_seg_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+					   struct the_nilfs *nilfs,
+					   char *buf)
+{
+	__u64 cno;
+
+	spin_lock(&nilfs->ns_last_segment_lock);
+	cno = nilfs->ns_last_cno;
+	spin_unlock(&nilfs->ns_last_segment_lock);
+
+	return sysfs_emit(buf, "%llu\n", cno);
+}
+
+/* Show ns_cno, sampled with ns_segctor_sem held for reading. */
+static ssize_t
+nilfs_checkpoints_next_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+				       struct the_nilfs *nilfs,
+				       char *buf)
+{
+	__u64 next_cno;
+
+	down_read(&nilfs->ns_segctor_sem);
+	next_cno = nilfs->ns_cno;
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%llu\n", next_cno);
+}
+
+/* Help text exported as <device>/checkpoints/README */
+static const char checkpoints_readme_str[] =
+	"The checkpoints group contains attributes that describe\n"
+	"details about volume's checkpoints.\n\n"
+	"(1) checkpoints_number\n\tshow number of checkpoints on volume.\n\n"
+	"(2) snapshots_number\n\tshow number of snapshots on volume.\n\n"
+	"(3) last_seg_checkpoint\n"
+	"\tshow checkpoint number of the latest segment.\n\n"
+	"(4) next_checkpoint\n\tshow next checkpoint number.\n\n";
+
+/*
+ * Show the README of the checkpoints group.  The text is emitted through
+ * a literal "%s" format so it is never parsed as a format string.
+ */
+static ssize_t
+nilfs_checkpoints_README_show(struct nilfs_checkpoints_attr *attr,
+			      struct the_nilfs *nilfs, char *buf)
+{
+	return sysfs_emit(buf, "%s", checkpoints_readme_str);
+}
+
+/* Read-only (0444) attribute objects, one per show handler above */
+NILFS_CHECKPOINTS_RO_ATTR(checkpoints_number);
+NILFS_CHECKPOINTS_RO_ATTR(snapshots_number);
+NILFS_CHECKPOINTS_RO_ATTR(last_seg_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(next_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(README);
+
+/* NULL-terminated attribute list of the checkpoints group */
+static struct attribute *nilfs_checkpoints_attrs[] = {
+ NILFS_CHECKPOINTS_ATTR_LIST(checkpoints_number),
+ NILFS_CHECKPOINTS_ATTR_LIST(snapshots_number),
+ NILFS_CHECKPOINTS_ATTR_LIST(last_seg_checkpoint),
+ NILFS_CHECKPOINTS_ATTR_LIST(next_checkpoint),
+ NILFS_CHECKPOINTS_ATTR_LIST(README),
+ NULL,
+};
+ATTRIBUTE_GROUPS(nilfs_checkpoints);
+
+/*
+ * The NILFS_DEV_INT_GROUP_* macros are defined outside this excerpt;
+ * presumably they generate the sysfs ops, the kobj_type and the
+ * nilfs_sysfs_{create,delete}_checkpoints_group() helpers.
+ */
+NILFS_DEV_INT_GROUP_OPS(checkpoints, dev);
+NILFS_DEV_INT_GROUP_TYPE(checkpoints, dev);
+NILFS_DEV_INT_GROUP_FNS(checkpoints, dev);
+
+/************************************************************************
+ * NILFS segments attrs *
+ ************************************************************************/
+
+/* Show ns_nsegments; the field is read without taking any lock. */
+static ssize_t
+nilfs_segments_segments_number_show(struct nilfs_segments_attr *attr,
+				    struct the_nilfs *nilfs,
+				    char *buf)
+{
+	ssize_t len;
+
+	len = sysfs_emit(buf, "%lu\n", nilfs->ns_nsegments);
+	return len;
+}
+
+/* Show ns_blocks_per_segment; the field is read without taking any lock. */
+static ssize_t
+nilfs_segments_blocks_per_segment_show(struct nilfs_segments_attr *attr,
+				       struct the_nilfs *nilfs,
+				       char *buf)
+{
+	ssize_t len;
+
+	len = sysfs_emit(buf, "%lu\n", nilfs->ns_blocks_per_segment);
+	return len;
+}
+
+/*
+ * Show the value returned by nilfs_sufile_get_ncleansegs().
+ *
+ * NOTE(review): the semaphore held around the call is the mi_sem of the
+ * DAT inode although the value comes from the sufile - confirm this is
+ * the intended lock.
+ */
+static ssize_t
+nilfs_segments_clean_segments_show(struct nilfs_segments_attr *attr,
+				   struct the_nilfs *nilfs,
+				   char *buf)
+{
+	unsigned long nclean;
+
+	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+	nclean = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
+	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+
+	return sysfs_emit(buf, "%lu\n", nclean);
+}
+
+/*
+ * Show the dirty segment count (ss_ndirtysegs) obtained from
+ * nilfs_sufile_get_stat().  The stat is read with ns_segctor_sem held for
+ * reading; a negative result is logged and returned unchanged.
+ */
+static ssize_t
+nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr,
+				   struct the_nilfs *nilfs,
+				   char *buf)
+{
+	struct nilfs_sustat stat;
+	int ret;
+
+	down_read(&nilfs->ns_segctor_sem);
+	ret = nilfs_sufile_get_stat(nilfs->ns_sufile, &stat);
+	up_read(&nilfs->ns_segctor_sem);
+
+	if (ret >= 0)
+		return sysfs_emit(buf, "%llu\n", stat.ss_ndirtysegs);
+
+	nilfs_err(nilfs->ns_sb, "unable to get segment stat: err=%d", ret);
+	return ret;
+}
+
+/* Help text exported as <device>/segments/README */
+static const char segments_readme_str[] =
+	"The segments group contains attributes that describe\n"
+	"details about volume's segments.\n\n"
+	"(1) segments_number\n\tshow number of segments on volume.\n\n"
+	"(2) blocks_per_segment\n\tshow number of blocks in segment.\n\n"
+	"(3) clean_segments\n\tshow count of clean segments.\n\n"
+	"(4) dirty_segments\n\tshow count of dirty segments.\n\n";
+
+/*
+ * Show the README of the segments group.  The text is emitted through a
+ * literal "%s" format so it is never parsed as a format string.
+ */
+static ssize_t
+nilfs_segments_README_show(struct nilfs_segments_attr *attr,
+			   struct the_nilfs *nilfs,
+			   char *buf)
+{
+	return sysfs_emit(buf, "%s", segments_readme_str);
+}
+
+/* Read-only (0444) attribute objects, one per show handler above */
+NILFS_SEGMENTS_RO_ATTR(segments_number);
+NILFS_SEGMENTS_RO_ATTR(blocks_per_segment);
+NILFS_SEGMENTS_RO_ATTR(clean_segments);
+NILFS_SEGMENTS_RO_ATTR(dirty_segments);
+NILFS_SEGMENTS_RO_ATTR(README);
+
+/* NULL-terminated attribute list of the segments group */
+static struct attribute *nilfs_segments_attrs[] = {
+ NILFS_SEGMENTS_ATTR_LIST(segments_number),
+ NILFS_SEGMENTS_ATTR_LIST(blocks_per_segment),
+ NILFS_SEGMENTS_ATTR_LIST(clean_segments),
+ NILFS_SEGMENTS_ATTR_LIST(dirty_segments),
+ NILFS_SEGMENTS_ATTR_LIST(README),
+ NULL,
+};
+ATTRIBUTE_GROUPS(nilfs_segments);
+
+/*
+ * The NILFS_DEV_INT_GROUP_* macros are defined outside this excerpt;
+ * presumably they generate the sysfs ops, the kobj_type and the
+ * nilfs_sysfs_{create,delete}_segments_group() helpers.
+ */
+NILFS_DEV_INT_GROUP_OPS(segments, dev);
+NILFS_DEV_INT_GROUP_TYPE(segments, dev);
+NILFS_DEV_INT_GROUP_FNS(segments, dev);
+
+/************************************************************************
+ * NILFS segctor attrs *
+ ************************************************************************/
+
+/* Show ns_last_pseg, sampled under ns_last_segment_lock. */
+static ssize_t
+nilfs_segctor_last_pseg_block_show(struct nilfs_segctor_attr *attr,
+				   struct the_nilfs *nilfs,
+				   char *buf)
+{
+	sector_t blocknr;
+
+	spin_lock(&nilfs->ns_last_segment_lock);
+	blocknr = nilfs->ns_last_pseg;
+	spin_unlock(&nilfs->ns_last_segment_lock);
+
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)blocknr);
+}
+
+/* Show ns_last_seq, sampled under ns_last_segment_lock. */
+static ssize_t
+nilfs_segctor_last_seg_sequence_show(struct nilfs_segctor_attr *attr,
+				     struct the_nilfs *nilfs,
+				     char *buf)
+{
+	u64 seq;
+
+	spin_lock(&nilfs->ns_last_segment_lock);
+	seq = nilfs->ns_last_seq;
+	spin_unlock(&nilfs->ns_last_segment_lock);
+
+	return sysfs_emit(buf, "%llu\n", seq);
+}
+
+/* Show ns_last_cno, sampled under ns_last_segment_lock. */
+static ssize_t
+nilfs_segctor_last_seg_checkpoint_show(struct nilfs_segctor_attr *attr,
+				       struct the_nilfs *nilfs,
+				       char *buf)
+{
+	__u64 cno;
+
+	spin_lock(&nilfs->ns_last_segment_lock);
+	cno = nilfs->ns_last_cno;
+	spin_unlock(&nilfs->ns_last_segment_lock);
+
+	return sysfs_emit(buf, "%llu\n", cno);
+}
+
+/* Show ns_seg_seq, sampled with ns_segctor_sem held for reading. */
+static ssize_t
+nilfs_segctor_current_seg_sequence_show(struct nilfs_segctor_attr *attr,
+					struct the_nilfs *nilfs,
+					char *buf)
+{
+	u64 seq;
+
+	down_read(&nilfs->ns_segctor_sem);
+	seq = nilfs->ns_seg_seq;
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%llu\n", seq);
+}
+
+/* Show ns_segnum, sampled with ns_segctor_sem held for reading. */
+static ssize_t
+nilfs_segctor_current_last_full_seg_show(struct nilfs_segctor_attr *attr,
+					 struct the_nilfs *nilfs,
+					 char *buf)
+{
+	__u64 cur_segnum;
+
+	down_read(&nilfs->ns_segctor_sem);
+	cur_segnum = nilfs->ns_segnum;
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%llu\n", cur_segnum);
+}
+
+/* Show ns_nextnum, sampled with ns_segctor_sem held for reading. */
+static ssize_t
+nilfs_segctor_next_full_seg_show(struct nilfs_segctor_attr *attr,
+				 struct the_nilfs *nilfs,
+				 char *buf)
+{
+	__u64 next_segnum;
+
+	down_read(&nilfs->ns_segctor_sem);
+	next_segnum = nilfs->ns_nextnum;
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%llu\n", next_segnum);
+}
+
+/* Show ns_pseg_offset, sampled with ns_segctor_sem held for reading. */
+static ssize_t
+nilfs_segctor_next_pseg_offset_show(struct nilfs_segctor_attr *attr,
+				    struct the_nilfs *nilfs,
+				    char *buf)
+{
+	unsigned long offset;
+
+	down_read(&nilfs->ns_segctor_sem);
+	offset = nilfs->ns_pseg_offset;
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%lu\n", offset);
+}
+
+/* Show ns_cno, sampled with ns_segctor_sem held for reading. */
+static ssize_t
+nilfs_segctor_next_checkpoint_show(struct nilfs_segctor_attr *attr,
+				   struct the_nilfs *nilfs,
+				   char *buf)
+{
+	__u64 next_cno;
+
+	down_read(&nilfs->ns_segctor_sem);
+	next_cno = nilfs->ns_cno;
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%llu\n", next_cno);
+}
+
+/*
+ * Show ns_ctime formatted with the "%ptTs" specifier.  The value is
+ * sampled with ns_segctor_sem held for reading.
+ */
+static ssize_t
+nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
+				       struct the_nilfs *nilfs,
+				       char *buf)
+{
+	time64_t when;
+
+	down_read(&nilfs->ns_segctor_sem);
+	when = nilfs->ns_ctime;
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%ptTs\n", &when);
+}
+
+/*
+ * Show ns_ctime as a plain number of seconds.  The value is sampled with
+ * ns_segctor_sem held for reading.
+ *
+ * time64_t is a signed 64-bit type, so it is printed with "%lld" to keep
+ * the conversion specifier in agreement with the argument type.
+ */
+static ssize_t
+nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr,
+					    struct the_nilfs *nilfs,
+					    char *buf)
+{
+	time64_t ctime;
+
+	down_read(&nilfs->ns_segctor_sem);
+	ctime = nilfs->ns_ctime;
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%lld\n", ctime);
+}
+
+/*
+ * Show ns_nongc_ctime formatted with the "%ptTs" specifier.  The value
+ * is sampled with ns_segctor_sem held for reading.
+ */
+static ssize_t
+nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
+					 struct the_nilfs *nilfs,
+					 char *buf)
+{
+	time64_t when;
+
+	down_read(&nilfs->ns_segctor_sem);
+	when = nilfs->ns_nongc_ctime;
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%ptTs\n", &when);
+}
+
+/*
+ * Show ns_nongc_ctime as a plain number of seconds.  The value is sampled
+ * with ns_segctor_sem held for reading.
+ *
+ * time64_t is a signed 64-bit type, so it is printed with "%lld" to keep
+ * the conversion specifier in agreement with the argument type.
+ */
+static ssize_t
+nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr,
+					      struct the_nilfs *nilfs,
+					      char *buf)
+{
+	time64_t nongc_ctime;
+
+	down_read(&nilfs->ns_segctor_sem);
+	nongc_ctime = nilfs->ns_nongc_ctime;
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%lld\n", nongc_ctime);
+}
+
+/*
+ * Show the ns_ndirtyblks atomic counter, read with ns_segctor_sem held
+ * for reading.
+ */
+static ssize_t
+nilfs_segctor_dirty_data_blocks_count_show(struct nilfs_segctor_attr *attr,
+					   struct the_nilfs *nilfs,
+					   char *buf)
+{
+	u32 nblocks;
+
+	down_read(&nilfs->ns_segctor_sem);
+	nblocks = atomic_read(&nilfs->ns_ndirtyblks);
+	up_read(&nilfs->ns_segctor_sem);
+
+	return sysfs_emit(buf, "%u\n", nblocks);
+}
+
+/* Help text exported as <device>/segctor/README */
+static const char segctor_readme_str[] =
+	"The segctor group contains attributes that describe\n"
+	"segctor thread activity details.\n\n"
+	"(1) last_pseg_block\n"
+	"\tshow start block number of the latest segment.\n\n"
+	"(2) last_seg_sequence\n"
+	"\tshow sequence value of the latest segment.\n\n"
+	"(3) last_seg_checkpoint\n"
+	"\tshow checkpoint number of the latest segment.\n\n"
+	"(4) current_seg_sequence\n\tshow segment sequence counter.\n\n"
+	"(5) current_last_full_seg\n"
+	"\tshow index number of the latest full segment.\n\n"
+	"(6) next_full_seg\n"
+	"\tshow index number of the full segment index to be used next.\n\n"
+	"(7) next_pseg_offset\n"
+	"\tshow offset of next partial segment in the current full segment.\n\n"
+	"(8) next_checkpoint\n\tshow next checkpoint number.\n\n"
+	"(9) last_seg_write_time\n"
+	"\tshow write time of the last segment in human-readable format.\n\n"
+	"(10) last_seg_write_time_secs\n"
+	"\tshow write time of the last segment in seconds.\n\n"
+	"(11) last_nongc_write_time\n"
+	"\tshow write time of the last segment not for cleaner operation "
+	"in human-readable format.\n\n"
+	"(12) last_nongc_write_time_secs\n"
+	"\tshow write time of the last segment not for cleaner operation "
+	"in seconds.\n\n"
+	"(13) dirty_data_blocks_count\n"
+	"\tshow number of dirty data blocks.\n\n";
+
+/*
+ * Show the README of the segctor group.  The text is emitted through a
+ * literal "%s" format so it is never parsed as a format string.
+ */
+static ssize_t
+nilfs_segctor_README_show(struct nilfs_segctor_attr *attr,
+			  struct the_nilfs *nilfs, char *buf)
+{
+	return sysfs_emit(buf, "%s", segctor_readme_str);
+}
+
+/* Read-only (0444) attribute objects, one per show handler above */
+NILFS_SEGCTOR_RO_ATTR(last_pseg_block);
+NILFS_SEGCTOR_RO_ATTR(last_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(last_seg_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(current_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(current_last_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_pseg_offset);
+NILFS_SEGCTOR_RO_ATTR(next_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(dirty_data_blocks_count);
+NILFS_SEGCTOR_RO_ATTR(README);
+
+/* NULL-terminated attribute list of the segctor group */
+static struct attribute *nilfs_segctor_attrs[] = {
+ NILFS_SEGCTOR_ATTR_LIST(last_pseg_block),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_sequence),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_checkpoint),
+ NILFS_SEGCTOR_ATTR_LIST(current_seg_sequence),
+ NILFS_SEGCTOR_ATTR_LIST(current_last_full_seg),
+ NILFS_SEGCTOR_ATTR_LIST(next_full_seg),
+ NILFS_SEGCTOR_ATTR_LIST(next_pseg_offset),
+ NILFS_SEGCTOR_ATTR_LIST(next_checkpoint),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time_secs),
+ NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time),
+ NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time_secs),
+ NILFS_SEGCTOR_ATTR_LIST(dirty_data_blocks_count),
+ NILFS_SEGCTOR_ATTR_LIST(README),
+ NULL,
+};
+ATTRIBUTE_GROUPS(nilfs_segctor);
+
+/*
+ * The NILFS_DEV_INT_GROUP_* macros are defined outside this excerpt;
+ * presumably they generate the sysfs ops, the kobj_type and the
+ * nilfs_sysfs_{create,delete}_segctor_group() helpers.
+ */
+NILFS_DEV_INT_GROUP_OPS(segctor, dev);
+NILFS_DEV_INT_GROUP_TYPE(segctor, dev);
+NILFS_DEV_INT_GROUP_FNS(segctor, dev);
+
+/************************************************************************
+ * NILFS superblock attrs *
+ ************************************************************************/
+
+/*
+ * Show ns_sbwtime formatted with the "%ptTs" specifier.  The value is
+ * sampled with ns_sem held for reading.
+ */
+static ssize_t
+nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
+				    struct the_nilfs *nilfs,
+				    char *buf)
+{
+	time64_t when;
+
+	down_read(&nilfs->ns_sem);
+	when = nilfs->ns_sbwtime;
+	up_read(&nilfs->ns_sem);
+
+	return sysfs_emit(buf, "%ptTs\n", &when);
+}
+
+/*
+ * Show ns_sbwtime as a plain number of seconds.  The value is sampled
+ * with ns_sem held for reading.
+ *
+ * time64_t is a signed 64-bit type, so it is printed with "%lld" to keep
+ * the conversion specifier in agreement with the argument type.
+ */
+static ssize_t
+nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr,
+					 struct the_nilfs *nilfs,
+					 char *buf)
+{
+	time64_t sbwtime;
+
+	down_read(&nilfs->ns_sem);
+	sbwtime = nilfs->ns_sbwtime;
+	up_read(&nilfs->ns_sem);
+
+	return sysfs_emit(buf, "%lld\n", sbwtime);
+}
+
+/* Show ns_sbwcount, sampled with ns_sem held for reading. */
+static ssize_t
+nilfs_superblock_sb_write_count_show(struct nilfs_superblock_attr *attr,
+				     struct the_nilfs *nilfs,
+				     char *buf)
+{
+	unsigned int nwrites;
+
+	down_read(&nilfs->ns_sem);
+	nwrites = nilfs->ns_sbwcount;
+	up_read(&nilfs->ns_sem);
+
+	return sysfs_emit(buf, "%u\n", nwrites);
+}
+
+/* Show ns_sb_update_freq, sampled with ns_sem held for reading. */
+static ssize_t
+nilfs_superblock_sb_update_frequency_show(struct nilfs_superblock_attr *attr,
+					  struct the_nilfs *nilfs,
+					  char *buf)
+{
+	unsigned int freq;
+
+	down_read(&nilfs->ns_sem);
+	freq = nilfs->ns_sb_update_freq;
+	up_read(&nilfs->ns_sem);
+
+	return sysfs_emit(buf, "%u\n", freq);
+}
+
+/*
+ * Parse an unsigned integer from @buf (leading whitespace skipped, base
+ * auto-detected) and store it in ns_sb_update_freq under ns_sem.
+ *
+ * Values below NILFS_SB_FREQ are raised to NILFS_SB_FREQ and a warning is
+ * logged.  A parse failure is logged and its error code returned;
+ * otherwise @count is returned.
+ */
+static ssize_t
+nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr,
+					   struct the_nilfs *nilfs,
+					   const char *buf, size_t count)
+{
+	unsigned int freq;
+	int ret;
+
+	ret = kstrtouint(skip_spaces(buf), 0, &freq);
+	if (ret) {
+		nilfs_err(nilfs->ns_sb, "unable to convert string: err=%d",
+			  ret);
+		return ret;
+	}
+
+	if (freq < NILFS_SB_FREQ) {
+		nilfs_warn(nilfs->ns_sb,
+			   "superblock update frequency cannot be lesser than 10 seconds");
+		freq = NILFS_SB_FREQ;
+	}
+
+	down_write(&nilfs->ns_sem);
+	nilfs->ns_sb_update_freq = freq;
+	up_write(&nilfs->ns_sem);
+
+	return count;
+}
+
+/* Help text exported as <device>/superblock/README */
+static const char sb_readme_str[] =
+	"The superblock group contains attributes that describe\n"
+	"superblock's details.\n\n"
+	"(1) sb_write_time\n\tshow previous write time of super block "
+	"in human-readable format.\n\n"
+	"(2) sb_write_time_secs\n\tshow previous write time of super block "
+	"in seconds.\n\n"
+	"(3) sb_write_count\n\tshow write count of super block.\n\n"
+	"(4) sb_update_frequency\n"
+	"\tshow/set interval of periodical update of superblock (in seconds).\n\n"
+	"\tYou can set preferable frequency of superblock update by command:\n\n"
+	"\t'echo <val> > /sys/fs/<nilfs>/<dev>/superblock/sb_update_frequency'\n";
+
+/*
+ * Show the README of the superblock group.  The text is emitted through
+ * a literal "%s" format so it is never parsed as a format string.
+ */
+static ssize_t
+nilfs_superblock_README_show(struct nilfs_superblock_attr *attr,
+			     struct the_nilfs *nilfs, char *buf)
+{
+	return sysfs_emit(buf, "%s", sb_readme_str);
+}
+
+/*
+ * Attribute objects of the superblock group; sb_update_frequency is the
+ * only read-write (0644) one, the rest are read-only (0444).
+ */
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time_secs);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_count);
+NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency);
+NILFS_SUPERBLOCK_RO_ATTR(README);
+
+/* NULL-terminated attribute list of the superblock group */
+static struct attribute *nilfs_superblock_attrs[] = {
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time_secs),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_count),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_update_frequency),
+ NILFS_SUPERBLOCK_ATTR_LIST(README),
+ NULL,
+};
+ATTRIBUTE_GROUPS(nilfs_superblock);
+
+/*
+ * The NILFS_DEV_INT_GROUP_* macros are defined outside this excerpt;
+ * presumably they generate the sysfs ops, the kobj_type and the
+ * nilfs_sysfs_{create,delete}_superblock_group() helpers.
+ */
+NILFS_DEV_INT_GROUP_OPS(superblock, dev);
+NILFS_DEV_INT_GROUP_TYPE(superblock, dev);
+NILFS_DEV_INT_GROUP_FNS(superblock, dev);
+
+/************************************************************************
+ * NILFS device attrs *
+ ************************************************************************/
+
+/*
+ * Show the on-disk revision as "<major>.<minor>".
+ *
+ * Both fields are read from the first superblock buffer (ns_sbp[0]) with
+ * ns_sem held for reading and converted from little endian.  They are
+ * unsigned quantities, so they are printed with "%u" rather than "%d".
+ */
+static
+ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
+				struct the_nilfs *nilfs,
+				char *buf)
+{
+	struct nilfs_super_block *raw_sb;
+	u32 major;
+	u16 minor;
+
+	down_read(&nilfs->ns_sem);
+	raw_sb = nilfs->ns_sbp[0];
+	major = le32_to_cpu(raw_sb->s_rev_level);
+	minor = le16_to_cpu(raw_sb->s_minor_rev_level);
+	up_read(&nilfs->ns_sem);
+
+	return sysfs_emit(buf, "%u.%u\n", major, minor);
+}
+
+/* Show ns_blocksize; the field is read without taking any lock. */
+static
+ssize_t nilfs_dev_blocksize_show(struct nilfs_dev_attr *attr,
+				 struct the_nilfs *nilfs,
+				 char *buf)
+{
+	ssize_t len;
+
+	len = sysfs_emit(buf, "%u\n", nilfs->ns_blocksize);
+	return len;
+}
+
+/*
+ * Show s_dev_size of the first superblock buffer (ns_sbp[0]), converted
+ * from little endian.  The field is read with ns_sem held for reading.
+ */
+static
+ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
+				   struct the_nilfs *nilfs,
+				   char *buf)
+{
+	struct nilfs_super_block *sb0;
+	u64 size;
+
+	down_read(&nilfs->ns_sem);
+	sb0 = nilfs->ns_sbp[0];
+	size = le64_to_cpu(sb0->s_dev_size);
+	up_read(&nilfs->ns_sem);
+
+	return sysfs_emit(buf, "%llu\n", size);
+}
+
+/*
+ * Show the block count filled in by nilfs_count_free_blocks().
+ *
+ * The counter starts at 0 and the helper's return value is not checked,
+ * so 0 is shown if the helper leaves the counter untouched.
+ */
+static
+ssize_t nilfs_dev_free_blocks_show(struct nilfs_dev_attr *attr,
+				   struct the_nilfs *nilfs,
+				   char *buf)
+{
+	sector_t nfree = 0;
+
+	nilfs_count_free_blocks(nilfs, &nfree);
+
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)nfree);
+}
+
+/*
+ * Show s_uuid of the first superblock buffer (ns_sbp[0]) using the
+ * "%pUb" specifier.  Formatting happens with ns_sem held for reading.
+ */
+static
+ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
+			    struct the_nilfs *nilfs,
+			    char *buf)
+{
+	struct nilfs_super_block *sb0;
+	ssize_t written;
+
+	down_read(&nilfs->ns_sem);
+	sb0 = nilfs->ns_sbp[0];
+	written = sysfs_emit(buf, "%pUb\n", sb0->s_uuid);
+	up_read(&nilfs->ns_sem);
+
+	return written;
+}
+
+/*
+ * Show s_volume_name of the first superblock buffer (ns_sbp[0]),
+ * followed by a newline.  Formatting happens with ns_sem held for
+ * reading.
+ *
+ * s_volume_name is a fixed-size on-disk field, so the read is limited to
+ * the size of the field with an explicit "%.*s" precision: a label that
+ * fills the field without a terminating NUL cannot make the formatter
+ * run into the following superblock fields.  Using sysfs_emit() instead
+ * of a field-sized scnprintf() also keeps the trailing newline for
+ * labels that use the whole field.
+ */
+static
+ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
+				   struct the_nilfs *nilfs,
+				   char *buf)
+{
+	struct nilfs_super_block *raw_sb;
+	ssize_t len;
+
+	down_read(&nilfs->ns_sem);
+	raw_sb = nilfs->ns_sbp[0];
+	len = sysfs_emit(buf, "%.*s\n", (int)sizeof(raw_sb->s_volume_name),
+			 raw_sb->s_volume_name);
+	up_read(&nilfs->ns_sem);
+
+	return len;
+}
+
+/* Help text exported as <device>/README */
+static const char dev_readme_str[] =
+	"The <device> group contains attributes that describe file system\n"
+	"partition's details.\n\n"
+	"(1) revision\n\tshow NILFS file system revision.\n\n"
+	"(2) blocksize\n\tshow volume block size in bytes.\n\n"
+	"(3) device_size\n\tshow volume size in bytes.\n\n"
+	"(4) free_blocks\n\tshow count of free blocks on volume.\n\n"
+	"(5) uuid\n\tshow volume's UUID.\n\n"
+	"(6) volume_name\n\tshow volume's name.\n\n";
+
+/*
+ * Show the README of the device group.  The text is emitted through a
+ * literal "%s" format so it is never parsed as a format string.
+ */
+static ssize_t nilfs_dev_README_show(struct nilfs_dev_attr *attr,
+				     struct the_nilfs *nilfs,
+				     char *buf)
+{
+	return sysfs_emit(buf, "%s", dev_readme_str);
+}
+
+/* Read-only (0444) attribute objects, one per show handler above */
+NILFS_DEV_RO_ATTR(revision);
+NILFS_DEV_RO_ATTR(blocksize);
+NILFS_DEV_RO_ATTR(device_size);
+NILFS_DEV_RO_ATTR(free_blocks);
+NILFS_DEV_RO_ATTR(uuid);
+NILFS_DEV_RO_ATTR(volume_name);
+NILFS_DEV_RO_ATTR(README);
+
+/* NULL-terminated attribute list of the per-device kobject */
+static struct attribute *nilfs_dev_attrs[] = {
+ NILFS_DEV_ATTR_LIST(revision),
+ NILFS_DEV_ATTR_LIST(blocksize),
+ NILFS_DEV_ATTR_LIST(device_size),
+ NILFS_DEV_ATTR_LIST(free_blocks),
+ NILFS_DEV_ATTR_LIST(uuid),
+ NILFS_DEV_ATTR_LIST(volume_name),
+ NILFS_DEV_ATTR_LIST(README),
+ NULL,
+};
+/* Provides nilfs_dev_groups, referenced by nilfs_dev_ktype */
+ATTRIBUTE_GROUPS(nilfs_dev);
+
+/*
+ * sysfs ->show() dispatcher of the device kobject: recovers the_nilfs
+ * and the nilfs_dev_attr wrapper from @kobj and @attr and forwards to
+ * the attribute's own show handler.  Returns 0 if there is none.
+ */
+static ssize_t nilfs_dev_attr_show(struct kobject *kobj,
+				   struct attribute *attr, char *buf)
+{
+	struct nilfs_dev_attr *dev_attr;
+	struct the_nilfs *nilfs;
+
+	nilfs = container_of(kobj, struct the_nilfs, ns_dev_kobj);
+	dev_attr = container_of(attr, struct nilfs_dev_attr, attr);
+
+	if (!dev_attr->show)
+		return 0;
+
+	return dev_attr->show(dev_attr, nilfs, buf);
+}
+
+/*
+ * sysfs ->store() dispatcher of the device kobject: recovers the_nilfs
+ * and the nilfs_dev_attr wrapper from @kobj and @attr and forwards to
+ * the attribute's own store handler.  Returns 0 if there is none.
+ */
+static ssize_t nilfs_dev_attr_store(struct kobject *kobj,
+				    struct attribute *attr,
+				    const char *buf, size_t len)
+{
+	struct nilfs_dev_attr *dev_attr;
+	struct the_nilfs *nilfs;
+
+	nilfs = container_of(kobj, struct the_nilfs, ns_dev_kobj);
+	dev_attr = container_of(attr, struct nilfs_dev_attr, attr);
+
+	if (!dev_attr->store)
+		return 0;
+
+	return dev_attr->store(dev_attr, nilfs, buf, len);
+}
+
+/*
+ * kobject release callback of the device kobject: signals the
+ * ns_dev_kobj_unregister completion of the the_nilfs embedding @kobj.
+ */
+static void nilfs_dev_attr_release(struct kobject *kobj)
+{
+	struct the_nilfs *owner;
+
+	owner = container_of(kobj, struct the_nilfs, ns_dev_kobj);
+	complete(&owner->ns_dev_kobj_unregister);
+}
+
+/* sysfs read/write entry points shared by all device attributes */
+static const struct sysfs_ops nilfs_dev_attr_ops = {
+	.store	= nilfs_dev_attr_store,
+	.show	= nilfs_dev_attr_show,
+};
+
+/* kobject type used for the_nilfs::ns_dev_kobj */
+static const struct kobj_type nilfs_dev_ktype = {
+	.release	= nilfs_dev_attr_release,
+	.sysfs_ops	= &nilfs_dev_attr_ops,
+	.default_groups	= nilfs_dev_groups,
+};
+
+/*
+ * Create the per-device sysfs hierarchy for the file system mounted on
+ * @sb.
+ *
+ * Allocates the subgroup container, registers the device kobject (named
+ * after sb->s_id, member of nilfs_kset) and then creates the
+ * mounted_snapshots, checkpoints, segments, superblock and segctor
+ * subgroups in that order.  On failure, the subgroups created so far are
+ * deleted in reverse order, the device kobject reference is dropped and
+ * the container is freed.
+ *
+ * Return: 0 on success, -ENOMEM if the container cannot be allocated, or
+ * the error code of the step that failed.
+ */
+int nilfs_sysfs_create_device_group(struct super_block *sb)
+{
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ size_t devgrp_size = sizeof(struct nilfs_sysfs_dev_subgroups);
+ int err;
+
+ nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL);
+ if (unlikely(!nilfs->ns_dev_subgroups)) {
+ err = -ENOMEM;
+ nilfs_err(sb, "unable to allocate memory for device group");
+ goto failed_create_device_group;
+ }
+
+ nilfs->ns_dev_kobj.kset = nilfs_kset;
+ init_completion(&nilfs->ns_dev_kobj_unregister);
+ err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
+ "%s", sb->s_id);
+ /* the kobject is initialized even on failure, so it must be put */
+ if (err)
+ goto cleanup_dev_kobject;
+
+ err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
+ if (err)
+ goto cleanup_dev_kobject;
+
+ err = nilfs_sysfs_create_checkpoints_group(nilfs);
+ if (err)
+ goto delete_mounted_snapshots_group;
+
+ err = nilfs_sysfs_create_segments_group(nilfs);
+ if (err)
+ goto delete_checkpoints_group;
+
+ err = nilfs_sysfs_create_superblock_group(nilfs);
+ if (err)
+ goto delete_segments_group;
+
+ err = nilfs_sysfs_create_segctor_group(nilfs);
+ if (err)
+ goto delete_superblock_group;
+
+ return 0;
+
+ /* unwind ladder: each label falls through to the ones below it */
+delete_superblock_group:
+ nilfs_sysfs_delete_superblock_group(nilfs);
+
+delete_segments_group:
+ nilfs_sysfs_delete_segments_group(nilfs);
+
+delete_checkpoints_group:
+ nilfs_sysfs_delete_checkpoints_group(nilfs);
+
+delete_mounted_snapshots_group:
+ nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+
+cleanup_dev_kobject:
+ kobject_put(&nilfs->ns_dev_kobj);
+ /* NOTE(review): ns_dev_subgroups is left pointing at freed memory */
+ kfree(nilfs->ns_dev_subgroups);
+
+failed_create_device_group:
+ return err;
+}
+
+/*
+ * Tear down the per-device sysfs hierarchy: delete the five subgroups,
+ * remove the device kobject from sysfs and drop its reference, then free
+ * the subgroup container.
+ */
+void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
+{
+	struct kobject *dev_kobj = &nilfs->ns_dev_kobj;
+
+	nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+	nilfs_sysfs_delete_checkpoints_group(nilfs);
+	nilfs_sysfs_delete_segments_group(nilfs);
+	nilfs_sysfs_delete_superblock_group(nilfs);
+	nilfs_sysfs_delete_segctor_group(nilfs);
+
+	kobject_del(dev_kobj);
+	kobject_put(dev_kobj);
+
+	kfree(nilfs->ns_dev_subgroups);
+}
+
+/************************************************************************
+ * NILFS feature attrs *
+ ************************************************************************/
+
+/* Show NILFS_CURRENT_REV and NILFS_MINOR_REV as "<major>.<minor>". */
+static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	ssize_t len;
+
+	len = sysfs_emit(buf, "%d.%d\n", NILFS_CURRENT_REV, NILFS_MINOR_REV);
+	return len;
+}
+
+/* Help text exported as features/README below the nilfs2 kset */
+static const char features_readme_str[] =
+	"The features group contains attributes that describe NILFS file\n"
+	"system driver features.\n\n"
+	"(1) revision\n\tshow current revision of NILFS file system driver.\n";
+
+/*
+ * Show the README of the features group.  The text is emitted through a
+ * literal "%s" format so it is never parsed as a format string.
+ */
+static ssize_t nilfs_feature_README_show(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 char *buf)
+{
+	return sysfs_emit(buf, "%s", features_readme_str);
+}
+
+/* Read-only (0444) attribute objects of the features group */
+NILFS_FEATURE_RO_ATTR(revision);
+NILFS_FEATURE_RO_ATTR(README);
+
+/* NULL-terminated attribute list of the features group */
+static struct attribute *nilfs_feature_attrs[] = {
+ NILFS_FEATURE_ATTR_LIST(revision),
+ NILFS_FEATURE_ATTR_LIST(README),
+ NULL,
+};
+
+/* Named group, created as the "features" directory by nilfs_sysfs_init() */
+static const struct attribute_group nilfs_feature_attr_group = {
+ .name = "features",
+ .attrs = nilfs_feature_attrs,
+};
+
+/*
+ * Create the nilfs2 kset below fs_kobj and attach the "features"
+ * attribute group to it.  If the group cannot be created the kset is
+ * unregistered again.
+ *
+ * Return: 0 on success, -ENOMEM if the kset cannot be created, or the
+ * error code of sysfs_create_group().
+ */
+int __init nilfs_sysfs_init(void)
+{
+	int ret;
+
+	nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj);
+	if (!nilfs_kset) {
+		ret = -ENOMEM;
+		nilfs_err(NULL, "unable to create sysfs entry: err=%d", ret);
+		return ret;
+	}
+
+	ret = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+	if (unlikely(ret)) {
+		nilfs_err(NULL, "unable to create feature group: err=%d", ret);
+		kset_unregister(nilfs_kset);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Remove the "features" group and unregister the nilfs2 kset. */
+void nilfs_sysfs_exit(void)
+{
+	struct kset *kset = nilfs_kset;
+
+	sysfs_remove_group(&kset->kobj, &nilfs_feature_attr_group);
+	kset_unregister(kset);
+}
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
new file mode 100644
index 000000000000..d370cd5cce3f
--- /dev/null
+++ b/fs/nilfs2/sysfs.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Sysfs support declarations.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#ifndef _NILFS_SYSFS_H
+#define _NILFS_SYSFS_H
+
+#include <linux/sysfs.h>
+
+#define NILFS_ROOT_GROUP_NAME "nilfs2"
+
+/*
+ * struct nilfs_sysfs_dev_subgroups - device subgroup kernel objects
+ * @sg_superblock_kobj: /sys/fs/<nilfs>/<device>/superblock
+ * @sg_superblock_kobj_unregister: completion state
+ * @sg_segctor_kobj: /sys/fs/<nilfs>/<device>/segctor
+ * @sg_segctor_kobj_unregister: completion state
+ * @sg_mounted_snapshots_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots
+ * @sg_mounted_snapshots_kobj_unregister: completion state
+ * @sg_checkpoints_kobj: /sys/fs/<nilfs>/<device>/checkpoints
+ * @sg_checkpoints_kobj_unregister: completion state
+ * @sg_segments_kobj: /sys/fs/<nilfs>/<device>/segments
+ * @sg_segments_kobj_unregister: completion state
+ *
+ * One instance is allocated per mounted device and freed together with
+ * the device group.  Each kobject is paired with a completion that is
+ * presumably signalled by that kobject's release callback (the subgroup
+ * release code is not part of this header).
+ */
+struct nilfs_sysfs_dev_subgroups {
+ /* /sys/fs/<nilfs>/<device>/superblock */
+ struct kobject sg_superblock_kobj;
+ struct completion sg_superblock_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/segctor */
+ struct kobject sg_segctor_kobj;
+ struct completion sg_segctor_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/mounted_snapshots */
+ struct kobject sg_mounted_snapshots_kobj;
+ struct completion sg_mounted_snapshots_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/checkpoints */
+ struct kobject sg_checkpoints_kobj;
+ struct completion sg_checkpoints_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/segments */
+ struct kobject sg_segments_kobj;
+ struct completion sg_segments_kobj_unregister;
+};
+
+/*
+ * Declare struct nilfs_<name>_attr: an attribute whose show/store
+ * handlers take the plain (kobject, kobj_attribute) arguments.
+ */
+#define NILFS_KOBJ_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct kobject *, struct kobj_attribute *, \
+ char *); \
+ ssize_t (*store)(struct kobject *, struct kobj_attribute *, \
+ const char *, size_t); \
+}
+
+NILFS_KOBJ_ATTR_STRUCT(feature);
+
+/*
+ * Declare struct nilfs_<name>_attr: an attribute whose show/store
+ * handlers receive the attribute itself and the owning the_nilfs.
+ */
+#define NILFS_DEV_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+ char *); \
+ ssize_t (*store)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+ const char *, size_t); \
+}
+
+/* One attribute type for the device itself and one per device subgroup */
+NILFS_DEV_ATTR_STRUCT(dev);
+NILFS_DEV_ATTR_STRUCT(segments);
+NILFS_DEV_ATTR_STRUCT(mounted_snapshots);
+NILFS_DEV_ATTR_STRUCT(checkpoints);
+NILFS_DEV_ATTR_STRUCT(superblock);
+NILFS_DEV_ATTR_STRUCT(segctor);
+
+/*
+ * Declare struct nilfs_<name>_attr: an attribute whose show/store
+ * handlers receive the attribute itself and the owning nilfs_root.
+ */
+#define NILFS_CP_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+ char *); \
+ ssize_t (*store)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+ const char *, size_t); \
+}
+
+NILFS_CP_ATTR_STRUCT(snapshot);
+
+/*
+ * Define the static object nilfs_<type>_attr_<name> of type
+ * struct nilfs_<type>_attr, initialized through __ATTR().
+ */
+#define NILFS_ATTR(type, name, mode, show, store) \
+ static struct nilfs_##type##_attr nilfs_##type##_attr_##name = \
+ __ATTR(name, mode, show, store)
+
+/*
+ * Convenience forms:
+ *   INFO - mode 0444, no handlers
+ *   RO   - mode 0444, show = nilfs_<type>_<name>_show
+ *   RW   - mode 0644, show/store = nilfs_<type>_<name>_{show,store}
+ */
+#define NILFS_INFO_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0444, NULL, NULL)
+#define NILFS_RO_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0444, nilfs_##type##_##name##_show, NULL)
+#define NILFS_RW_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0644, \
+ nilfs_##type##_##name##_show, \
+ nilfs_##type##_##name##_store)
+
+/* Attribute definers bound to the "feature" attribute type */
+#define NILFS_FEATURE_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(feature, name)
+#define NILFS_FEATURE_RO_ATTR(name) \
+ NILFS_RO_ATTR(feature, name)
+#define NILFS_FEATURE_RW_ATTR(name) \
+ NILFS_RW_ATTR(feature, name)
+
+/* Attribute definers bound to the "dev" attribute type */
+#define NILFS_DEV_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(dev, name)
+#define NILFS_DEV_RO_ATTR(name) \
+ NILFS_RO_ATTR(dev, name)
+#define NILFS_DEV_RW_ATTR(name) \
+ NILFS_RW_ATTR(dev, name)
+
+/*
+ * Attribute definers bound to the "segments" attribute type.  Both forms
+ * must name the same type: only struct nilfs_segments_attr is declared
+ * in this header, so the RW form uses "segments" like the RO form.
+ */
+#define NILFS_SEGMENTS_RO_ATTR(name) \
+	NILFS_RO_ATTR(segments, name)
+#define NILFS_SEGMENTS_RW_ATTR(name) \
+	NILFS_RW_ATTR(segments, name)
+
+/* Attribute definer bound to the "mounted_snapshots" attribute type */
+#define NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(mounted_snapshots, name)
+
+/* Attribute definers bound to the "checkpoints" attribute type */
+#define NILFS_CHECKPOINTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(checkpoints, name)
+#define NILFS_CHECKPOINTS_RW_ATTR(name) \
+ NILFS_RW_ATTR(checkpoints, name)
+
+/* Attribute definers bound to the "snapshot" attribute type */
+#define NILFS_SNAPSHOT_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RO_ATTR(name) \
+ NILFS_RO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RW_ATTR(name) \
+ NILFS_RW_ATTR(snapshot, name)
+
+/* Attribute definers bound to the "superblock" attribute type */
+#define NILFS_SUPERBLOCK_RO_ATTR(name) \
+ NILFS_RO_ATTR(superblock, name)
+#define NILFS_SUPERBLOCK_RW_ATTR(name) \
+ NILFS_RW_ATTR(superblock, name)
+
+/* Attribute definers bound to the "segctor" attribute type */
+#define NILFS_SEGCTOR_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RO_ATTR(name) \
+ NILFS_RO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RW_ATTR(name) \
+ NILFS_RW_ATTR(segctor, name)
+
+/*
+ * Each *_ATTR_LIST(name) yields the address of the struct attribute
+ * embedded in nilfs_<type>_attr_<name>, for use as an element of a
+ * struct attribute * array.
+ */
+#define NILFS_FEATURE_ATTR_LIST(name) \
+ (&nilfs_feature_attr_##name.attr)
+#define NILFS_DEV_ATTR_LIST(name) \
+ (&nilfs_dev_attr_##name.attr)
+#define NILFS_SEGMENTS_ATTR_LIST(name) \
+ (&nilfs_segments_attr_##name.attr)
+#define NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(name) \
+ (&nilfs_mounted_snapshots_attr_##name.attr)
+#define NILFS_CHECKPOINTS_ATTR_LIST(name) \
+ (&nilfs_checkpoints_attr_##name.attr)
+#define NILFS_SNAPSHOT_ATTR_LIST(name) \
+ (&nilfs_snapshot_attr_##name.attr)
+#define NILFS_SUPERBLOCK_ATTR_LIST(name) \
+ (&nilfs_superblock_attr_##name.attr)
+#define NILFS_SEGCTOR_ATTR_LIST(name) \
+ (&nilfs_segctor_attr_##name.attr)
+
+#endif /* _NILFS_SYSFS_H */
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 94c451ce6d24..d0bcf744c553 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * the_nilfs.c - the_nilfs shared structure.
+ * the_nilfs shared structure.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*
*/
@@ -25,7 +12,7 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
-#include <linux/random.h>
+#include <linux/log2.h>
#include <linux/crc32.h>
#include "nilfs.h"
#include "segment.h"
@@ -60,12 +47,12 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs,
/**
* alloc_nilfs - allocate a nilfs object
- * @bdev: block device to which the_nilfs is related
+ * @sb: super block instance
*
- * Return Value: On success, pointer to the_nilfs is returned.
- * On error, NULL is returned.
+ * Return: a pointer to the allocated nilfs object on success, or NULL on
+ * failure.
*/
-struct the_nilfs *alloc_nilfs(struct block_device *bdev)
+struct the_nilfs *alloc_nilfs(struct super_block *sb)
{
struct the_nilfs *nilfs;
@@ -73,18 +60,19 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
if (!nilfs)
return NULL;
- nilfs->ns_bdev = bdev;
+ nilfs->ns_sb = sb;
+ nilfs->ns_bdev = sb->s_bdev;
atomic_set(&nilfs->ns_ndirtyblks, 0);
init_rwsem(&nilfs->ns_sem);
mutex_init(&nilfs->ns_snapshot_mount_mutex);
INIT_LIST_HEAD(&nilfs->ns_dirty_files);
INIT_LIST_HEAD(&nilfs->ns_gc_inodes);
spin_lock_init(&nilfs->ns_inode_lock);
- spin_lock_init(&nilfs->ns_next_gen_lock);
spin_lock_init(&nilfs->ns_last_segment_lock);
nilfs->ns_cptree = RB_ROOT;
spin_lock_init(&nilfs->ns_cptree_lock);
init_rwsem(&nilfs->ns_segctor_sem);
+ nilfs->ns_sb_update_freq = NILFS_SB_FREQ;
return nilfs;
}
@@ -110,8 +98,8 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
struct nilfs_super_root *raw_sr;
struct nilfs_super_block **sbp = nilfs->ns_sbp;
struct nilfs_inode *rawi;
- unsigned dat_entry_size, segment_usage_size, checkpoint_size;
- unsigned inode_size;
+ unsigned int dat_entry_size, segment_usage_size, checkpoint_size;
+ unsigned int inode_size;
int err;
err = nilfs_read_super_root_block(nilfs, sr_block, &bh_sr, 1);
@@ -177,6 +165,9 @@ static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri)
* containing a super root from a given super block, and initializes
* relevant information on the nilfs object preparatory for log
* scanning and recovery.
+ *
+ * Return: 0 on success, or %-EINVAL if current segment number is out
+ * of range.
*/
static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
struct nilfs_super_block *sbp)
@@ -193,20 +184,57 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg);
nilfs->ns_cno = nilfs->ns_last_cno + 1;
if (nilfs->ns_segnum >= nilfs->ns_nsegments) {
- printk(KERN_ERR "NILFS invalid last segment number.\n");
+ nilfs_err(nilfs->ns_sb,
+ "pointed segment number is out of range: segnum=%llu, nsegments=%lu",
+ (unsigned long long)nilfs->ns_segnum,
+ nilfs->ns_nsegments);
ret = -EINVAL;
}
return ret;
}
/**
+ * nilfs_get_blocksize - get block size from raw superblock data
+ * @sb: super block instance
+ * @sbp: superblock raw data buffer
+ * @blocksize: place to store block size
+ *
+ * nilfs_get_blocksize() calculates the block size from the block size
+ * exponent information written in @sbp and stores it in @blocksize,
+ * or aborts with an error message if it's too large.
+ *
+ * Return: 0 on success, or %-EINVAL if the block size is too large.
+ */
+static int nilfs_get_blocksize(struct super_block *sb,
+ struct nilfs_super_block *sbp, int *blocksize)
+{
+ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size);
+
+ if (unlikely(shift_bits >
+ ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)) {
+ nilfs_err(sb, "too large filesystem blocksize: 2 ^ %u KiB",
+ shift_bits);
+ return -EINVAL;
+ }
+ *blocksize = BLOCK_SIZE << shift_bits;
+ return 0;
+}
+
+/**
* load_nilfs - load and recover the nilfs
* @nilfs: the_nilfs structure to be loaded
- * @sb: super block isntance used to recover past segment
+ * @sb: super block instance used to recover past segment
*
* load_nilfs() searches for and loads the latest super root,
* attaches the last segment, and does recovery if needed.
* The caller must call this exclusively for simultaneous mounts.
+ *
+ * Return: 0 on success, or one of the following negative error codes on
+ * failure:
+ * * %-EINVAL - No valid segment found.
+ * * %-EIO - I/O error.
+ * * %-ENOMEM - Insufficient memory available.
+ * * %-EROFS - Read only device or RO compat mode (if recovery is required)
*/
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
{
@@ -217,12 +245,12 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
int err;
if (!valid_fs) {
- printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
- if (s_flags & MS_RDONLY) {
- printk(KERN_INFO "NILFS: INFO: recovery "
- "required for readonly filesystem.\n");
- printk(KERN_INFO "NILFS: write access will "
- "be enabled during recovery.\n");
+ nilfs_warn(sb, "mounting unchecked fs");
+ if (s_flags & SB_RDONLY) {
+ nilfs_info(sb,
+ "recovery required for readonly filesystem");
+ nilfs_info(sb,
+ "write access will be enabled during recovery");
}
}
@@ -237,13 +265,11 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
goto scan_error;
if (!nilfs_valid_sb(sbp[1])) {
- printk(KERN_WARNING
- "NILFS warning: unable to fall back to spare"
- "super block\n");
+ nilfs_warn(sb,
+ "unable to fall back to spare super block");
goto scan_error;
}
- printk(KERN_INFO
- "NILFS: try rollback from an earlier position\n");
+ nilfs_info(sb, "trying rollback from an earlier position");
/*
* restore super block with its spare and reconfigure
@@ -254,12 +280,15 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
/* verify consistency between two super blocks */
- blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size);
+ err = nilfs_get_blocksize(sb, sbp[0], &blocksize);
+ if (err)
+ goto scan_error;
+
if (blocksize != nilfs->ns_blocksize) {
- printk(KERN_WARNING
- "NILFS warning: blocksize differs between "
- "two super blocks (%d != %d)\n",
- blocksize, nilfs->ns_blocksize);
+ nilfs_warn(sb,
+ "blocksize differs between two super blocks (%d != %d)",
+ blocksize, nilfs->ns_blocksize);
+ err = -EINVAL;
goto scan_error;
}
@@ -278,41 +307,44 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
err = nilfs_load_super_root(nilfs, sb, ri.ri_super_root);
if (unlikely(err)) {
- printk(KERN_ERR "NILFS: error loading super root.\n");
+ nilfs_err(sb, "error %d while loading super root", err);
goto failed;
}
+ err = nilfs_sysfs_create_device_group(sb);
+ if (unlikely(err))
+ goto sysfs_error;
+
if (valid_fs)
goto skip_recovery;
- if (s_flags & MS_RDONLY) {
+ if (s_flags & SB_RDONLY) {
__u64 features;
if (nilfs_test_opt(nilfs, NORECOVERY)) {
- printk(KERN_INFO "NILFS: norecovery option specified. "
- "skipping roll-forward recovery\n");
+ nilfs_info(sb,
+ "norecovery option specified, skipping roll-forward recovery");
goto skip_recovery;
}
features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) &
~NILFS_FEATURE_COMPAT_RO_SUPP;
if (features) {
- printk(KERN_ERR "NILFS: couldn't proceed with "
- "recovery because of unsupported optional "
- "features (%llx)\n",
- (unsigned long long)features);
+ nilfs_err(sb,
+ "couldn't proceed with recovery because of unsupported optional features (%llx)",
+ (unsigned long long)features);
err = -EROFS;
goto failed_unload;
}
if (really_read_only) {
- printk(KERN_ERR "NILFS: write access "
- "unavailable, cannot proceed.\n");
+ nilfs_err(sb,
+ "write access unavailable, cannot proceed");
err = -EROFS;
goto failed_unload;
}
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
} else if (nilfs_test_opt(nilfs, NORECOVERY)) {
- printk(KERN_ERR "NILFS: recovery cancelled because norecovery "
- "option was specified for a read/write mount\n");
+ nilfs_err(sb,
+ "recovery cancelled because norecovery option was specified for a read/write mount");
err = -EINVAL;
goto failed_unload;
}
@@ -327,11 +359,12 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
up_write(&nilfs->ns_sem);
if (err) {
- printk(KERN_ERR "NILFS: failed to update super block. "
- "recovery unfinished.\n");
+ nilfs_err(sb,
+ "error %d updating super block. recovery unfinished.",
+ err);
goto failed_unload;
}
- printk(KERN_INFO "NILFS: recovery complete.\n");
+ nilfs_info(sb, "recovery complete");
skip_recovery:
nilfs_clear_recovery_info(&ri);
@@ -339,10 +372,13 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
return 0;
scan_error:
- printk(KERN_ERR "NILFS: error searching super root.\n");
+ nilfs_err(sb, "error %d while searching super root", err);
goto failed;
failed_unload:
+ nilfs_sysfs_delete_device_group(nilfs);
+
+ sysfs_error:
iput(nilfs->ns_cpfile);
iput(nilfs->ns_sufile);
iput(nilfs->ns_dat);
@@ -368,6 +404,8 @@ static unsigned long long nilfs_max_size(unsigned int blkbits)
* nilfs_nrsvsegs - calculate the number of reserved segments
* @nilfs: nilfs object
* @nsegs: total number of segments
+ *
+ * Return: Number of reserved segments.
*/
unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
{
@@ -376,6 +414,20 @@ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
100));
}
+/**
+ * nilfs_max_segment_count - calculate the maximum number of segments
+ * @nilfs: nilfs object
+ *
+ * Return: Maximum number of segments
+ */
+static u64 nilfs_max_segment_count(struct the_nilfs *nilfs)
+{
+ u64 max_count = U64_MAX;
+
+ max_count = div64_ul(max_count, nilfs->ns_blocks_per_segment);
+ return min_t(u64, max_count, ULONG_MAX);
+}
+
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
{
nilfs->ns_nsegments = nsegs;
@@ -385,13 +437,14 @@ void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
struct nilfs_super_block *sbp)
{
+ u64 nsegments, nblocks;
+
if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) {
- printk(KERN_ERR "NILFS: unsupported revision "
- "(superblock rev.=%d.%d, current rev.=%d.%d). "
- "Please check the version of mkfs.nilfs.\n",
- le32_to_cpu(sbp->s_rev_level),
- le16_to_cpu(sbp->s_minor_rev_level),
- NILFS_CURRENT_REV, NILFS_MINOR_REV);
+ nilfs_err(nilfs->ns_sb,
+ "unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). Please check the version of mkfs.nilfs(2).",
+ le32_to_cpu(sbp->s_rev_level),
+ le16_to_cpu(sbp->s_minor_rev_level),
+ NILFS_CURRENT_REV, NILFS_MINOR_REV);
return -EINVAL;
}
nilfs->ns_sbsize = le16_to_cpu(sbp->s_bytes);
@@ -399,11 +452,28 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
return -EINVAL;
nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size);
+ if (nilfs->ns_inode_size > nilfs->ns_blocksize) {
+ nilfs_err(nilfs->ns_sb, "too large inode size: %d bytes",
+ nilfs->ns_inode_size);
+ return -EINVAL;
+ } else if (nilfs->ns_inode_size < NILFS_MIN_INODE_SIZE) {
+ nilfs_err(nilfs->ns_sb, "too small inode size: %d bytes",
+ nilfs->ns_inode_size);
+ return -EINVAL;
+ }
+
nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino);
+ if (nilfs->ns_first_ino < NILFS_USER_INO) {
+ nilfs_err(nilfs->ns_sb,
+ "too small lower limit for non-reserved inode numbers: %u",
+ nilfs->ns_first_ino);
+ return -EINVAL;
+ }
nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) {
- printk(KERN_ERR "NILFS: too short segment.\n");
+ nilfs_err(nilfs->ns_sb, "too short segment: %lu blocks",
+ nilfs->ns_blocks_per_segment);
return -EINVAL;
}
@@ -412,11 +482,40 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
le32_to_cpu(sbp->s_r_segments_percentage);
if (nilfs->ns_r_segments_percentage < 1 ||
nilfs->ns_r_segments_percentage > 99) {
- printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n");
+ nilfs_err(nilfs->ns_sb,
+ "invalid reserved segments percentage: %lu",
+ nilfs->ns_r_segments_percentage);
+ return -EINVAL;
+ }
+
+ nsegments = le64_to_cpu(sbp->s_nsegments);
+ if (nsegments > nilfs_max_segment_count(nilfs)) {
+ nilfs_err(nilfs->ns_sb,
+ "segment count %llu exceeds upper limit (%llu segments)",
+ (unsigned long long)nsegments,
+ (unsigned long long)nilfs_max_segment_count(nilfs));
return -EINVAL;
}
- nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
+ nblocks = sb_bdev_nr_blocks(nilfs->ns_sb);
+ if (nblocks) {
+ u64 min_block_count = nsegments * nilfs->ns_blocks_per_segment;
+ /*
+ * To avoid failing to mount early device images without a
+ * second superblock, exclude that block count from the
+ * "min_block_count" calculation.
+ */
+
+ if (nblocks < min_block_count) {
+ nilfs_err(nilfs->ns_sb,
+ "total number of segment blocks %llu exceeds device size (%llu blocks)",
+ (unsigned long long)min_block_count,
+ (unsigned long long)nblocks);
+ return -EINVAL;
+ }
+ }
+
+ nilfs_set_nsegments(nilfs, nsegments);
nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
return 0;
}
@@ -431,7 +530,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
return 0;
bytes = le16_to_cpu(sbp->s_bytes);
- if (bytes > BLOCK_SIZE)
+ if (bytes < sumoff + 4 || bytes > BLOCK_SIZE)
return 0;
crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
sumoff);
@@ -441,11 +540,33 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
return crc == le32_to_cpu(sbp->s_sum);
}
-static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
+/**
+ * nilfs_sb2_bad_offset - check the location of the second superblock
+ * @sbp: superblock raw data buffer
+ * @offset: byte offset of second superblock calculated from device size
+ *
+ * nilfs_sb2_bad_offset() checks if the position of the second
+ * superblock is valid or not based on the filesystem parameters
+ * stored in @sbp. If @offset points to a location within the segment
+ * area, or if the parameters themselves are not normal, it is
+ * determined to be invalid.
+ *
+ * Return: true if invalid, false if valid.
+ */
+static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
{
- return offset < ((le64_to_cpu(sbp->s_nsegments) *
- le32_to_cpu(sbp->s_blocks_per_segment)) <<
- (le32_to_cpu(sbp->s_log_block_size) + 10));
+ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size);
+ u32 blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
+ u64 nsegments = le64_to_cpu(sbp->s_nsegments);
+ u64 index;
+
+ if (blocks_per_segment < NILFS_SEG_MIN_BLOCKS ||
+ shift_bits > ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)
+ return true;
+
+ index = offset >> (shift_bits + BLOCK_SIZE_BITS);
+ do_div(index, blocks_per_segment);
+ return index < nsegments;
}
static void nilfs_release_super_block(struct the_nilfs *nilfs)
@@ -487,8 +608,14 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
{
struct nilfs_super_block **sbp = nilfs->ns_sbp;
struct buffer_head **sbh = nilfs->ns_sbh;
- u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size);
- int valid[2], swp = 0;
+ u64 sb2off, devsize = bdev_nr_bytes(nilfs->ns_bdev);
+ int valid[2], swp = 0, older;
+
+ if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) {
+ nilfs_err(sb, "device size too small");
+ return -EINVAL;
+ }
+ sb2off = NILFS_SB2_OFFSET_BYTES(devsize);
sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize,
&sbh[0]);
@@ -496,16 +623,16 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
if (!sbp[0]) {
if (!sbp[1]) {
- printk(KERN_ERR "NILFS: unable to read superblock\n");
+ nilfs_err(sb, "unable to read superblock");
return -EIO;
}
- printk(KERN_WARNING
- "NILFS warning: unable to read primary superblock "
- "(blocksize = %d)\n", blocksize);
+ nilfs_warn(sb,
+ "unable to read primary superblock (blocksize = %d)",
+ blocksize);
} else if (!sbp[1]) {
- printk(KERN_WARNING
- "NILFS warning: unable to read secondary superblock "
- "(blocksize = %d)\n", blocksize);
+ nilfs_warn(sb,
+ "unable to read secondary superblock (blocksize = %d)",
+ blocksize);
}
/*
@@ -527,20 +654,36 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
}
if (!valid[swp]) {
nilfs_release_super_block(nilfs);
- printk(KERN_ERR "NILFS: Can't find nilfs on dev %s.\n",
- sb->s_id);
+ nilfs_err(sb, "couldn't find nilfs on the device");
return -EINVAL;
}
if (!valid[!swp])
- printk(KERN_WARNING "NILFS warning: broken superblock. "
- "using spare superblock (blocksize = %d).\n", blocksize);
+ nilfs_warn(sb,
+ "broken superblock, retrying with spare superblock (blocksize = %d)",
+ blocksize);
if (swp)
nilfs_swap_super_block(nilfs);
+ /*
+ * Calculate the array index of the older superblock data.
+ * If one has been dropped, set index 0 pointing to the remaining one,
+ * otherwise set index 1 pointing to the old one (including if both
+ * are the same).
+ *
+ * Divided case valid[0] valid[1] swp -> older
+ * -------------------------------------------------------------
+ * Both SBs are invalid 0 0 N/A (Error)
+ * SB1 is invalid 0 1 1 0
+ * SB2 is invalid 1 0 0 0
+ * SB2 is newer 1 1 1 0
+ * SB2 is older or the same 1 1 0 1
+ */
+ older = valid[1] ^ swp;
+
nilfs->ns_sbwcount = 0;
nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
- nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq);
+ nilfs->ns_prot_seq = le64_to_cpu(sbp[older]->s_last_seq);
*sbpp = sbp[0];
return 0;
}
@@ -549,26 +692,22 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
* init_nilfs - initialize a NILFS instance.
* @nilfs: the_nilfs structure
* @sb: super block
- * @data: mount options
*
* init_nilfs() performs common initialization per block device (e.g.
* reading the super block, getting disk layout information, initializing
* shared fields in the_nilfs).
*
- * Return Value: On success, 0 is returned. On error, a negative error
- * code is returned.
+ * Return: 0 on success, or a negative error code on failure.
*/
-int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
+int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
{
struct nilfs_super_block *sbp;
int blocksize;
int err;
- down_write(&nilfs->ns_sem);
-
blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
if (!blocksize) {
- printk(KERN_ERR "NILFS: unable to set blocksize\n");
+ nilfs_err(sb, "unable to set blocksize");
err = -EINVAL;
goto out;
}
@@ -576,7 +715,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto out;
- err = nilfs_store_magic_and_option(sb, sbp, data);
+ err = nilfs_store_magic(sb, sbp);
if (err)
goto failed_sbh;
@@ -584,11 +723,14 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
- blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
- if (blocksize < NILFS_MIN_BLOCK_SIZE ||
- blocksize > NILFS_MAX_BLOCK_SIZE) {
- printk(KERN_ERR "NILFS: couldn't mount because of unsupported "
- "filesystem blocksize %d\n", blocksize);
+ err = nilfs_get_blocksize(sb, sbp, &blocksize);
+ if (err)
+ goto failed_sbh;
+
+ if (blocksize < NILFS_MIN_BLOCK_SIZE) {
+ nilfs_err(sb,
+ "couldn't mount because of unsupported filesystem blocksize %d",
+ blocksize);
err = -EINVAL;
goto failed_sbh;
}
@@ -596,28 +738,30 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
int hw_blocksize = bdev_logical_block_size(sb->s_bdev);
if (blocksize < hw_blocksize) {
- printk(KERN_ERR
- "NILFS: blocksize %d too small for device "
- "(sector-size = %d).\n",
- blocksize, hw_blocksize);
+ nilfs_err(sb,
+ "blocksize %d too small for device (sector-size = %d)",
+ blocksize, hw_blocksize);
err = -EINVAL;
goto failed_sbh;
}
nilfs_release_super_block(nilfs);
- sb_set_blocksize(sb, blocksize);
+ if (!sb_set_blocksize(sb, blocksize)) {
+ nilfs_err(sb, "bad blocksize %d", blocksize);
+ err = -EINVAL;
+ goto out;
+ }
err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
if (err)
goto out;
- /* not failed_sbh; sbh is released automatically
- when reloading fails. */
+ /*
+ * Do not jump to failed_sbh here; sbh is released automatically
+ * when reloading fails.
+ */
}
nilfs->ns_blocksize_bits = sb->s_blocksize_bits;
nilfs->ns_blocksize = blocksize;
- get_random_bytes(&nilfs->ns_next_generation,
- sizeof(nilfs->ns_next_generation));
-
err = nilfs_store_disk_layout(nilfs, sbp);
if (err)
goto failed_sbh;
@@ -633,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
set_nilfs_init(nilfs);
err = 0;
out:
- up_write(&nilfs->ns_sem);
return err;
failed_sbh:
@@ -664,7 +807,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
ret = blkdev_issue_discard(nilfs->ns_bdev,
start * sects_per_block,
nblocks * sects_per_block,
- GFP_NOFS, 0);
+ GFP_NOFS);
if (ret < 0)
return ret;
nblocks = 0;
@@ -674,7 +817,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
ret = blkdev_issue_discard(nilfs->ns_bdev,
start * sects_per_block,
nblocks * sects_per_block,
- GFP_NOFS, 0);
+ GFP_NOFS);
return ret;
}
@@ -682,9 +825,7 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
{
unsigned long ncleansegs;
- down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
- up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
*nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
return 0;
}
@@ -715,7 +856,7 @@ struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno)
} else if (cno > root->cno) {
n = n->rb_right;
} else {
- atomic_inc(&root->count);
+ refcount_inc(&root->count);
spin_unlock(&nilfs->ns_cptree_lock);
return root;
}
@@ -730,12 +871,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
{
struct rb_node **p, *parent;
struct nilfs_root *root, *new;
+ int err;
root = nilfs_lookup_root(nilfs, cno);
if (root)
return root;
- new = kmalloc(sizeof(*root), GFP_KERNEL);
+ new = kzalloc(sizeof(*root), GFP_KERNEL);
if (!new)
return NULL;
@@ -753,7 +895,7 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
} else if (cno > root->cno) {
p = &(*p)->rb_right;
} else {
- atomic_inc(&root->count);
+ refcount_inc(&root->count);
spin_unlock(&nilfs->ns_cptree_lock);
kfree(new);
return root;
@@ -763,7 +905,7 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
new->cno = cno;
new->ifile = NULL;
new->nilfs = nilfs;
- atomic_set(&new->count, 1);
+ refcount_set(&new->count, 1);
atomic64_set(&new->inodes_count, 0);
atomic64_set(&new->blocks_count, 0);
@@ -772,19 +914,25 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
spin_unlock(&nilfs->ns_cptree_lock);
+ err = nilfs_sysfs_create_snapshot_group(new);
+ if (err) {
+ kfree(new);
+ new = NULL;
+ }
+
return new;
}
void nilfs_put_root(struct nilfs_root *root)
{
- if (atomic_dec_and_test(&root->count)) {
- struct the_nilfs *nilfs = root->nilfs;
+ struct the_nilfs *nilfs = root->nilfs;
- spin_lock(&nilfs->ns_cptree_lock);
+ if (refcount_dec_and_lock(&root->count, &nilfs->ns_cptree_lock)) {
rb_erase(&root->rb_node, &nilfs->ns_cptree);
spin_unlock(&nilfs->ns_cptree_lock);
- if (root->ifile)
- iput(root->ifile);
+
+ nilfs_sysfs_delete_snapshot_group(root);
+ iput(root->ifile);
kfree(root);
}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index de8cc53b4a5c..4776a70f01ae 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * the_nilfs.h - the_nilfs shared structure.
+ * the_nilfs shared structure.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
+ * Written by Ryusuke Konishi.
*
*/
@@ -31,8 +18,10 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/slab.h>
+#include <linux/refcount.h>
struct nilfs_sc_info;
+struct nilfs_sysfs_dev_subgroups;
/* the_nilfs struct */
enum {
@@ -40,11 +29,14 @@ enum {
THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
THE_NILFS_GC_RUNNING, /* gc process is running */
THE_NILFS_SB_DIRTY, /* super block is dirty */
+ THE_NILFS_PURGING, /* disposing dirty files for cleanup */
};
/**
* struct the_nilfs - struct to supervise multiple nilfs mount points
* @ns_flags: flags
+ * @ns_flushed_device: flag indicating if all volatile data was flushed
+ * @ns_sb: back pointer to super block instance
* @ns_bdev: block device
* @ns_sem: semaphore for shared states
* @ns_snapshot_mount_mutex: mutex to protect snapshot mounts
@@ -54,6 +46,7 @@ enum {
* @ns_sbwcount: write count of super block
* @ns_sbsize: size of valid data in super block
* @ns_mount_state: file system state
+ * @ns_sb_update_freq: interval of periodical update of superblocks (in seconds)
* @ns_seg_seq: segment sequence counter
* @ns_segnum: index number of the latest full segment.
* @ns_nextnum: index number of the full segment index to be used next
@@ -78,8 +71,6 @@ enum {
* @ns_dirty_files: list of dirty files
* @ns_inode_lock: lock protecting @ns_dirty_files
* @ns_gc_inodes: dummy inodes to keep live blocks
- * @ns_next_generation: next generation number for inodes
- * @ns_next_gen_lock: lock protecting @ns_next_generation
* @ns_mount_opt: mount options
* @ns_resuid: uid for reserved blocks
* @ns_resgid: gid for reserved blocks
@@ -95,10 +86,15 @@ enum {
* @ns_inode_size: size of on-disk inode
* @ns_first_ino: first not-special inode number
* @ns_crc_seed: seed value of CRC32 calculation
+ * @ns_dev_kobj: /sys/fs/<nilfs>/<device>
+ * @ns_dev_kobj_unregister: completion state
+ * @ns_dev_subgroups: <device> subgroups pointer
*/
struct the_nilfs {
unsigned long ns_flags;
+ int ns_flushed_device;
+ struct super_block *ns_sb;
struct block_device *ns_bdev;
struct rw_semaphore ns_sem;
struct mutex ns_snapshot_mount_mutex;
@@ -110,25 +106,23 @@ struct the_nilfs {
*/
struct buffer_head *ns_sbh[2];
struct nilfs_super_block *ns_sbp[2];
- time_t ns_sbwtime;
- unsigned ns_sbwcount;
- unsigned ns_sbsize;
- unsigned ns_mount_state;
+ time64_t ns_sbwtime;
+ unsigned int ns_sbwcount;
+ unsigned int ns_sbsize;
+ unsigned int ns_mount_state;
+ unsigned int ns_sb_update_freq;
/*
- * Following fields are dedicated to a writable FS-instance.
- * Except for the period seeking checkpoint, code outside the segment
- * constructor must lock a segment semaphore while accessing these
- * fields.
- * The writable FS-instance is sole during a lifetime of the_nilfs.
+ * The following fields are updated by a writable FS-instance.
+ * These fields are protected by ns_segctor_sem outside load_nilfs().
*/
u64 ns_seg_seq;
__u64 ns_segnum;
__u64 ns_nextnum;
unsigned long ns_pseg_offset;
__u64 ns_cno;
- time_t ns_ctime;
- time_t ns_nongc_ctime;
+ time64_t ns_ctime;
+ time64_t ns_nongc_ctime;
atomic_t ns_ndirtyblks;
/*
@@ -165,10 +159,6 @@ struct the_nilfs {
/* GC inode list */
struct list_head ns_gc_inodes;
- /* Inode allocator */
- u32 ns_next_generation;
- spinlock_t ns_next_gen_lock;
-
/* Mount options */
unsigned long ns_mount_opt;
@@ -186,8 +176,13 @@ struct the_nilfs {
unsigned long ns_nrsvsegs;
unsigned long ns_first_data_block;
int ns_inode_size;
- int ns_first_ino;
+ unsigned int ns_first_ino;
u32 ns_crc_seed;
+
+ /* /sys/fs/<nilfs>/<device> */
+ struct kobject ns_dev_kobj;
+ struct completion ns_dev_kobj_unregister;
+ struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
};
#define THE_NILFS_FNS(bit, name) \
@@ -208,20 +203,16 @@ THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(DISCONTINUED, discontinued)
THE_NILFS_FNS(GC_RUNNING, gc_running)
THE_NILFS_FNS(SB_DIRTY, sb_dirty)
+THE_NILFS_FNS(PURGING, purging)
/*
* Mount option operations
*/
#define nilfs_clear_opt(nilfs, opt) \
- do { (nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt; } while (0)
+ ((nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt)
#define nilfs_set_opt(nilfs, opt) \
- do { (nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt; } while (0)
+ ((nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt)
#define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt)
-#define nilfs_write_opt(nilfs, mask, opt) \
- do { (nilfs)->ns_mount_opt = \
- (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \
- NILFS_MOUNT_##opt); \
- } while (0)
/**
* struct nilfs_root - nilfs root object
@@ -232,17 +223,23 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
* @ifile: inode file
* @inodes_count: number of inodes
* @blocks_count: number of blocks
+ * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
+ * @snapshot_kobj_unregister: completion state for kernel object
*/
struct nilfs_root {
__u64 cno;
struct rb_node rb_node;
- atomic_t count;
+ refcount_t count;
struct the_nilfs *nilfs;
struct inode *ifile;
atomic64_t inodes_count;
atomic64_t blocks_count;
+
+ /* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
+ struct kobject snapshot_kobj;
+ struct completion snapshot_kobj_unregister;
};
/* Special checkpoint number */
@@ -253,20 +250,23 @@ struct nilfs_root {
static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
{
- u64 t = get_seconds();
- return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ;
+ u64 t = ktime_get_real_seconds();
+
+ return t < nilfs->ns_sbwtime ||
+ t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
}
static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
{
int flip_bits = nilfs->ns_sbwcount & 0x0FL;
+
return (flip_bits != 0x08 && flip_bits != 0x0F);
}
void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
-struct the_nilfs *alloc_nilfs(struct block_device *bdev);
+struct the_nilfs *alloc_nilfs(struct super_block *sb);
void destroy_nilfs(struct the_nilfs *nilfs);
-int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
+int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
@@ -283,12 +283,12 @@ void nilfs_swap_super_block(struct the_nilfs *);
static inline void nilfs_get_root(struct nilfs_root *root)
{
- atomic_inc(&root->count);
+ refcount_inc(&root->count);
}
static inline int nilfs_valid_fs(struct the_nilfs *nilfs)
{
- unsigned valid_fs;
+ unsigned int valid_fs;
down_read(&nilfs->ns_sem);
valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS);
@@ -353,4 +353,24 @@ static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n)
return n == nilfs->ns_segnum || n == nilfs->ns_nextnum;
}
+static inline int nilfs_flush_device(struct the_nilfs *nilfs)
+{
+ int err;
+
+ if (!nilfs_test_opt(nilfs, BARRIER) || nilfs->ns_flushed_device)
+ return 0;
+
+ nilfs->ns_flushed_device = 1;
+ /*
+ * the store to ns_flushed_device must not be reordered after
+ * blkdev_issue_flush().
+ */
+ smp_wmb();
+
+ err = blkdev_issue_flush(nilfs->ns_bdev);
+ if (err != -EIO)
+ err = 0;
+ return err;
+}
+
#endif /* _THE_NILFS_H */