summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c5
-rw-r--r--fs/aio.c2
-rw-r--r--fs/btrfs/block-rsv.c3
-rw-r--r--fs/btrfs/dev-replace.c26
-rw-r--r--fs/btrfs/ioctl.c10
-rw-r--r--fs/btrfs/qgroup.c12
-rw-r--r--fs/btrfs/ref-verify.c1
-rw-r--r--fs/btrfs/relocation.c4
-rw-r--r--fs/btrfs/scrub.c5
-rw-r--r--fs/btrfs/volumes.c26
-rw-r--r--fs/crypto/inline_crypt.c2
-rw-r--r--fs/crypto/keysetup.c4
-rw-r--r--fs/erofs/inode.c21
-rw-r--r--fs/erofs/zdata.c7
-rw-r--r--fs/ext4/ext4.h72
-rw-r--r--fs/ext4/extents.c7
-rw-r--r--fs/ext4/fast_commit.c174
-rw-r--r--fs/ext4/fast_commit.h6
-rw-r--r--fs/ext4/file.c6
-rw-r--r--fs/ext4/fsmap.c2
-rw-r--r--fs/ext4/fsync.c2
-rw-r--r--fs/ext4/inline.c1
-rw-r--r--fs/ext4/inode.c19
-rw-r--r--fs/ext4/mballoc.c6
-rw-r--r--fs/ext4/namei.c61
-rw-r--r--fs/ext4/super.c58
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/gfs2/bmap.c8
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/gfs2/rgrp.c10
-rw-r--r--fs/io-wq.c4
-rw-r--r--fs/io_uring.c188
-rw-r--r--fs/iomap/buffered-io.c30
-rw-r--r--fs/jbd2/checkpoint.c2
-rw-r--r--fs/jbd2/commit.c11
-rw-r--r--fs/jbd2/journal.c138
-rw-r--r--fs/jbd2/recovery.c6
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/nfs/dir.c15
-rw-r--r--fs/nfs/nfs42xattr.c2
-rw-r--r--fs/nfs/nfs42xdr.c4
-rw-r--r--fs/nfs/nfsroot.c6
-rw-r--r--fs/nfsd/nfs3proc.c6
-rw-r--r--fs/nfsd/nfs3xdr.c1
-rw-r--r--fs/nfsd/nfs4proc.c3
-rw-r--r--fs/ocfs2/journal.c2
-rw-r--r--fs/ocfs2/super.c1
-rw-r--r--fs/proc/cpuinfo.c2
-rw-r--r--fs/proc/generic.c4
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/stat.c2
-rw-r--r--fs/seq_file.c45
-rw-r--r--fs/super.c49
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h2
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c16
-rw-r--r--fs/xfs/scrub/bmap.c2
-rw-r--r--fs/xfs/scrub/inode.c3
-rw-r--r--fs/xfs/scrub/refcount.c8
-rw-r--r--fs/xfs/xfs_aops.c20
-rw-r--r--fs/xfs/xfs_iops.c10
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_reflink.c3
64 files changed, 641 insertions, 519 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 50371207f327..c9195fc67fd8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -169,11 +169,14 @@ int afs_write_end(struct file *file, struct address_space *mapping,
unsigned int f, from = pos & (PAGE_SIZE - 1);
unsigned int t, to = from + copied;
loff_t i_size, maybe_i_size;
- int ret;
+ int ret = 0;
_enter("{%llx:%llu},{%lx}",
vnode->fid.vid, vnode->fid.vnode, page->index);
+ if (copied == 0)
+ goto out;
+
maybe_i_size = pos + copied;
i_size = i_size_read(&vnode->vfs_inode);
diff --git a/fs/aio.c b/fs/aio.c
index c45c20d87538..6a21d8919409 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1572,7 +1572,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
* we return to userspace.
*/
if (S_ISREG(file_inode(file)->i_mode)) {
- __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
+ sb_start_write(file_inode(file)->i_sb);
__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
}
req->ki_flags |= IOCB_WRITE;
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 7e1549a84fcc..bc920afe23bf 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -511,7 +511,8 @@ again:
/*DEFAULT_RATELIMIT_BURST*/ 1);
if (__ratelimit(&_rs))
WARN(1, KERN_DEBUG
- "BTRFS: block rsv returned %d\n", ret);
+ "BTRFS: block rsv %d returned %d\n",
+ block_rsv->type, ret);
}
try_reserve:
ret = btrfs_reserve_metadata_bytes(root, block_rsv, blocksize,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5b9e3f3ace22..10638537b9ef 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -91,6 +91,17 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
if (ret) {
no_valid_dev_replace_entry_found:
+ /*
+ * We don't have a replace item or it's corrupted. If there is
+ * a replace target, fail the mount.
+ */
+ if (btrfs_find_device(fs_info->fs_devices,
+ BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) {
+ btrfs_err(fs_info,
+ "found replace target device without a valid replace item");
+ ret = -EUCLEAN;
+ goto out;
+ }
ret = 0;
dev_replace->replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED;
@@ -143,8 +154,19 @@ no_valid_dev_replace_entry_found:
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
- dev_replace->srcdev = NULL;
- dev_replace->tgtdev = NULL;
+ /*
+ * We don't have an active replace item but if there is a
+ * replace target, fail the mount.
+ */
+ if (btrfs_find_device(fs_info->fs_devices,
+ BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) {
+ btrfs_err(fs_info,
+ "replace devid present without an active replace item");
+ ret = -EUCLEAN;
+ } else {
+ dev_replace->srcdev = NULL;
+ dev_replace->tgtdev = NULL;
+ }
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ab408a23ba32..69a384145dc6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1274,6 +1274,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
u64 page_start;
u64 page_end;
u64 page_cnt;
+ u64 start = (u64)start_index << PAGE_SHIFT;
int ret;
int i;
int i_done;
@@ -1290,8 +1291,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
- start_index << PAGE_SHIFT,
- page_cnt << PAGE_SHIFT);
+ start, page_cnt << PAGE_SHIFT);
if (ret)
return ret;
i_done = 0;
@@ -1380,8 +1380,7 @@ again:
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
- start_index << PAGE_SHIFT,
- (page_cnt - i_done) << PAGE_SHIFT, true);
+ start, (page_cnt - i_done) << PAGE_SHIFT, true);
}
@@ -1408,8 +1407,7 @@ out:
put_page(pages[i]);
}
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
- start_index << PAGE_SHIFT,
- page_cnt << PAGE_SHIFT, true);
+ start, page_cnt << PAGE_SHIFT, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return ret;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c54ea6586632..77c54749f432 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3435,24 +3435,20 @@ static int qgroup_unreserve_range(struct btrfs_inode *inode,
{
struct rb_node *node;
struct rb_node *next;
- struct ulist_node *entry = NULL;
+ struct ulist_node *entry;
int ret = 0;
node = reserved->range_changed.root.rb_node;
+ if (!node)
+ return 0;
while (node) {
entry = rb_entry(node, struct ulist_node, rb_node);
if (entry->val < start)
node = node->rb_right;
- else if (entry)
- node = node->rb_left;
else
- break;
+ node = node->rb_left;
}
- /* Empty changeset */
- if (!entry)
- return 0;
-
if (entry->val > start && rb_prev(&entry->rb_node))
entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node,
rb_node);
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 7f03dbe5b609..78693d3dd15b 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -860,6 +860,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
"dropping a ref for a root that doesn't have a ref on the block");
dump_block_entry(fs_info, be);
dump_ref_action(fs_info, ra);
+ kfree(ref);
kfree(ra);
goto out_unlock;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 3602806d71bd..9ba92d86da0b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1648,6 +1648,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
struct btrfs_root_item *root_item;
struct btrfs_path *path;
struct extent_buffer *leaf;
+ int reserve_level;
int level;
int max_level;
int replaced = 0;
@@ -1696,7 +1697,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
* Thus the needed metadata size is at most root_level * nodesize,
* and * 2 since we have two trees to COW.
*/
- min_reserved = fs_info->nodesize * btrfs_root_level(root_item) * 2;
+ reserve_level = max_t(int, 1, btrfs_root_level(root_item));
+ min_reserved = fs_info->nodesize * reserve_level * 2;
memset(&next_key, 0, sizeof(next_key));
while (1) {
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index cf63f1e27a27..e71e7586e9eb 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3866,8 +3866,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
if (!is_dev_replace && !readonly &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
- rcu_str_deref(dev->name));
+ btrfs_err_in_rcu(fs_info,
+ "scrub on devid %llu: filesystem on %s is not writable",
+ devid, rcu_str_deref(dev->name));
ret = -EROFS;
goto out;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b1e48078c318..a6406b3b8c2b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1056,22 +1056,13 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
continue;
}
- if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
- /*
- * In the first step, keep the device which has
- * the correct fsid and the devid that is used
- * for the dev_replace procedure.
- * In the second step, the dev_replace state is
- * read from the device tree and it is known
- * whether the procedure is really active or
- * not, which means whether this device is
- * used or whether it should be removed.
- */
- if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
- &device->dev_state)) {
- continue;
- }
- }
+ /*
+ * We have already validated the presence of BTRFS_DEV_REPLACE_DEVID,
+ * in btrfs_init_dev_replace() so just continue.
+ */
+ if (device->devid == BTRFS_DEV_REPLACE_DEVID)
+ continue;
+
if (device->bdev) {
blkdev_put(device->bdev, device->mode);
device->bdev = NULL;
@@ -1080,9 +1071,6 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
list_del_init(&device->dev_alloc_list);
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
- if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
- &device->dev_state))
- fs_devices->rw_devices--;
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index 89bffa82ed74..c57bebfa48fe 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -74,7 +74,7 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci)
int i;
/* The file must need contents encryption, not filenames encryption */
- if (!fscrypt_needs_contents_encryption(inode))
+ if (!S_ISREG(inode->i_mode))
return 0;
/* The crypto mode must have a blk-crypto counterpart */
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index d3c3e5d9b41f..d595abb8ef90 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -269,9 +269,7 @@ unlock:
* New inodes may not have an inode number assigned yet.
* Hashing their inode number is delayed until later.
*/
- if (ci->ci_inode->i_ino == 0)
- WARN_ON(!(ci->ci_inode->i_state & I_CREATING));
- else
+ if (ci->ci_inode->i_ino)
fscrypt_hash_inode_number(ci, mk);
return 0;
}
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 139d0bed42f8..3e21c0e8adae 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -107,11 +107,9 @@ static struct page *erofs_read_inode(struct inode *inode,
i_gid_write(inode, le32_to_cpu(die->i_gid));
set_nlink(inode, le32_to_cpu(die->i_nlink));
- /* ns timestamp */
- inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
- le64_to_cpu(die->i_ctime);
- inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
- le32_to_cpu(die->i_ctime_nsec);
+ /* extended inode has its own timestamp */
+ inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime);
+ inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec);
inode->i_size = le64_to_cpu(die->i_size);
@@ -149,11 +147,9 @@ static struct page *erofs_read_inode(struct inode *inode,
i_gid_write(inode, le16_to_cpu(dic->i_gid));
set_nlink(inode, le16_to_cpu(dic->i_nlink));
- /* use build time to derive all file time */
- inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
- sbi->build_time;
- inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
- sbi->build_time_nsec;
+ /* use build time for compact inodes */
+ inode->i_ctime.tv_sec = sbi->build_time;
+ inode->i_ctime.tv_nsec = sbi->build_time_nsec;
inode->i_size = le32_to_cpu(dic->i_size);
if (erofs_inode_is_data_compressed(vi->datalayout))
@@ -167,6 +163,11 @@ static struct page *erofs_read_inode(struct inode *inode,
goto err_out;
}
+ inode->i_mtime.tv_sec = inode->i_ctime.tv_sec;
+ inode->i_atime.tv_sec = inode->i_ctime.tv_sec;
+ inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec;
+ inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
+
if (!nblks)
/* measure inode.i_blocks as generic filesystems */
inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 50912a5420b4..86fd3bf62af6 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1078,8 +1078,11 @@ out_allocpage:
cond_resched();
goto repeat;
}
- set_page_private(page, (unsigned long)pcl);
- SetPagePrivate(page);
+
+ if (tocache) {
+ set_page_private(page, (unsigned long)pcl);
+ SetPagePrivate(page);
+ }
out: /* the only exit (for tracing and debugging) */
return page;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 45fcdbf538d1..bf9429484462 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1028,9 +1028,6 @@ struct ext4_inode_info {
* protected by sbi->s_fc_lock.
*/
- /* Fast commit subtid when this inode was committed */
- unsigned int i_fc_committed_subtid;
-
/* Start of lblk range that needs to be committed in this fast commit */
ext4_lblk_t i_fc_lblk_start;
@@ -1234,13 +1231,13 @@ struct ext4_inode_info {
blocks */
#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
file systems */
-#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */
-#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */
-
#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
specified journal checksum */
#define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */
+#define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */
+#define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */
+
#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
~EXT4_MOUNT_##opt
@@ -1422,16 +1419,6 @@ struct ext4_super_block {
#ifdef __KERNEL__
-/*
- * run-time mount flags
- */
-#define EXT4_MF_MNTDIR_SAMPLED 0x0001
-#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
-#define EXT4_MF_FC_INELIGIBLE 0x0004 /* Fast commit ineligible */
-#define EXT4_MF_FC_COMMITTING 0x0008 /* File system underoing a fast
- * commit.
- */
-
#ifdef CONFIG_FS_ENCRYPTION
#define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL)
#else
@@ -1466,7 +1453,7 @@ struct ext4_sb_info {
struct buffer_head * __rcu *s_group_desc;
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
- unsigned int s_mount_flags;
+ unsigned long s_mount_flags;
unsigned int s_def_mount_opt;
ext4_fsblk_t s_sb_block;
atomic64_t s_resv_clusters;
@@ -1695,6 +1682,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
})
/*
+ * run-time mount flags
+ */
+enum {
+ EXT4_MF_MNTDIR_SAMPLED,
+ EXT4_MF_FS_ABORTED, /* Fatal error detected */
+ EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */
+ EXT4_MF_FC_COMMITTING /* File system underoing a fast
+ * commit.
+ */
+};
+
+static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
+{
+ set_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+static inline void ext4_clear_mount_flag(struct super_block *sb, int bit)
+{
+ clear_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+static inline int ext4_test_mount_flag(struct super_block *sb, int bit)
+{
+ return test_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+
+/*
* Simulate_fail codes
*/
#define EXT4_SIM_BBITMAP_EIO 1
@@ -1863,6 +1878,13 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010
#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020
#define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200
+/*
+ * The reason why "FAST_COMMIT" is a compat feature is that, FS becomes
+ * incompatible only if fast commit blocks are present in the FS. Since we
+ * clear the journal (and thus the fast commit blocks), we don't mark FS as
+ * incompatible. We also have a JBD2 incompat feature, which gets set when
+ * there are fast commit blocks present in the journal.
+ */
#define EXT4_FEATURE_COMPAT_FAST_COMMIT 0x0400
#define EXT4_FEATURE_COMPAT_STABLE_INODES 0x0800
@@ -2731,12 +2753,16 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
int ext4_fc_info_show(struct seq_file *seq, void *v);
void ext4_fc_init(struct super_block *sb, journal_t *journal);
void ext4_fc_init_inode(struct inode *inode);
-void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
+void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end);
-void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_link(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_create(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_inode(struct inode *inode);
+void __ext4_fc_track_unlink(handle_t *handle, struct inode *inode,
+ struct dentry *dentry);
+void __ext4_fc_track_link(handle_t *handle, struct inode *inode,
+ struct dentry *dentry);
+void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_link(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
void ext4_fc_start_ineligible(struct super_block *sb, int reason);
void ext4_fc_stop_ineligible(struct super_block *sb);
@@ -3452,7 +3478,7 @@ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
extern int ext4_ci_compare(const struct inode *parent,
const struct qstr *fname,
const struct qstr *entry, bool quick);
-extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
struct inode *inode);
extern int __ext4_link(struct inode *dir, struct inode *inode,
struct dentry *dentry);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 57cfa28919a9..17d7096b3212 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3724,7 +3724,6 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
out:
ext4_ext_show_leaf(inode, path);
- ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
return err;
}
@@ -3796,7 +3795,6 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
if (*allocated > map->m_len)
*allocated = map->m_len;
map->m_len = *allocated;
- ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
return 0;
}
@@ -4329,7 +4327,6 @@ got_allocated_blocks:
map->m_len = ar.len;
allocated = map->m_len;
ext4_ext_show_leaf(inode, path);
- ext4_fc_track_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1);
out:
ext4_ext_drop_refs(path);
kfree(path);
@@ -4602,7 +4599,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
ret = ext4_mark_inode_dirty(handle, inode);
if (unlikely(ret))
goto out_handle;
- ext4_fc_track_range(inode, offset >> inode->i_sb->s_blocksize_bits,
+ ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
(offset + len - 1) >> inode->i_sb->s_blocksize_bits);
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
@@ -4651,8 +4648,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
- ext4_fc_track_range(inode, offset >> blkbits,
- (offset + len - 1) >> blkbits);
ext4_fc_start_update(inode);
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 8d43058386c3..f2033e13a273 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -83,7 +83,7 @@
*
* Atomicity of commits
* --------------------
- * In order to gaurantee atomicity during the commit operation, fast commit
+ * In order to guarantee atomicity during the commit operation, fast commit
* uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
* tag contains CRC of the contents and TID of the transaction after which
* this fast commit should be applied. Recovery code replays fast commit
@@ -152,7 +152,31 @@ void ext4_fc_init_inode(struct inode *inode)
INIT_LIST_HEAD(&ei->i_fc_list);
init_waitqueue_head(&ei->i_fc_wait);
atomic_set(&ei->i_fc_updates, 0);
- ei->i_fc_committed_subtid = 0;
+}
+
+/* This function must be called with sbi->s_fc_lock held. */
+static void ext4_fc_wait_committing_inode(struct inode *inode)
+__releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
+{
+ wait_queue_head_t *wq;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+#if (BITS_PER_LONG < 64)
+ DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
+ EXT4_STATE_FC_COMMITTING);
+ wq = bit_waitqueue(&ei->i_state_flags,
+ EXT4_STATE_FC_COMMITTING);
+#else
+ DEFINE_WAIT_BIT(wait, &ei->i_flags,
+ EXT4_STATE_FC_COMMITTING);
+ wq = bit_waitqueue(&ei->i_flags,
+ EXT4_STATE_FC_COMMITTING);
+#endif
+ lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
+ schedule();
+ finish_wait(wq, &wait.wq_entry);
}
/*
@@ -176,22 +200,7 @@ restart:
goto out;
if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
- wait_queue_head_t *wq;
-#if (BITS_PER_LONG < 64)
- DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
- EXT4_STATE_FC_COMMITTING);
- wq = bit_waitqueue(&ei->i_state_flags,
- EXT4_STATE_FC_COMMITTING);
-#else
- DEFINE_WAIT_BIT(wait, &ei->i_flags,
- EXT4_STATE_FC_COMMITTING);
- wq = bit_waitqueue(&ei->i_flags,
- EXT4_STATE_FC_COMMITTING);
-#endif
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
- spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
- schedule();
- finish_wait(wq, &wait.wq_entry);
+ ext4_fc_wait_committing_inode(inode);
goto restart;
}
out:
@@ -234,26 +243,10 @@ restart:
}
if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
- wait_queue_head_t *wq;
-#if (BITS_PER_LONG < 64)
- DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
- EXT4_STATE_FC_COMMITTING);
- wq = bit_waitqueue(&ei->i_state_flags,
- EXT4_STATE_FC_COMMITTING);
-#else
- DEFINE_WAIT_BIT(wait, &ei->i_flags,
- EXT4_STATE_FC_COMMITTING);
- wq = bit_waitqueue(&ei->i_flags,
- EXT4_STATE_FC_COMMITTING);
-#endif
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
- spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
- schedule();
- finish_wait(wq, &wait.wq_entry);
+ ext4_fc_wait_committing_inode(inode);
goto restart;
}
- if (!list_empty(&ei->i_fc_list))
- list_del_init(&ei->i_fc_list);
+ list_del_init(&ei->i_fc_list);
spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
}
@@ -269,7 +262,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return;
- sbi->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
+ ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
WARN_ON(reason >= EXT4_FC_REASON_MAX);
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}
@@ -302,14 +295,14 @@ void ext4_fc_stop_ineligible(struct super_block *sb)
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return;
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
+ ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
}
static inline int ext4_fc_is_ineligible(struct super_block *sb)
{
- return (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FC_INELIGIBLE) ||
- atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
+ return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
+ atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
}
/*
@@ -323,13 +316,14 @@ static inline int ext4_fc_is_ineligible(struct super_block *sb)
* If enqueue is set, this function enqueues the inode in fast commit list.
*/
static int ext4_fc_track_template(
- struct inode *inode, int (*__fc_track_fn)(struct inode *, void *, bool),
+ handle_t *handle, struct inode *inode,
+ int (*__fc_track_fn)(struct inode *, void *, bool),
void *args, int enqueue)
{
- tid_t running_txn_tid;
bool update = false;
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ tid_t tid = 0;
int ret;
if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
@@ -339,15 +333,13 @@ static int ext4_fc_track_template(
if (ext4_fc_is_ineligible(inode->i_sb))
return -EINVAL;
- running_txn_tid = sbi->s_journal ?
- sbi->s_journal->j_commit_sequence + 1 : 0;
-
+ tid = handle->h_transaction->t_tid;
mutex_lock(&ei->i_fc_lock);
- if (running_txn_tid == ei->i_sync_tid) {
+ if (tid == ei->i_sync_tid) {
update = true;
} else {
ext4_fc_reset_inode(inode);
- ei->i_sync_tid = running_txn_tid;
+ ei->i_sync_tid = tid;
}
ret = __fc_track_fn(inode, args, update);
mutex_unlock(&ei->i_fc_lock);
@@ -358,7 +350,7 @@ static int ext4_fc_track_template(
spin_lock(&sbi->s_fc_lock);
if (list_empty(&EXT4_I(inode)->i_fc_list))
list_add_tail(&EXT4_I(inode)->i_fc_list,
- (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING) ?
+ (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
&sbi->s_fc_q[FC_Q_STAGING] :
&sbi->s_fc_q[FC_Q_MAIN]);
spin_unlock(&sbi->s_fc_lock);
@@ -384,7 +376,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
mutex_unlock(&ei->i_fc_lock);
node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
if (!node) {
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -397,7 +389,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
if (!node->fcd_name.name) {
kmem_cache_free(ext4_fc_dentry_cachep, node);
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_MEM);
+ EXT4_FC_REASON_NOMEM);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -411,7 +403,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
node->fcd_name.len = dentry->d_name.len;
spin_lock(&sbi->s_fc_lock);
- if (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING)
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
list_add_tail(&node->fcd_list,
&sbi->s_fc_dentry_q[FC_Q_STAGING]);
else
@@ -422,7 +414,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
return 0;
}
-void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
+void __ext4_fc_track_unlink(handle_t *handle,
+ struct inode *inode, struct dentry *dentry)
{
struct __track_dentry_update_args args;
int ret;
@@ -430,12 +423,18 @@ void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
args.dentry = dentry;
args.op = EXT4_FC_TAG_UNLINK;
- ret = ext4_fc_track_template(inode, __track_dentry_update,
+ ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
trace_ext4_fc_track_unlink(inode, dentry, ret);
}
-void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
+void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
+{
+ __ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
+}
+
+void __ext4_fc_track_link(handle_t *handle,
+ struct inode *inode, struct dentry *dentry)
{
struct __track_dentry_update_args args;
int ret;
@@ -443,20 +442,26 @@ void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
args.dentry = dentry;
args.op = EXT4_FC_TAG_LINK;
- ret = ext4_fc_track_template(inode, __track_dentry_update,
+ ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
trace_ext4_fc_track_link(inode, dentry, ret);
}
-void ext4_fc_track_create(struct inode *inode, struct dentry *dentry)
+void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
+{
+ __ext4_fc_track_link(handle, d_inode(dentry), dentry);
+}
+
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
{
struct __track_dentry_update_args args;
+ struct inode *inode = d_inode(dentry);
int ret;
args.dentry = dentry;
args.op = EXT4_FC_TAG_CREAT;
- ret = ext4_fc_track_template(inode, __track_dentry_update,
+ ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
trace_ext4_fc_track_create(inode, dentry, ret);
}
@@ -472,14 +477,20 @@ static int __track_inode(struct inode *inode, void *arg, bool update)
return 0;
}
-void ext4_fc_track_inode(struct inode *inode)
+void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
{
int ret;
if (S_ISDIR(inode->i_mode))
return;
- ret = ext4_fc_track_template(inode, __track_inode, NULL, 1);
+ if (ext4_should_journal_data(inode)) {
+ ext4_fc_mark_ineligible(inode->i_sb,
+ EXT4_FC_REASON_INODE_JOURNAL_DATA);
+ return;
+ }
+
+ ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
trace_ext4_fc_track_inode(inode, ret);
}
@@ -515,7 +526,7 @@ static int __track_range(struct inode *inode, void *arg, bool update)
return 0;
}
-void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
+void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end)
{
struct __track_range_args args;
@@ -527,7 +538,7 @@ void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
args.start = start;
args.end = end;
- ret = ext4_fc_track_template(inode, __track_range, &args, 1);
+ ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1);
trace_ext4_fc_track_range(inode, start, end, ret);
}
@@ -537,10 +548,11 @@ static void ext4_fc_submit_bh(struct super_block *sb)
int write_flags = REQ_SYNC;
struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
+ /* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */
if (test_opt(sb, BARRIER))
write_flags |= REQ_FUA | REQ_PREFLUSH;
lock_buffer(bh);
- clear_buffer_dirty(bh);
+ set_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = ext4_end_buffer_io_sync;
submit_bh(REQ_OP_WRITE, write_flags, bh);
@@ -846,7 +858,7 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
int ret = 0;
spin_lock(&sbi->s_fc_lock);
- sbi->s_mount_flags |= EXT4_MF_FC_COMMITTING;
+ ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
@@ -900,6 +912,8 @@ static int ext4_fc_wait_inode_data_all(journal_t *journal)
/* Commit all the directory entry updates */
static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
+__acquires(&sbi->s_fc_lock)
+__releases(&sbi->s_fc_lock)
{
struct super_block *sb = (struct super_block *)(journal->j_private);
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -996,6 +1010,13 @@ static int ext4_fc_perform_commit(journal_t *journal)
if (ret)
return ret;
+ /*
+ * If file system device is different from journal device, issue a cache
+ * flush before we start writing fast commit blocks.
+ */
+ if (journal->j_fs_dev != journal->j_dev)
+ blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
+
blk_start_plug(&plug);
if (sbi->s_fc_bytes == 0) {
/*
@@ -1031,8 +1052,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
if (ret)
goto out;
spin_lock(&sbi->s_fc_lock);
- EXT4_I(inode)->i_fc_committed_subtid =
- atomic_read(&sbi->s_fc_subtid);
}
spin_unlock(&sbi->s_fc_lock);
@@ -1131,7 +1150,7 @@ out:
"Fast commit ended with blks = %d, reason = %d, subtid - %d",
nblks, reason, subtid);
if (reason == EXT4_FC_REASON_FC_FAILED)
- return jbd2_fc_end_commit_fallback(journal, commit_tid);
+ return jbd2_fc_end_commit_fallback(journal);
if (reason == EXT4_FC_REASON_FC_START_FAILED ||
reason == EXT4_FC_REASON_INELIGIBLE)
return jbd2_complete_transaction(journal, commit_tid);
@@ -1190,8 +1209,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
&sbi->s_fc_q[FC_Q_STAGING]);
- sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
- sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
if (full)
sbi->s_fc_bytes = 0;
@@ -1263,7 +1282,7 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
return 0;
}
- ret = __ext4_unlink(old_parent, &entry, inode);
+ ret = __ext4_unlink(NULL, old_parent, &entry, inode);
/* -ENOENT ok coz it might not exist anymore. */
if (ret == -ENOENT)
ret = 0;
@@ -2079,8 +2098,6 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
void ext4_fc_init(struct super_block *sb, journal_t *journal)
{
- int num_fc_blocks;
-
/*
* We set replay callback even if fast commit disabled because we may
* could still have fast commit blocks that need to be replayed even if
@@ -2090,21 +2107,9 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal)
if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
return;
journal->j_fc_cleanup_callback = ext4_fc_cleanup;
- if (!buffer_uptodate(journal->j_sb_buffer)
- && ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO,
- true)) {
- ext4_msg(sb, KERN_ERR, "I/O error on journal");
- return;
- }
- num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks);
- if (jbd2_fc_init(journal, num_fc_blocks ? num_fc_blocks :
- EXT4_NUM_FC_BLKS)) {
- pr_warn("Error while enabling fast commits, turning off.");
- ext4_clear_feature_fast_commit(sb);
- }
}
-const char *fc_ineligible_reasons[] = {
+static const char *fc_ineligible_reasons[] = {
"Extended attributes changed",
"Cross rename",
"Journal flag changed",
@@ -2113,6 +2118,7 @@ const char *fc_ineligible_reasons[] = {
"Resize",
"Dir renamed",
"Falloc range op",
+ "Data journalling",
"FC Commit Failed"
};
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index 06907d485989..3a6e5a1fa1b8 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -3,9 +3,6 @@
#ifndef __FAST_COMMIT_H__
#define __FAST_COMMIT_H__
-/* Number of blocks in journal area to allocate for fast commits */
-#define EXT4_NUM_FC_BLKS 256
-
/* Fast commit tags */
#define EXT4_FC_TAG_ADD_RANGE 0x0001
#define EXT4_FC_TAG_DEL_RANGE 0x0002
@@ -100,11 +97,12 @@ enum {
EXT4_FC_REASON_XATTR = 0,
EXT4_FC_REASON_CROSS_RENAME,
EXT4_FC_REASON_JOURNAL_FLAG_CHANGE,
- EXT4_FC_REASON_MEM,
+ EXT4_FC_REASON_NOMEM,
EXT4_FC_REASON_SWAP_BOOT,
EXT4_FC_REASON_RESIZE,
EXT4_FC_REASON_RENAME_DIR,
EXT4_FC_REASON_FALLOC_RANGE,
+ EXT4_FC_REASON_INODE_JOURNAL_DATA,
EXT4_FC_COMMIT_FAILED,
EXT4_FC_REASON_MAX
};
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d85412d12e3a..3ed8c048fb12 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -761,7 +761,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (!daxdev_mapping_supported(vma, dax_dev))
return -EOPNOTSUPP;
- ext4_fc_start_update(inode);
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
@@ -769,7 +768,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
} else {
vma->vm_ops = &ext4_file_vm_ops;
}
- ext4_fc_stop_update(inode);
return 0;
}
@@ -782,13 +780,13 @@ static int ext4_sample_last_mounted(struct super_block *sb,
handle_t *handle;
int err;
- if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
+ if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
return 0;
if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
return 0;
- sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+ ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
/*
* Sample where the filesystem has been mounted and
* store it in the superblock for sysadmin convenience
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index b232c2767534..4c2a9fe30067 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -280,7 +280,7 @@ static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys,
/* Fabricate an rmap entry for the external log device. */
irec.fmr_physical = journal->j_blk_offset;
- irec.fmr_length = journal->j_maxlen;
+ irec.fmr_length = journal->j_total_len;
irec.fmr_owner = EXT4_FMR_OWN_LOG;
irec.fmr_flags = 0;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 81a545fd14a3..a42ca95840f2 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -143,7 +143,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (sb_rdonly(inode->i_sb)) {
/* Make sure that we read updated s_mount_flags value */
smp_rmb();
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))
ret = -EROFS;
goto out;
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index caa51473207d..b41512d1badc 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1880,6 +1880,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
+ ext4_write_unlock_xattr(inode, &no_expand);
*has_inline = 0;
ext4_journal_stop(handle);
return 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b96a18679a27..0d8385aea898 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -327,6 +327,8 @@ stop_handle:
ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
+ if (!list_empty(&EXT4_I(inode)->i_fc_list))
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
}
@@ -730,7 +732,7 @@ out_sem:
if (ret)
return ret;
}
- ext4_fc_track_range(inode, map->m_lblk,
+ ext4_fc_track_range(handle, inode, map->m_lblk,
map->m_lblk + map->m_len - 1);
}
@@ -2440,7 +2442,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
struct super_block *sb = inode->i_sb;
if (ext4_forced_shutdown(EXT4_SB(sb)) ||
- EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
goto invalidate_dirty_pages;
/*
* Let the uper layers retry transient errors.
@@ -2674,7 +2676,7 @@ static int ext4_writepages(struct address_space *mapping,
* the stack trace.
*/
if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
- sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+ ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
ret = -EROFS;
goto out_writepages;
}
@@ -3310,8 +3312,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
EXT4_I(inode)->i_datasync_tid))
return false;
if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
- return atomic_read(&EXT4_SB(inode->i_sb)->s_fc_subtid) <
- EXT4_I(inode)->i_fc_committed_subtid;
+ return !list_empty(&EXT4_I(inode)->i_fc_list);
return true;
}
@@ -4109,7 +4110,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
up_write(&EXT4_I(inode)->i_data_sem);
}
- ext4_fc_track_range(inode, first_block, stop_block);
+ ext4_fc_track_range(handle, inode, first_block, stop_block);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
@@ -5442,14 +5443,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
}
if (shrink)
- ext4_fc_track_range(inode,
+ ext4_fc_track_range(handle, inode,
(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
inode->i_sb->s_blocksize_bits,
(oldsize > 0 ? oldsize - 1 : 0) >>
inode->i_sb->s_blocksize_bits);
else
ext4_fc_track_range(
- inode,
+ handle, inode,
(oldsize > 0 ? oldsize - 1 : oldsize) >>
inode->i_sb->s_blocksize_bits,
(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
@@ -5699,7 +5700,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
put_bh(iloc->bh);
return -EIO;
}
- ext4_fc_track_inode(inode);
+ ext4_fc_track_inode(handle, inode);
if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 85abbfb98cbe..24af9ed5c3e5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4477,7 +4477,7 @@ static inline void ext4_mb_show_pa(struct super_block *sb)
{
ext4_group_t i, ngroups;
- if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
return;
ngroups = ext4_get_groups_count(sb);
@@ -4508,7 +4508,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
struct super_block *sb = ac->ac_sb;
- if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
return;
mb_debug(sb, "Can't allocate:"
@@ -5167,7 +5167,7 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
struct super_block *sb = ar->inode->i_sb;
ext4_group_t group;
ext4_grpblk_t blkoff;
- int i;
+ int i = sb->s_blocksize;
ext4_fsblk_t goal, block;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f458d1d81d96..33509266f5a0 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2606,7 +2606,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
handle_t *handle;
- struct inode *inode, *inode_save;
+ struct inode *inode;
int err, credits, retries = 0;
err = dquot_initialize(dir);
@@ -2624,11 +2624,9 @@ retry:
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
- inode_save = inode;
- ihold(inode_save);
err = ext4_add_nondir(handle, dentry, &inode);
- ext4_fc_track_create(inode_save, dentry);
- iput(inode_save);
+ if (!err)
+ ext4_fc_track_create(handle, dentry);
}
if (handle)
ext4_journal_stop(handle);
@@ -2643,7 +2641,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
handle_t *handle;
- struct inode *inode, *inode_save;
+ struct inode *inode;
int err, credits, retries = 0;
err = dquot_initialize(dir);
@@ -2660,12 +2658,9 @@ retry:
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &ext4_special_inode_operations;
- inode_save = inode;
- ihold(inode_save);
err = ext4_add_nondir(handle, dentry, &inode);
if (!err)
- ext4_fc_track_create(inode_save, dentry);
- iput(inode_save);
+ ext4_fc_track_create(handle, dentry);
}
if (handle)
ext4_journal_stop(handle);
@@ -2829,7 +2824,6 @@ out_clear_inode:
iput(inode);
goto out_retry;
}
- ext4_fc_track_create(inode, dentry);
ext4_inc_count(dir);
ext4_update_dx_flag(dir);
@@ -2837,6 +2831,7 @@ out_clear_inode:
if (err)
goto out_clear_inode;
d_instantiate_new(dentry, inode);
+ ext4_fc_track_create(handle, dentry);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
@@ -3171,7 +3166,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
goto end_rmdir;
ext4_dec_count(dir);
ext4_update_dx_flag(dir);
- ext4_fc_track_unlink(inode, dentry);
+ ext4_fc_track_unlink(handle, dentry);
retval = ext4_mark_inode_dirty(handle, dir);
#ifdef CONFIG_UNICODE
@@ -3192,13 +3187,12 @@ end_rmdir:
return retval;
}
-int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
struct inode *inode)
{
int retval = -ENOENT;
struct buffer_head *bh;
struct ext4_dir_entry_2 *de;
- handle_t *handle = NULL;
int skip_remove_dentry = 0;
bh = ext4_find_entry(dir, d_name, &de, NULL);
@@ -3217,14 +3211,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
skip_remove_dentry = 1;
else
- goto out_bh;
- }
-
- handle = ext4_journal_start(dir, EXT4_HT_DIR,
- EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
- if (IS_ERR(handle)) {
- retval = PTR_ERR(handle);
- goto out_bh;
+ goto out;
}
if (IS_DIRSYNC(dir))
@@ -3233,12 +3220,12 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
if (!skip_remove_dentry) {
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
- goto out_handle;
+ goto out;
dir->i_ctime = dir->i_mtime = current_time(dir);
ext4_update_dx_flag(dir);
retval = ext4_mark_inode_dirty(handle, dir);
if (retval)
- goto out_handle;
+ goto out;
} else {
retval = 0;
}
@@ -3252,15 +3239,14 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
inode->i_ctime = current_time(inode);
retval = ext4_mark_inode_dirty(handle, inode);
-out_handle:
- ext4_journal_stop(handle);
-out_bh:
+out:
brelse(bh);
return retval;
}
static int ext4_unlink(struct inode *dir, struct dentry *dentry)
{
+ handle_t *handle;
int retval;
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
@@ -3278,9 +3264,16 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (retval)
goto out_trace;
- retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry));
+ handle = ext4_journal_start(dir, EXT4_HT_DIR,
+ EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
+ if (IS_ERR(handle)) {
+ retval = PTR_ERR(handle);
+ goto out_trace;
+ }
+
+ retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
if (!retval)
- ext4_fc_track_unlink(d_inode(dentry), dentry);
+ ext4_fc_track_unlink(handle, dentry);
#ifdef CONFIG_UNICODE
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
@@ -3291,6 +3284,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
+ if (handle)
+ ext4_journal_stop(handle);
out_trace:
trace_ext4_unlink_exit(dentry, retval);
@@ -3447,7 +3442,6 @@ retry:
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
- ext4_fc_track_link(inode, dentry);
err = ext4_mark_inode_dirty(handle, inode);
/* this can happen only for tmpfile being
* linked the first time
@@ -3455,6 +3449,7 @@ retry:
if (inode->i_nlink == 1)
ext4_orphan_del(handle, inode);
d_instantiate(dentry, inode);
+ ext4_fc_track_link(handle, dentry);
} else {
drop_nlink(inode);
iput(inode);
@@ -3915,9 +3910,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
EXT4_FC_REASON_RENAME_DIR);
} else {
if (new.inode)
- ext4_fc_track_unlink(new.inode, new.dentry);
- ext4_fc_track_link(old.inode, new.dentry);
- ext4_fc_track_unlink(old.inode, old.dentry);
+ ext4_fc_track_unlink(handle, new.dentry);
+ __ext4_fc_track_link(handle, old.inode, new.dentry);
+ __ext4_fc_track_unlink(handle, old.inode, old.dentry);
}
if (new.inode) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ef4734b40e2a..6633b20224d5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -289,18 +289,7 @@ void ext4_superblock_csum_set(struct super_block *sb)
if (!ext4_has_metadata_csum(sb))
return;
- /*
- * Locking the superblock prevents the scenario
- * where:
- * 1) a first thread pauses during checksum calculation.
- * 2) a second thread updates the superblock, recalculates
- * the checksum, and updates s_checksum
- * 3) the first thread resumes and finishes its checksum calculation
- * and updates s_checksum with a potentially stale or torn value.
- */
- lock_buffer(EXT4_SB(sb)->s_sbh);
es->s_checksum = ext4_superblock_csum(sb, es);
- unlock_buffer(EXT4_SB(sb)->s_sbh);
}
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
@@ -686,7 +675,7 @@ static void ext4_handle_error(struct super_block *sb)
if (!test_opt(sb, ERRORS_CONT)) {
journal_t *journal = EXT4_SB(sb)->s_journal;
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
if (journal)
jbd2_journal_abort(journal, -EIO);
}
@@ -904,7 +893,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
va_end(args);
if (sb_rdonly(sb) == 0) {
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
@@ -1716,11 +1705,10 @@ enum {
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
- Opt_prefetch_block_bitmaps, Opt_no_fc,
+ Opt_prefetch_block_bitmaps,
#ifdef CONFIG_EXT4_DEBUG
- Opt_fc_debug_max_replay,
+ Opt_fc_debug_max_replay, Opt_fc_debug_force
#endif
- Opt_fc_debug_force
};
static const match_table_t tokens = {
@@ -1807,9 +1795,8 @@ static const match_table_t tokens = {
{Opt_init_itable, "init_itable=%u"},
{Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"},
- {Opt_no_fc, "no_fc"},
- {Opt_fc_debug_force, "fc_debug_force"},
#ifdef CONFIG_EXT4_DEBUG
+ {Opt_fc_debug_force, "fc_debug_force"},
{Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"},
#endif
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
@@ -2027,8 +2014,8 @@ static const struct mount_opts {
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
MOPT_CLEAR | MOPT_Q},
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
+ {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
+ {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
{Opt_offgrpjquota, 0, MOPT_Q},
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
@@ -2039,11 +2026,9 @@ static const struct mount_opts {
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
MOPT_SET},
- {Opt_no_fc, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
- MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY},
+#ifdef CONFIG_EXT4_DEBUG
{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
-#ifdef CONFIG_EXT4_DEBUG
{Opt_fc_debug_max_replay, 0, MOPT_GTE0},
#endif
{Opt_err, 0, 0}
@@ -2153,7 +2138,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
return 1;
case Opt_abort:
- sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
return 1;
case Opt_i_version:
sb->s_flags |= SB_I_VERSION;
@@ -3976,7 +3961,7 @@ int ext4_calculate_overhead(struct super_block *sb)
* loaded or not
*/
if (sbi->s_journal && !sbi->s_journal_bdev)
- overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
+ overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
/* j_inum for internal journal is non-zero */
j_inode = ext4_get_journal_inode(sb, j_inum);
@@ -4340,9 +4325,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#endif
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
- printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n");
+ printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
/* can't mount with both data=journal and dioread_nolock. */
clear_opt(sb, DIOREAD_NOLOCK);
+ clear_opt2(sb, JOURNAL_FAST_COMMIT);
if (test_opt2(sb, EXPLICIT_DELALLOC)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and delalloc");
@@ -4777,8 +4763,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
sbi->s_fc_bytes = 0;
- sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
- sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
spin_lock_init(&sbi->s_fc_lock);
memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
sbi->s_fc_replay_state.fc_regions = NULL;
@@ -4857,6 +4843,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount_wq;
}
+ if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+ !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to set fast commit journal feature");
+ goto failed_mount_wq;
+ }
+
/* We have now updated the journal if required, so we can
* validate the data journaling mode. */
switch (test_opt(sb, DATA_FLAGS)) {
@@ -5872,7 +5866,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
@@ -5886,7 +5880,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
err = -EROFS;
goto restore_opts;
}
@@ -6560,10 +6554,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
brelse(bh);
out:
if (inode->i_size < off + len) {
- ext4_fc_track_range(inode,
- (inode->i_size > 0 ? inode->i_size - 1 : 0)
- >> inode->i_sb->s_blocksize_bits,
- (off + len) >> inode->i_sb->s_blocksize_bits);
i_size_write(inode, off + len);
EXT4_I(inode)->i_disksize = inode->i_size;
err2 = ext4_mark_inode_dirty(handle, inode);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 9cd2ecad07db..cc4f987687f3 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -77,7 +77,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
if (error)
return error;
if (!buffer_mapped(bh_result))
- return -EIO;
+ return -ENODATA;
return 0;
}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8dff9cbd0a87..62d9081d1e26 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1301,12 +1301,8 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
ret = gfs2_iomap_get(inode, pos, length, flags, &iomap, &mp);
- if (!ret && iomap.type == IOMAP_HOLE) {
- if (create)
- ret = gfs2_iomap_alloc(inode, &iomap, &mp);
- else
- ret = -ENODATA;
- }
+ if (create && !ret && iomap.type == IOMAP_HOLE)
+ ret = gfs2_iomap_alloc(inode, &iomap, &mp);
release_metapath(&mp);
if (ret)
goto out;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 9133b3178677..2e9314091c81 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -132,6 +132,8 @@ __acquires(&sdp->sd_ail_lock)
spin_unlock(&sdp->sd_ail_lock);
ret = generic_writepages(mapping, wbc);
spin_lock(&sdp->sd_ail_lock);
+ if (ret == -ENODATA) /* if a jdata write into a new hole */
+ ret = 0; /* ignore it */
if (ret || wbc->nr_to_write <= 0)
break;
return -EBUSY;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 92d799a193b8..f7addc6197ed 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2529,13 +2529,13 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
rbm.rgd = rgd;
error = gfs2_rbm_from_block(&rbm, no_addr);
- if (WARN_ON_ONCE(error))
- goto fail;
-
- if (gfs2_testbit(&rbm, false) != type)
- error = -ESTALE;
+ if (!WARN_ON_ONCE(error)) {
+ if (gfs2_testbit(&rbm, false) != type)
+ error = -ESTALE;
+ }
gfs2_glock_dq_uninit(&rgd_gh);
+
fail:
return error;
}
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 02894df7656d..b53c055bea6a 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -482,6 +482,10 @@ static void io_impersonate_work(struct io_worker *worker,
current->files = work->identity->files;
current->nsproxy = work->identity->nsproxy;
task_unlock(current);
+ if (!work->identity->files) {
+ /* failed grabbing files, ensure work gets cancelled */
+ work->flags |= IO_WQ_WORK_CANCEL;
+ }
}
if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
current->fs = work->identity->fs;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a7429c977eb3..4ead291b2976 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -995,20 +995,33 @@ static void io_sq_thread_drop_mm(void)
if (mm) {
kthread_unuse_mm(mm);
mmput(mm);
+ current->mm = NULL;
}
}
static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
{
- if (!current->mm) {
- if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) ||
- !ctx->sqo_task->mm ||
- !mmget_not_zero(ctx->sqo_task->mm)))
- return -EFAULT;
- kthread_use_mm(ctx->sqo_task->mm);
+ struct mm_struct *mm;
+
+ if (current->mm)
+ return 0;
+
+ /* Should never happen */
+ if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL)))
+ return -EFAULT;
+
+ task_lock(ctx->sqo_task);
+ mm = ctx->sqo_task->mm;
+ if (unlikely(!mm || !mmget_not_zero(mm)))
+ mm = NULL;
+ task_unlock(ctx->sqo_task);
+
+ if (mm) {
+ kthread_use_mm(mm);
+ return 0;
}
- return 0;
+ return -EFAULT;
}
static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
@@ -1274,9 +1287,12 @@ static bool io_identity_cow(struct io_kiocb *req)
/* add one for this request */
refcount_inc(&id->count);
- /* drop old identity, assign new one. one ref for req, one for tctx */
- if (req->work.identity != tctx->identity &&
- refcount_sub_and_test(2, &req->work.identity->count))
+ /* drop tctx and req identity references, if needed */
+ if (tctx->identity != &tctx->__identity &&
+ refcount_dec_and_test(&tctx->identity->count))
+ kfree(tctx->identity);
+ if (req->work.identity != &tctx->__identity &&
+ refcount_dec_and_test(&req->work.identity->count))
kfree(req->work.identity);
req->work.identity = id;
@@ -1577,14 +1593,29 @@ static void io_cqring_mark_overflow(struct io_ring_ctx *ctx)
}
}
-static inline bool io_match_files(struct io_kiocb *req,
- struct files_struct *files)
+static inline bool __io_match_files(struct io_kiocb *req,
+ struct files_struct *files)
{
+ return ((req->flags & REQ_F_WORK_INITIALIZED) &&
+ (req->work.flags & IO_WQ_WORK_FILES)) &&
+ req->work.identity->files == files;
+}
+
+static bool io_match_files(struct io_kiocb *req,
+ struct files_struct *files)
+{
+ struct io_kiocb *link;
+
if (!files)
return true;
- if ((req->flags & REQ_F_WORK_INITIALIZED) &&
- (req->work.flags & IO_WQ_WORK_FILES))
- return req->work.identity->files == files;
+ if (__io_match_files(req, files))
+ return true;
+ if (req->flags & REQ_F_LINK_HEAD) {
+ list_for_each_entry(link, &req->link_list, link_list) {
+ if (__io_match_files(link, files))
+ return true;
+ }
+ }
return false;
}
@@ -1668,7 +1699,8 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, cflags);
- } else if (ctx->cq_overflow_flushed || req->task->io_uring->in_idle) {
+ } else if (ctx->cq_overflow_flushed ||
+ atomic_read(&req->task->io_uring->in_idle)) {
/*
* If we're in ring overflow flush mode, or in task cancel mode,
* then we cannot store the request for later flushing, we need
@@ -1838,7 +1870,7 @@ static void __io_free_req(struct io_kiocb *req)
io_dismantle_req(req);
percpu_counter_dec(&tctx->inflight);
- if (tctx->in_idle)
+ if (atomic_read(&tctx->in_idle))
wake_up(&tctx->wait);
put_task_struct(req->task);
@@ -3515,8 +3547,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
* we return to userspace.
*/
if (req->flags & REQ_F_ISREG) {
- __sb_start_write(file_inode(req->file)->i_sb,
- SB_FREEZE_WRITE, true);
+ sb_start_write(file_inode(req->file)->i_sb);
__sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE);
}
@@ -7695,7 +7726,8 @@ static int io_uring_alloc_task_context(struct task_struct *task)
xa_init(&tctx->xa);
init_waitqueue_head(&tctx->wait);
tctx->last = NULL;
- tctx->in_idle = 0;
+ atomic_set(&tctx->in_idle, 0);
+ tctx->sqpoll = false;
io_init_identity(&tctx->__identity);
tctx->identity = &tctx->__identity;
task->io_uring = tctx;
@@ -8388,22 +8420,6 @@ static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req)
return false;
}
-static bool io_match_link_files(struct io_kiocb *req,
- struct files_struct *files)
-{
- struct io_kiocb *link;
-
- if (io_match_files(req, files))
- return true;
- if (req->flags & REQ_F_LINK_HEAD) {
- list_for_each_entry(link, &req->link_list, link_list) {
- if (io_match_files(link, files))
- return true;
- }
- }
- return false;
-}
-
/*
* We're looking to cancel 'req' because it's holding on to our files, but
* 'req' could be a link to another request. See if it is, and cancel that
@@ -8453,7 +8469,21 @@ static bool io_timeout_remove_link(struct io_ring_ctx *ctx,
static bool io_cancel_link_cb(struct io_wq_work *work, void *data)
{
- return io_match_link(container_of(work, struct io_kiocb, work), data);
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ bool ret;
+
+ if (req->flags & REQ_F_LINK_TIMEOUT) {
+ unsigned long flags;
+ struct io_ring_ctx *ctx = req->ctx;
+
+ /* protect against races with linked timeouts */
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ ret = io_match_link(req, data);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ } else {
+ ret = io_match_link(req, data);
+ }
+ return ret;
}
static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
@@ -8479,6 +8509,7 @@ static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
}
static void io_cancel_defer_files(struct io_ring_ctx *ctx,
+ struct task_struct *task,
struct files_struct *files)
{
struct io_defer_entry *de = NULL;
@@ -8486,7 +8517,8 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
spin_lock_irq(&ctx->completion_lock);
list_for_each_entry_reverse(de, &ctx->defer_list, list) {
- if (io_match_link_files(de->req, files)) {
+ if (io_task_match(de->req, task) &&
+ io_match_files(de->req, files)) {
list_cut_position(&list, &ctx->defer_list, &de->list);
break;
}
@@ -8512,7 +8544,6 @@ static bool io_uring_cancel_files(struct io_ring_ctx *ctx,
if (list_empty_careful(&ctx->inflight_list))
return false;
- io_cancel_defer_files(ctx, files);
/* cancel all at once, should be faster than doing it one by one*/
io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true);
@@ -8598,8 +8629,16 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
{
struct task_struct *task = current;
- if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data)
+ if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
task = ctx->sq_data->thread;
+ atomic_inc(&task->io_uring->in_idle);
+ io_sq_thread_park(ctx->sq_data);
+ }
+
+ if (files)
+ io_cancel_defer_files(ctx, NULL, files);
+ else
+ io_cancel_defer_files(ctx, task, NULL);
io_cqring_overflow_flush(ctx, true, task, files);
@@ -8607,12 +8646,23 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
io_run_task_work();
cond_resched();
}
+
+ if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
+ atomic_dec(&task->io_uring->in_idle);
+ /*
+ * If the files that are going away are the ones in the thread
+ * identity, clear them out.
+ */
+ if (task->io_uring->identity->files == files)
+ task->io_uring->identity->files = NULL;
+ io_sq_thread_unpark(ctx->sq_data);
+ }
}
/*
* Note that this task has used io_uring. We use it for cancelation purposes.
*/
-static int io_uring_add_task_file(struct file *file)
+static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
{
struct io_uring_task *tctx = current->io_uring;
@@ -8634,6 +8684,14 @@ static int io_uring_add_task_file(struct file *file)
tctx->last = file;
}
+ /*
+ * This is race safe in that the task itself is doing this, hence it
+ * cannot be going through the exit/cancel paths at the same time.
+ * This cannot be modified while exit/cancel is running.
+ */
+ if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL))
+ tctx->sqpoll = true;
+
return 0;
}
@@ -8675,7 +8733,7 @@ void __io_uring_files_cancel(struct files_struct *files)
unsigned long index;
/* make sure overflow events are dropped */
- tctx->in_idle = true;
+ atomic_inc(&tctx->in_idle);
xa_for_each(&tctx->xa, index, file) {
struct io_ring_ctx *ctx = file->private_data;
@@ -8684,6 +8742,35 @@ void __io_uring_files_cancel(struct files_struct *files)
if (files)
io_uring_del_task_file(file);
}
+
+ atomic_dec(&tctx->in_idle);
+}
+
+static s64 tctx_inflight(struct io_uring_task *tctx)
+{
+ unsigned long index;
+ struct file *file;
+ s64 inflight;
+
+ inflight = percpu_counter_sum(&tctx->inflight);
+ if (!tctx->sqpoll)
+ return inflight;
+
+ /*
+ * If we have SQPOLL rings, then we need to iterate and find them, and
+ * add the pending count for those.
+ */
+ xa_for_each(&tctx->xa, index, file) {
+ struct io_ring_ctx *ctx = file->private_data;
+
+ if (ctx->flags & IORING_SETUP_SQPOLL) {
+ struct io_uring_task *__tctx = ctx->sqo_task->io_uring;
+
+ inflight += percpu_counter_sum(&__tctx->inflight);
+ }
+ }
+
+ return inflight;
}
/*
@@ -8697,11 +8784,11 @@ void __io_uring_task_cancel(void)
s64 inflight;
/* make sure overflow events are dropped */
- tctx->in_idle = true;
+ atomic_inc(&tctx->in_idle);
do {
/* read completions before cancelations */
- inflight = percpu_counter_sum(&tctx->inflight);
+ inflight = tctx_inflight(tctx);
if (!inflight)
break;
__io_uring_files_cancel(NULL);
@@ -8712,13 +8799,13 @@ void __io_uring_task_cancel(void)
* If we've seen completions, retry. This avoids a race where
* a completion comes in before we did prepare_to_wait().
*/
- if (inflight != percpu_counter_sum(&tctx->inflight))
+ if (inflight != tctx_inflight(tctx))
continue;
schedule();
} while (1);
finish_wait(&tctx->wait, &wait);
- tctx->in_idle = false;
+ atomic_dec(&tctx->in_idle);
}
static int io_uring_flush(struct file *file, void *data)
@@ -8863,7 +8950,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
io_sqpoll_wait_sq(ctx);
submitted = to_submit;
} else if (to_submit) {
- ret = io_uring_add_task_file(f.file);
+ ret = io_uring_add_task_file(ctx, f.file);
if (unlikely(ret))
goto out;
mutex_lock(&ctx->uring_lock);
@@ -8900,7 +8987,8 @@ out_fput:
#ifdef CONFIG_PROC_FS
static int io_uring_show_cred(int id, void *p, void *data)
{
- const struct cred *cred = p;
+ struct io_identity *iod = p;
+ const struct cred *cred = iod->creds;
struct seq_file *m = data;
struct user_namespace *uns = seq_user_ns(m);
struct group_info *gi;
@@ -9092,7 +9180,7 @@ err_fd:
#if defined(CONFIG_UNIX)
ctx->ring_sock->file = file;
#endif
- if (unlikely(io_uring_add_task_file(file))) {
+ if (unlikely(io_uring_add_task_file(ctx, file))) {
file = ERR_PTR(-ENOMEM);
goto err_fd;
}
@@ -9137,6 +9225,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
* to a power-of-two, if it isn't already. We do NOT impose
* any cq vs sq ring sizing.
*/
+ p->cq_entries = roundup_pow_of_two(p->cq_entries);
if (p->cq_entries < p->sq_entries)
return -EINVAL;
if (p->cq_entries > IORING_MAX_CQ_ENTRIES) {
@@ -9144,7 +9233,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
return -EINVAL;
p->cq_entries = IORING_MAX_CQ_ENTRIES;
}
- p->cq_entries = roundup_pow_of_two(p->cq_entries);
} else {
p->cq_entries = 2 * p->sq_entries;
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 8180061b9e16..10cc7979ce38 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1374,6 +1374,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
WARN_ON_ONCE(!PageLocked(page));
WARN_ON_ONCE(PageWriteback(page));
+ WARN_ON_ONCE(PageDirty(page));
/*
* We cannot cancel the ioend directly here on error. We may have
@@ -1382,33 +1383,22 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
* appropriately.
*/
if (unlikely(error)) {
+ /*
+ * Let the filesystem know what portion of the current page
+ * failed to map. If the page wasn't been added to ioend, it
+ * won't be affected by I/O completion and we must unlock it
+ * now.
+ */
+ if (wpc->ops->discard_page)
+ wpc->ops->discard_page(page, file_offset);
if (!count) {
- /*
- * If the current page hasn't been added to ioend, it
- * won't be affected by I/O completions and we must
- * discard and unlock it right here.
- */
- if (wpc->ops->discard_page)
- wpc->ops->discard_page(page);
ClearPageUptodate(page);
unlock_page(page);
goto done;
}
-
- /*
- * If the page was not fully cleaned, we need to ensure that the
- * higher layers come back to it correctly. That means we need
- * to keep the page dirty, and for WB_SYNC_ALL writeback we need
- * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
- * so another attempt to write this page in this writeback sweep
- * will be made.
- */
- set_page_writeback_keepwrite(page);
- } else {
- clear_page_dirty_for_io(page);
- set_page_writeback(page);
}
+ set_page_writeback(page);
unlock_page(page);
/*
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 263f02ad8ebf..472932b9e6bc 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -106,6 +106,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
* for a checkpoint to free up some space in the log.
*/
void __jbd2_log_wait_for_space(journal_t *journal)
+__acquires(&journal->j_state_lock)
+__releases(&journal->j_state_lock)
{
int nblocks, space_left;
/* assert_spin_locked(&journal->j_state_lock); */
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index fa688e163a80..b121d7d434c6 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -450,6 +450,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
schedule();
write_lock(&journal->j_state_lock);
finish_wait(&journal->j_fc_wait, &wait);
+ /*
+ * TODO: by blocking fast commits here, we are increasing
+ * fsync() latency slightly. Strictly speaking, we don't need
+ * to block fast commits until the transaction enters T_FLUSH
+ * state. So an optimization is possible where we block new fast
+ * commits here and wait for existing ones to complete
+ * just before we enter T_FLUSH. That way, the existing fast
+ * commits and this full commit can proceed parallely.
+ */
}
write_unlock(&journal->j_state_lock);
@@ -801,7 +810,7 @@ start_journal_io:
if (first_block < journal->j_tail)
freed += journal->j_last - journal->j_first;
/* Update tail only if we free significant amount of space */
- if (freed < journal->j_maxlen / 4)
+ if (freed < jbd2_journal_get_max_txn_bufs(journal))
update_tail = 0;
}
J_ASSERT(commit_transaction->t_state == T_COMMIT);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0c7c42bd530f..0c3d5e3b24b2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -727,6 +727,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
*/
int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
{
+ if (unlikely(is_journal_aborted(journal)))
+ return -EIO;
/*
* Fast commits only allowed if at least one full commit has
* been processed.
@@ -734,10 +736,12 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
if (!journal->j_stats.ts_tid)
return -EINVAL;
- if (tid <= journal->j_commit_sequence)
+ write_lock(&journal->j_state_lock);
+ if (tid <= journal->j_commit_sequence) {
+ write_unlock(&journal->j_state_lock);
return -EALREADY;
+ }
- write_lock(&journal->j_state_lock);
if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
(journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
DEFINE_WAIT(wait);
@@ -777,13 +781,19 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
int jbd2_fc_end_commit(journal_t *journal)
{
- return __jbd2_fc_end_commit(journal, 0, 0);
+ return __jbd2_fc_end_commit(journal, 0, false);
}
EXPORT_SYMBOL(jbd2_fc_end_commit);
-int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid)
+int jbd2_fc_end_commit_fallback(journal_t *journal)
{
- return __jbd2_fc_end_commit(journal, tid, 1);
+ tid_t tid;
+
+ read_lock(&journal->j_state_lock);
+ tid = journal->j_running_transaction ?
+ journal->j_running_transaction->t_tid : 0;
+ read_unlock(&journal->j_state_lock);
+ return __jbd2_fc_end_commit(journal, tid, true);
}
EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
@@ -865,7 +875,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
int fc_off;
*bh_out = NULL;
- write_lock(&journal->j_state_lock);
if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
fc_off = journal->j_fc_off;
@@ -874,7 +883,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
} else {
ret = -EINVAL;
}
- write_unlock(&journal->j_state_lock);
if (ret)
return ret;
@@ -887,11 +895,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
if (!bh)
return -ENOMEM;
- lock_buffer(bh);
- clear_buffer_uptodate(bh);
- set_buffer_dirty(bh);
- unlock_buffer(bh);
journal->j_fc_wbuf[fc_off] = bh;
*bh_out = bh;
@@ -909,9 +913,7 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
struct buffer_head *bh;
int i, j_fc_off;
- read_lock(&journal->j_state_lock);
j_fc_off = journal->j_fc_off;
- read_unlock(&journal->j_state_lock);
/*
* Wait in reverse order to minimize chances of us being woken up before
@@ -939,9 +941,7 @@ int jbd2_fc_release_bufs(journal_t *journal)
struct buffer_head *bh;
int i, j_fc_off;
- read_lock(&journal->j_state_lock);
j_fc_off = journal->j_fc_off;
- read_unlock(&journal->j_state_lock);
/*
* Wait in reverse order to minimize chances of us being woken up before
@@ -1348,23 +1348,16 @@ static journal_t *journal_init_common(struct block_device *bdev,
journal->j_dev = bdev;
journal->j_fs_dev = fs_dev;
journal->j_blk_offset = start;
- journal->j_maxlen = len;
+ journal->j_total_len = len;
/* We need enough buffers to write out full descriptor block. */
n = journal->j_blocksize / jbd2_min_tag_size();
journal->j_wbufsize = n;
+ journal->j_fc_wbuf = NULL;
journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!journal->j_wbuf)
goto err_cleanup;
- if (journal->j_fc_wbufsize > 0) {
- journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
- sizeof(struct buffer_head *),
- GFP_KERNEL);
- if (!journal->j_fc_wbuf)
- goto err_cleanup;
- }
-
bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
if (!bh) {
pr_err("%s: Cannot get buffer for journal superblock\n",
@@ -1378,23 +1371,11 @@ static journal_t *journal_init_common(struct block_device *bdev,
err_cleanup:
kfree(journal->j_wbuf);
- kfree(journal->j_fc_wbuf);
jbd2_journal_destroy_revoke(journal);
kfree(journal);
return NULL;
}
-int jbd2_fc_init(journal_t *journal, int num_fc_blks)
-{
- journal->j_fc_wbufsize = num_fc_blks;
- journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
- sizeof(struct buffer_head *), GFP_KERNEL);
- if (!journal->j_fc_wbuf)
- return -ENOMEM;
- return 0;
-}
-EXPORT_SYMBOL(jbd2_fc_init);
-
/* jbd2_journal_init_dev and jbd2_journal_init_inode:
*
* Create a journal structure assigned some fixed set of disk blocks to
@@ -1512,16 +1493,7 @@ static int journal_reset(journal_t *journal)
}
journal->j_first = first;
-
- if (jbd2_has_feature_fast_commit(journal) &&
- journal->j_fc_wbufsize > 0) {
- journal->j_fc_last = last;
- journal->j_last = last - journal->j_fc_wbufsize;
- journal->j_fc_first = journal->j_last + 1;
- journal->j_fc_off = 0;
- } else {
- journal->j_last = last;
- }
+ journal->j_last = last;
journal->j_head = journal->j_first;
journal->j_tail = journal->j_first;
@@ -1531,7 +1503,14 @@ static int journal_reset(journal_t *journal)
journal->j_commit_sequence = journal->j_transaction_sequence - 1;
journal->j_commit_request = journal->j_commit_sequence;
- journal->j_max_transaction_buffers = journal->j_maxlen / 4;
+ journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
+
+ /*
+ * Now that journal recovery is done, turn fast commits off here. This
+ * way, if fast commit was enabled before the crash but if now FS has
+ * disabled it, we don't enable fast commits.
+ */
+ jbd2_clear_feature_fast_commit(journal);
/*
* As a special case, if the on-disk copy is already marked as needing
@@ -1792,15 +1771,15 @@ static int journal_get_superblock(journal_t *journal)
goto out;
}
- if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
- journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
- else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
+ if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
+ journal->j_total_len = be32_to_cpu(sb->s_maxlen);
+ else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
printk(KERN_WARNING "JBD2: journal file too short\n");
goto out;
}
if (be32_to_cpu(sb->s_first) == 0 ||
- be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
+ be32_to_cpu(sb->s_first) >= journal->j_total_len) {
printk(KERN_WARNING
"JBD2: Invalid start block of journal: %u\n",
be32_to_cpu(sb->s_first));
@@ -1872,6 +1851,7 @@ static int load_superblock(journal_t *journal)
{
int err;
journal_superblock_t *sb;
+ int num_fc_blocks;
err = journal_get_superblock(journal);
if (err)
@@ -1883,15 +1863,17 @@ static int load_superblock(journal_t *journal)
journal->j_tail = be32_to_cpu(sb->s_start);
journal->j_first = be32_to_cpu(sb->s_first);
journal->j_errno = be32_to_cpu(sb->s_errno);
+ journal->j_last = be32_to_cpu(sb->s_maxlen);
- if (jbd2_has_feature_fast_commit(journal) &&
- journal->j_fc_wbufsize > 0) {
+ if (jbd2_has_feature_fast_commit(journal)) {
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
- journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize;
+ num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
+ if (!num_fc_blocks)
+ num_fc_blocks = JBD2_MIN_FC_BLOCKS;
+ if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
+ journal->j_last = journal->j_fc_last - num_fc_blocks;
journal->j_fc_first = journal->j_last + 1;
journal->j_fc_off = 0;
- } else {
- journal->j_last = be32_to_cpu(sb->s_maxlen);
}
return 0;
@@ -1954,9 +1936,6 @@ int jbd2_journal_load(journal_t *journal)
*/
journal->j_flags &= ~JBD2_ABORT;
- if (journal->j_fc_wbufsize > 0)
- jbd2_journal_set_features(journal, 0, 0,
- JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
/* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory
* and reset them on disk. */
@@ -2040,8 +2019,7 @@ int jbd2_journal_destroy(journal_t *journal)
jbd2_journal_destroy_revoke(journal);
if (journal->j_chksum_driver)
crypto_free_shash(journal->j_chksum_driver);
- if (journal->j_fc_wbufsize > 0)
- kfree(journal->j_fc_wbuf);
+ kfree(journal->j_fc_wbuf);
kfree(journal->j_wbuf);
kfree(journal);
@@ -2116,6 +2094,37 @@ int jbd2_journal_check_available_features(journal_t *journal, unsigned long comp
return 0;
}
+static int
+jbd2_journal_initialize_fast_commit(journal_t *journal)
+{
+ journal_superblock_t *sb = journal->j_superblock;
+ unsigned long long num_fc_blks;
+
+ num_fc_blks = be32_to_cpu(sb->s_num_fc_blks);
+ if (num_fc_blks == 0)
+ num_fc_blks = JBD2_MIN_FC_BLOCKS;
+ if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
+ return -ENOSPC;
+
+ /* Are we called twice? */
+ WARN_ON(journal->j_fc_wbuf != NULL);
+ journal->j_fc_wbuf = kmalloc_array(num_fc_blks,
+ sizeof(struct buffer_head *), GFP_KERNEL);
+ if (!journal->j_fc_wbuf)
+ return -ENOMEM;
+
+ journal->j_fc_wbufsize = num_fc_blks;
+ journal->j_fc_last = journal->j_last;
+ journal->j_last = journal->j_fc_last - num_fc_blks;
+ journal->j_fc_first = journal->j_last + 1;
+ journal->j_fc_off = 0;
+ journal->j_free = journal->j_last - journal->j_first;
+ journal->j_max_transaction_buffers =
+ jbd2_journal_get_max_txn_bufs(journal);
+
+ return 0;
+}
+
/**
* int jbd2_journal_set_features() - Mark a given journal feature in the superblock
* @journal: Journal to act on.
@@ -2159,6 +2168,13 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
sb = journal->j_superblock;
+ if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) {
+ if (jbd2_journal_initialize_fast_commit(journal)) {
+ pr_err("JBD2: Cannot enable fast commits.\n");
+ return 0;
+ }
+ }
+
/* Load the checksum driver if necessary */
if ((journal->j_chksum_driver == NULL) &&
INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index eb2606133cd8..dc0694fcfcd1 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -74,8 +74,8 @@ static int do_readahead(journal_t *journal, unsigned int start)
/* Do up to 128K of readahead */
max = start + (128 * 1024 / journal->j_blocksize);
- if (max > journal->j_maxlen)
- max = journal->j_maxlen;
+ if (max > journal->j_total_len)
+ max = journal->j_total_len;
/* Do the readahead itself. We'll submit MAXBUF buffer_heads at
* a time to the block device IO layer. */
@@ -134,7 +134,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
*bhp = NULL;
- if (offset >= journal->j_maxlen) {
+ if (offset >= journal->j_total_len) {
printk(KERN_ERR "JBD2: corrupted journal superblock\n");
return -EFSCORRUPTED;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 43985738aa86..d54f04674e8e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -195,8 +195,10 @@ static void wait_transaction_switching(journal_t *journal)
DEFINE_WAIT(wait);
if (WARN_ON(!journal->j_running_transaction ||
- journal->j_running_transaction->t_state != T_SWITCH))
+ journal->j_running_transaction->t_state != T_SWITCH)) {
+ read_unlock(&journal->j_state_lock);
return;
+ }
prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
read_unlock(&journal->j_state_lock);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index cb52db9a0cfb..4e011adaf967 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -955,7 +955,6 @@ out:
static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
{
- struct inode *inode = file_inode(filp);
struct nfs_open_dir_context *dir_ctx = filp->private_data;
dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
@@ -967,15 +966,15 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
case SEEK_SET:
if (offset < 0)
return -EINVAL;
- inode_lock(inode);
+ spin_lock(&filp->f_lock);
break;
case SEEK_CUR:
if (offset == 0)
return filp->f_pos;
- inode_lock(inode);
+ spin_lock(&filp->f_lock);
offset += filp->f_pos;
if (offset < 0) {
- inode_unlock(inode);
+ spin_unlock(&filp->f_lock);
return -EINVAL;
}
}
@@ -987,7 +986,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
dir_ctx->dir_cookie = 0;
dir_ctx->duped = 0;
}
- inode_unlock(inode);
+ spin_unlock(&filp->f_lock);
return offset;
}
@@ -998,13 +997,9 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
int datasync)
{
- struct inode *inode = file_inode(filp);
-
dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
- inode_lock(inode);
- nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
- inode_unlock(inode);
+ nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC);
return 0;
}
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index b51424ff8159..6c2ce799150f 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -1047,8 +1047,10 @@ out4:
void nfs4_xattr_cache_exit(void)
{
+ unregister_shrinker(&nfs4_xattr_large_entry_shrinker);
unregister_shrinker(&nfs4_xattr_entry_shrinker);
unregister_shrinker(&nfs4_xattr_cache_shrinker);
+ list_lru_destroy(&nfs4_xattr_large_entry_lru);
list_lru_destroy(&nfs4_xattr_entry_lru);
list_lru_destroy(&nfs4_xattr_cache_lru);
kmem_cache_destroy(nfs4_xattr_cache_cachep);
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 0dc31ad2362e..6e060a88f98c 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -196,7 +196,7 @@
1 + nfs4_xattr_name_maxsz + 1)
#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1)
-#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1)
+#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1)
#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \
nfs4_xattr_name_maxsz)
#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \
@@ -531,7 +531,7 @@ static void encode_listxattrs(struct xdr_stream *xdr,
{
__be32 *p;
- encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz + 1, hdr);
+ encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz, hdr);
p = reserve_space(xdr, 12);
if (unlikely(!p))
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8d3278805602..fa148308822c 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -88,7 +88,13 @@
#define NFS_ROOT "/tftpboot/%s"
/* Default NFSROOT mount options. */
+#if defined(CONFIG_NFS_V2)
#define NFS_DEF_OPTIONS "vers=2,tcp,rsize=4096,wsize=4096"
+#elif defined(CONFIG_NFS_V3)
+#define NFS_DEF_OPTIONS "vers=3,tcp,rsize=4096,wsize=4096"
+#else
+#define NFS_DEF_OPTIONS "vers=4,tcp,rsize=4096,wsize=4096"
+#endif
/* Parameters passed from the kernel command line */
static char nfs_root_parms[NFS_MAXPATHLEN + 1] __initdata = "";
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 14468613d150..a633044b0dc1 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -316,10 +316,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
fh_copy(&resp->dirfh, &argp->fh);
fh_init(&resp->fh, NFS3_FHSIZE);
- if (argp->ftype == 0 || argp->ftype >= NF3BAD) {
- resp->status = nfserr_inval;
- goto out;
- }
if (argp->ftype == NF3CHR || argp->ftype == NF3BLK) {
rdev = MKDEV(argp->major, argp->minor);
if (MAJOR(rdev) != argp->major ||
@@ -328,7 +324,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
goto out;
}
} else if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO) {
- resp->status = nfserr_inval;
+ resp->status = nfserr_badtype;
goto out;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 9c23b6acf234..2277f83da250 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1114,6 +1114,7 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_pathconfres *resp = rqstp->rq_resp;
+ *p++ = resp->status;
*p++ = xdr_zero; /* no post_op_attr */
if (resp->status == 0) {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ad2fa1a8e7ad..e83b21778816 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1299,7 +1299,7 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
struct nfsd_file *dst)
{
nfs42_ssc_close(src->nf_file);
- nfsd_file_put(src);
+ /* 'src' is freed by nfsd4_do_async_copy */
nfsd_file_put(dst);
mntput(ss_mnt);
}
@@ -1486,6 +1486,7 @@ do_callback:
cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
if (!cb_copy)
goto out;
+ refcount_set(&cb_copy->refcount, 1);
memcpy(&cb_copy->cp_res, &copy->cp_res, sizeof(copy->cp_res));
cb_copy->cp_clp = copy->cp_clp;
cb_copy->nfserr = copy->nfserr;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index b9a9d69dde7e..db52e843002a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -877,7 +877,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
goto done;
}
- trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
+ trace_ocfs2_journal_init_maxlen(j_journal->j_total_len);
*dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
OCFS2_JOURNAL_DIRTY_FL);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 1d91dd1e8711..2febc76e9de7 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1713,6 +1713,7 @@ static void ocfs2_inode_init_once(void *data)
oi->ip_blkno = 0ULL;
oi->ip_clusters = 0;
+ oi->ip_next_orphan = NULL;
ocfs2_resv_init_once(&oi->ip_la_data_resv);
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
index d0989a443c77..419760fd77bd 100644
--- a/fs/proc/cpuinfo.c
+++ b/fs/proc/cpuinfo.c
@@ -19,7 +19,7 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
static const struct proc_ops cpuinfo_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = cpuinfo_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = seq_release,
};
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2f9fa179194d..b84663252add 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -590,7 +590,7 @@ static int proc_seq_release(struct inode *inode, struct file *file)
static const struct proc_ops proc_seq_ops = {
/* not permanent -- can call into arbitrary seq_operations */
.proc_open = proc_seq_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = proc_seq_release,
};
@@ -621,7 +621,7 @@ static int proc_single_open(struct inode *inode, struct file *file)
static const struct proc_ops proc_single_ops = {
/* not permanent -- can call into arbitrary ->single_show */
.proc_open = proc_single_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 58c075e2a452..bde6b6f69852 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -597,6 +597,7 @@ static const struct file_operations proc_iter_file_ops = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
.write = proc_reg_write,
+ .splice_read = generic_file_splice_read,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.mmap = proc_reg_mmap,
@@ -622,6 +623,7 @@ static const struct file_operations proc_reg_file_ops_compat = {
static const struct file_operations proc_iter_file_ops_compat = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
+ .splice_read = generic_file_splice_read,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 46b3293015fe..4695b6de3151 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -226,7 +226,7 @@ static int stat_open(struct inode *inode, struct file *file)
static const struct proc_ops stat_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = stat_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 31219c1db17d..3b20e21604e7 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/string_helpers.h>
+#include <linux/uio.h>
#include <linux/uaccess.h>
#include <asm/page.h>
@@ -146,7 +147,28 @@ Eoverflow:
*/
ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{
- struct seq_file *m = file->private_data;
+ struct iovec iov = { .iov_base = buf, .iov_len = size};
+ struct kiocb kiocb;
+ struct iov_iter iter;
+ ssize_t ret;
+
+ init_sync_kiocb(&kiocb, file);
+ iov_iter_init(&iter, READ, &iov, 1, size);
+
+ kiocb.ki_pos = *ppos;
+ ret = seq_read_iter(&kiocb, &iter);
+ *ppos = kiocb.ki_pos;
+ return ret;
+}
+EXPORT_SYMBOL(seq_read);
+
+/*
+ * Ready-made ->f_op->read_iter()
+ */
+ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct seq_file *m = iocb->ki_filp->private_data;
+ size_t size = iov_iter_count(iter);
size_t copied = 0;
size_t n;
void *p;
@@ -158,14 +180,14 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
* if request is to read from zero offset, reset iterator to first
* record as it might have been already advanced by previous requests
*/
- if (*ppos == 0) {
+ if (iocb->ki_pos == 0) {
m->index = 0;
m->count = 0;
}
- /* Don't assume *ppos is where we left it */
- if (unlikely(*ppos != m->read_pos)) {
- while ((err = traverse(m, *ppos)) == -EAGAIN)
+ /* Don't assume ki_pos is where we left it */
+ if (unlikely(iocb->ki_pos != m->read_pos)) {
+ while ((err = traverse(m, iocb->ki_pos)) == -EAGAIN)
;
if (err) {
/* With prejudice... */
@@ -174,7 +196,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
m->count = 0;
goto Done;
} else {
- m->read_pos = *ppos;
+ m->read_pos = iocb->ki_pos;
}
}
@@ -187,13 +209,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
/* if not empty - flush it first */
if (m->count) {
n = min(m->count, size);
- err = copy_to_user(buf, m->buf + m->from, n);
- if (err)
+ if (copy_to_iter(m->buf + m->from, n, iter) != n)
goto Efault;
m->count -= n;
m->from += n;
size -= n;
- buf += n;
copied += n;
if (!size)
goto Done;
@@ -254,8 +274,7 @@ Fill:
}
m->op->stop(m, p);
n = min(m->count, size);
- err = copy_to_user(buf, m->buf, n);
- if (err)
+ if (copy_to_iter(m->buf, n, iter) != n)
goto Efault;
copied += n;
m->count -= n;
@@ -264,7 +283,7 @@ Done:
if (!copied)
copied = err;
else {
- *ppos += copied;
+ iocb->ki_pos += copied;
m->read_pos += copied;
}
mutex_unlock(&m->lock);
@@ -276,7 +295,7 @@ Efault:
err = -EFAULT;
goto Done;
}
-EXPORT_SYMBOL(seq_read);
+EXPORT_SYMBOL(seq_read_iter);
/**
* seq_lseek - ->llseek() method for sequential files.
diff --git a/fs/super.c b/fs/super.c
index a51c2083cd6b..98bb0629ee10 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1631,55 +1631,6 @@ int super_setup_bdi(struct super_block *sb)
}
EXPORT_SYMBOL(super_setup_bdi);
-/*
- * This is an internal function, please use sb_end_{write,pagefault,intwrite}
- * instead.
- */
-void __sb_end_write(struct super_block *sb, int level)
-{
- percpu_up_read(sb->s_writers.rw_sem + level-1);
-}
-EXPORT_SYMBOL(__sb_end_write);
-
-/*
- * This is an internal function, please use sb_start_{write,pagefault,intwrite}
- * instead.
- */
-int __sb_start_write(struct super_block *sb, int level, bool wait)
-{
- bool force_trylock = false;
- int ret = 1;
-
-#ifdef CONFIG_LOCKDEP
- /*
- * We want lockdep to tell us about possible deadlocks with freezing
- * but it's it bit tricky to properly instrument it. Getting a freeze
- * protection works as getting a read lock but there are subtle
- * problems. XFS for example gets freeze protection on internal level
- * twice in some cases, which is OK only because we already hold a
- * freeze protection also on higher level. Due to these cases we have
- * to use wait == F (trylock mode) which must not fail.
- */
- if (wait) {
- int i;
-
- for (i = 0; i < level - 1; i++)
- if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
- force_trylock = true;
- break;
- }
- }
-#endif
- if (wait && !force_trylock)
- percpu_down_read(sb->s_writers.rw_sem + level-1);
- else
- ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
-
- WARN_ON(force_trylock && !ret);
- return ret;
-}
-EXPORT_SYMBOL(__sb_start_write);
-
/**
* sb_wait_write - wait until all writers to given file system finish
* @sb: the super for which we wait
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 852b536551b5..15640015be9d 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2467,6 +2467,7 @@ xfs_defer_agfl_block(
new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
new->xefi_blockcount = 1;
new->xefi_oinfo = *oinfo;
+ new->xefi_skip_discard = false;
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index e1bd484e5548..6747e97a7949 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -52,9 +52,9 @@ struct xfs_extent_free_item
{
xfs_fsblock_t xefi_startblock;/* starting fs block number */
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
+ bool xefi_skip_discard;
struct list_head xefi_list;
struct xfs_owner_info xefi_oinfo; /* extent owner */
- bool xefi_skip_discard;
};
#define XFS_BMAP_MAX_NMAP 4
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 340c83f76c80..2668ebe02865 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -1514,7 +1514,7 @@ xfs_rmap_convert_shared(
* record for our insertion point. This will also give us the record for
* start block contiguity tests.
*/
- error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
+ error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext,
&PREV, &i);
if (error)
goto done;
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index beb81c84a937..577a66381327 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -243,8 +243,8 @@ xfs_rmapbt_key_diff(
else if (y > x)
return -1;
- x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
- y = rec->rm_offset;
+ x = be64_to_cpu(kp->rm_offset);
+ y = xfs_rmap_irec_offset_pack(rec);
if (x > y)
return 1;
else if (y > x)
@@ -275,8 +275,8 @@ xfs_rmapbt_diff_two_keys(
else if (y > x)
return -1;
- x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
- y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
+ x = be64_to_cpu(kp1->rm_offset);
+ y = be64_to_cpu(kp2->rm_offset);
if (x > y)
return 1;
else if (y > x)
@@ -390,8 +390,8 @@ xfs_rmapbt_keys_inorder(
return 1;
else if (a > b)
return 0;
- a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
- b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
+ a = be64_to_cpu(k1->rmap.rm_offset);
+ b = be64_to_cpu(k2->rmap.rm_offset);
if (a <= b)
return 1;
return 0;
@@ -420,8 +420,8 @@ xfs_rmapbt_recs_inorder(
return 1;
else if (a > b)
return 0;
- a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
- b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
+ a = be64_to_cpu(r1->rmap.rm_offset);
+ b = be64_to_cpu(r2->rmap.rm_offset);
if (a <= b)
return 1;
return 0;
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 955302e7cdde..412e2ec55e38 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -113,6 +113,8 @@ xchk_bmap_get_rmap(
if (info->whichfork == XFS_ATTR_FORK)
rflags |= XFS_RMAP_ATTR_FORK;
+ if (irec->br_state == XFS_EXT_UNWRITTEN)
+ rflags |= XFS_RMAP_UNWRITTEN;
/*
* CoW staging extents are owned (on disk) by the refcountbt, so
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 3aa85b64de36..bb25ff1b770d 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -121,8 +121,7 @@ xchk_inode_flags(
goto bad;
/* rt flags require rt device */
- if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) &&
- !mp->m_rtdev_targp)
+ if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
goto bad;
/* new rt bitmap flag only valid for rbmino */
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index beaeb6fa3119..dd672e6bbc75 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -170,7 +170,6 @@ xchk_refcountbt_process_rmap_fragments(
*/
INIT_LIST_HEAD(&worklist);
rbno = NULLAGBLOCK;
- nr = 1;
/* Make sure the fragments actually /are/ in agbno order. */
bno = 0;
@@ -184,15 +183,14 @@ xchk_refcountbt_process_rmap_fragments(
* Find all the rmaps that start at or before the refc extent,
* and put them on the worklist.
*/
+ nr = 0;
list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
- if (frag->rm.rm_startblock > refchk->bno)
- goto done;
+ if (frag->rm.rm_startblock > refchk->bno || nr > target_nr)
+ break;
bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
if (bno < rbno)
rbno = bno;
list_move_tail(&frag->list, &worklist);
- if (nr == target_nr)
- break;
nr++;
}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 55d126d4e096..4304c6416fbb 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -346,8 +346,8 @@ xfs_map_blocks(
ssize_t count = i_blocksize(inode);
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
- xfs_fileoff_t cow_fsb = NULLFILEOFF;
- int whichfork = XFS_DATA_FORK;
+ xfs_fileoff_t cow_fsb;
+ int whichfork;
struct xfs_bmbt_irec imap;
struct xfs_iext_cursor icur;
int retries = 0;
@@ -381,6 +381,8 @@ xfs_map_blocks(
* landed in a hole and we skip the block.
*/
retry:
+ cow_fsb = NULLFILEOFF;
+ whichfork = XFS_DATA_FORK;
xfs_ilock(ip, XFS_ILOCK_SHARED);
ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
(ip->i_df.if_flags & XFS_IFEXTENTS));
@@ -527,13 +529,15 @@ xfs_prepare_ioend(
*/
static void
xfs_discard_page(
- struct page *page)
+ struct page *page,
+ loff_t fileoff)
{
struct inode *inode = page->mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- loff_t offset = page_offset(page);
- xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset);
+ unsigned int pageoff = offset_in_page(fileoff);
+ xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, fileoff);
+ xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff);
int error;
if (XFS_FORCED_SHUTDOWN(mp))
@@ -541,14 +545,14 @@ xfs_discard_page(
xfs_alert_ratelimited(mp,
"page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
- page, ip->i_ino, offset);
+ page, ip->i_ino, fileoff);
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
- i_blocks_per_page(inode, page));
+ i_blocks_per_page(inode, page) - pageoff_fsb);
if (error && !XFS_FORCED_SHUTDOWN(mp))
xfs_alert(mp, "page discard unable to remove delalloc mapping.");
out_invalidate:
- iomap_invalidatepage(page, 0, PAGE_SIZE);
+ iomap_invalidatepage(page, pageoff, PAGE_SIZE - pageoff);
}
static const struct iomap_writeback_ops xfs_writeback_ops = {
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 5e165456da68..1414ab79eacf 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -911,6 +911,16 @@ xfs_setattr_size(
error = iomap_zero_range(inode, oldsize, newsize - oldsize,
&did_zeroing, &xfs_buffered_write_iomap_ops);
} else {
+ /*
+ * iomap won't detect a dirty page over an unwritten block (or a
+ * cow block over a hole) and subsequently skips zeroing the
+ * newly post-EOF portion of the page. Flush the new EOF to
+ * convert the block before the pagecache truncate.
+ */
+ error = filemap_write_and_wait_range(inode->i_mapping, newsize,
+ newsize);
+ if (error)
+ return error;
error = iomap_truncate_page(inode, newsize, &did_zeroing,
&xfs_buffered_write_iomap_ops);
}
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index b101feb2aab4..f3082a957d5e 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -134,7 +134,7 @@ xfs_fs_map_blocks(
goto out_unlock;
error = invalidate_inode_pages2(inode->i_mapping);
if (WARN_ON_ONCE(error))
- return error;
+ goto out_unlock;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 16098dc42add..6fa05fb78189 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1502,7 +1502,8 @@ xfs_reflink_unshare(
&xfs_buffered_write_iomap_ops);
if (error)
goto out;
- error = filemap_write_and_wait(inode->i_mapping);
+
+ error = filemap_write_and_wait_range(inode->i_mapping, offset, len);
if (error)
goto out;