summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c3
-rw-r--r--fs/9p/vfs_inode_dotl.c3
-rw-r--r--fs/9p/vfs_super.c6
-rw-r--r--fs/adfs/super.c4
-rw-r--r--fs/affs/amigaffs.c2
-rw-r--r--fs/affs/bitmap.c6
-rw-r--r--fs/affs/super.c16
-rw-r--r--fs/afs/cell.c7
-rw-r--r--fs/afs/dir.c25
-rw-r--r--fs/afs/flock.c548
-rw-r--r--fs/afs/internal.h28
-rw-r--r--fs/afs/rotate.c70
-rw-r--r--fs/afs/security.c22
-rw-r--r--fs/afs/server_list.c2
-rw-r--r--fs/afs/super.c18
-rw-r--r--fs/afs/write.c5
-rw-r--r--fs/aio.c55
-rw-r--r--fs/autofs4/root.c17
-rw-r--r--fs/autofs4/waitq.c15
-rw-r--r--fs/befs/ChangeLog2
-rw-r--r--fs/befs/linuxvfs.c4
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/btrfs/compression.c9
-rw-r--r--fs/btrfs/compression.h5
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/disk-io.c10
-rw-r--r--fs/btrfs/extent-tree.c14
-rw-r--r--fs/btrfs/extent_io.c4
-rw-r--r--fs/btrfs/extent_io.h8
-rw-r--r--fs/btrfs/file.c130
-rw-r--r--fs/btrfs/free-space-cache.c3
-rw-r--r--fs/btrfs/inode.c34
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/relocation.c3
-rw-r--r--fs/btrfs/send.c124
-rw-r--r--fs/btrfs/super.c63
-rw-r--r--fs/btrfs/tests/extent-io-tests.c6
-rw-r--r--fs/btrfs/tests/inode-tests.c12
-rw-r--r--fs/btrfs/tree-checker.c27
-rw-r--r--fs/btrfs/tree-checker.h14
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c36
-rw-r--r--fs/ceph/caps.c9
-rw-r--r--fs/ceph/inode.c9
-rw-r--r--fs/ceph/locks.c177
-rw-r--r--fs/ceph/mds_client.c96
-rw-r--r--fs/ceph/super.c13
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/cifs/cifs_fs_sb.h2
-rw-r--r--fs/cifs/cifsfs.c12
-rw-r--r--fs/cifs/cifsglob.h4
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/cifs/xattr.c8
-rw-r--r--fs/coda/inode.c4
-rw-r--r--fs/coda/upcall.c3
-rw-r--r--fs/compat_ioctl.c123
-rw-r--r--fs/coredump.c7
-rw-r--r--fs/cramfs/Kconfig39
-rw-r--r--fs/cramfs/README31
-rw-r--r--fs/cramfs/inode.c513
-rw-r--r--fs/dax.c322
-rw-r--r--fs/ecryptfs/crypto.c44
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h9
-rw-r--r--fs/ecryptfs/inode.c4
-rw-r--r--fs/ecryptfs/keystore.c48
-rw-r--r--fs/ecryptfs/main.c12
-rw-r--r--fs/ecryptfs/messaging.c13
-rw-r--r--fs/ecryptfs/miscdev.c8
-rw-r--r--fs/ecryptfs/mmap.c2
-rw-r--r--fs/efs/super.c4
-rw-r--r--fs/eventpoll.c135
-rw-r--r--fs/exec.c7
-rw-r--r--fs/ext2/balloc.c4
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext2/ialloc.c4
-rw-r--r--fs/ext2/super.c20
-rw-r--r--fs/ext4/file.c26
-rw-r--r--fs/ext4/inode.c19
-rw-r--r--fs/ext4/ioctl.c86
-rw-r--r--fs/ext4/super.c52
-rw-r--r--fs/f2fs/checkpoint.c10
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/f2fs/gc.c2
-rw-r--r--fs/f2fs/recovery.c10
-rw-r--r--fs/f2fs/super.c28
-rw-r--r--fs/fat/dir.c1
-rw-r--r--fs/fat/fatent.c6
-rw-r--r--fs/fat/inode.c10
-rw-r--r--fs/fat/misc.c2
-rw-r--r--fs/fat/namei_msdos.c2
-rw-r--r--fs/fcntl.c16
-rw-r--r--fs/fhandle.c4
-rw-r--r--fs/file.c12
-rw-r--r--fs/freevxfs/vxfs_super.c4
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fuse/inode.c12
-rw-r--r--fs/gfs2/ops_fstype.c16
-rw-r--r--fs/gfs2/super.c10
-rw-r--r--fs/gfs2/trans.c2
-rw-r--r--fs/hfs/bnode.c4
-rw-r--r--fs/hfs/mdb.c4
-rw-r--r--fs/hfs/super.c16
-rw-r--r--fs/hfsplus/bnode.c4
-rw-r--r--fs/hfsplus/super.c22
-rw-r--r--fs/hpfs/map.c2
-rw-r--r--fs/hpfs/super.c8
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/inode.c10
-rw-r--r--fs/internal.h1
-rw-r--r--fs/iomap.c24
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/jbd2/journal.c17
-rw-r--r--fs/jffs2/fs.c4
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/super.c4
-rw-r--r--fs/jfs/super.c10
-rw-r--r--fs/kernfs/mount.c2
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/lockd/host.c24
-rw-r--r--fs/lockd/mon.c3
-rw-r--r--fs/lockd/svc.c58
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/locks.c2
-rw-r--r--fs/mbcache.c3
-rw-r--r--fs/minix/inode.c4
-rw-r--r--fs/namei.c29
-rw-r--r--fs/ncpfs/inode.c4
-rw-r--r--fs/nfs/cache_lib.c6
-rw-r--r--fs/nfs/cache_lib.h2
-rw-r--r--fs/nfs/callback.c14
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/client.c10
-rw-r--r--fs/nfs/delegation.c27
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c52
-rw-r--r--fs/nfs/file.c18
-rw-r--r--fs/nfs/filelayout/filelayout.c12
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c20
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h3
-rw-r--r--fs/nfs/inode.c18
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs3proc.c17
-rw-r--r--fs/nfs/nfs4_fs.h12
-rw-r--r--fs/nfs/nfs4client.c12
-rw-r--r--fs/nfs/nfs4proc.c511
-rw-r--r--fs/nfs/nfs4state.c53
-rw-r--r--fs/nfs/nfs4trace.h26
-rw-r--r--fs/nfs/nfs4xdr.c12
-rw-r--r--fs/nfs/pnfs.c44
-rw-r--r--fs/nfs/pnfs.h15
-rw-r--r--fs/nfs/pnfs_nfs.c10
-rw-r--r--fs/nfs/super.c36
-rw-r--r--fs/nfs/write.c17
-rw-r--r--fs/nfs_common/grace.c34
-rw-r--r--fs/nfsd/export.c10
-rw-r--r--fs/nfsd/fault_inject.c5
-rw-r--r--fs/nfsd/netns.h5
-rw-r--r--fs/nfsd/nfs3xdr.c10
-rw-r--r--fs/nfsd/nfs4idmap.c4
-rw-r--r--fs/nfsd/nfs4layouts.c4
-rw-r--r--fs/nfsd/nfs4proc.c19
-rw-r--r--fs/nfsd/nfs4state.c404
-rw-r--r--fs/nfsd/nfsctl.c3
-rw-r--r--fs/nfsd/nfssvc.c18
-rw-r--r--fs/nfsd/state.h11
-rw-r--r--fs/nfsd/xdr4.h13
-rw-r--r--fs/nilfs2/namei.c2
-rw-r--r--fs/nilfs2/segment.c19
-rw-r--r--fs/nilfs2/segment.h1
-rw-r--r--fs/nilfs2/sufile.c32
-rw-r--r--fs/nilfs2/super.c25
-rw-r--r--fs/nilfs2/the_nilfs.c14
-rw-r--r--fs/nilfs2/the_nilfs.h5
-rw-r--r--fs/notify/fsnotify.c2
-rw-r--r--fs/nsfs.c2
-rw-r--r--fs/ntfs/super.c32
-rw-r--r--fs/ocfs2/cluster/tcp.c9
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/super.c28
-rw-r--r--fs/ocfs2/xattr.c2
-rw-r--r--fs/openpromfs/inode.c4
-rw-r--r--fs/orangefs/acl.c10
-rw-r--r--fs/orangefs/dir.c1
-rw-r--r--fs/orangefs/file.c16
-rw-r--r--fs/orangefs/inode.c17
-rw-r--r--fs/orangefs/namei.c45
-rw-r--r--fs/orangefs/orangefs-debug.h4
-rw-r--r--fs/orangefs/orangefs-kernel.h37
-rw-r--r--fs/orangefs/orangefs-utils.c86
-rw-r--r--fs/orangefs/super.c23
-rw-r--r--fs/orangefs/symlink.c1
-rw-r--r--fs/overlayfs/copy_up.c8
-rw-r--r--fs/overlayfs/dir.c25
-rw-r--r--fs/overlayfs/inode.c63
-rw-r--r--fs/overlayfs/namei.c59
-rw-r--r--fs/overlayfs/overlayfs.h13
-rw-r--r--fs/overlayfs/ovl_entry.h14
-rw-r--r--fs/overlayfs/readdir.c55
-rw-r--r--fs/overlayfs/super.c690
-rw-r--r--fs/overlayfs/util.c21
-rw-r--r--fs/pipe.c23
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c6
-rw-r--r--fs/proc/base.c5
-rw-r--r--fs/proc/cpuinfo.c6
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/internal.h25
-rw-r--r--fs/proc/loadavg.c2
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/task_mmu.c1
-rw-r--r--fs/proc/util.c23
-rw-r--r--fs/proc_namespace.c8
-rw-r--r--fs/pstore/platform.c6
-rw-r--r--fs/qnx4/inode.c4
-rw-r--r--fs/qnx6/inode.c4
-rw-r--r--fs/quota/dquot.c22
-rw-r--r--fs/read_write.c21
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/super.c19
-rw-r--r--fs/reiserfs/xattr.c10
-rw-r--r--fs/romfs/super.c4
-rw-r--r--fs/select.c68
-rw-r--r--fs/signalfd.c4
-rw-r--r--fs/squashfs/super.c4
-rw-r--r--fs/statfs.c8
-rw-r--r--fs/super.c46
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/sysv/super.c2
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/ubifs/io.c2
-rw-r--r--fs/ubifs/super.c20
-rw-r--r--fs/ubifs/ubifs.h4
-rw-r--r--fs/udf/super.c6
-rw-r--r--fs/ufs/balloc.c8
-rw-r--r--fs/ufs/ialloc.c10
-rw-r--r--fs/ufs/super.c30
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c6
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c4
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c8
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h4
-rw-r--r--fs/xfs/scrub/inode.c14
-rw-r--r--fs/xfs/scrub/quota.c4
-rw-r--r--fs/xfs/xfs_aops.c12
-rw-r--r--fs/xfs/xfs_bmap_item.c23
-rw-r--r--fs/xfs/xfs_bmap_item.h3
-rw-r--r--fs/xfs/xfs_buf.c15
-rw-r--r--fs/xfs/xfs_dquot.c14
-rw-r--r--fs/xfs/xfs_dquot_item.c40
-rw-r--r--fs/xfs/xfs_file.c44
-rw-r--r--fs/xfs/xfs_inode.c22
-rw-r--r--fs/xfs/xfs_iomap.c5
-rw-r--r--fs/xfs/xfs_linux.h10
-rw-r--r--fs/xfs/xfs_log.c6
-rw-r--r--fs/xfs/xfs_log_recover.c75
-rw-r--r--fs/xfs/xfs_refcount_item.c21
-rw-r--r--fs/xfs/xfs_refcount_item.h3
-rw-r--r--fs/xfs/xfs_super.c8
-rw-r--r--fs/xfs/xfs_super.h2
-rw-r--r--fs/xfs/xfs_trace.h2
262 files changed, 4420 insertions, 2882 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 2a5de610dd8f..bdabb2765d1b 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -483,6 +483,9 @@ static int v9fs_test_inode(struct inode *inode, void *data)
if (v9inode->qid.type != st->qid.type)
return 0;
+
+ if (v9inode->qid.path != st->qid.path)
+ return 0;
return 1;
}
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 70f9887c59a9..7f6ae21a27b3 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -87,6 +87,9 @@ static int v9fs_test_inode_dotl(struct inode *inode, void *data)
if (v9inode->qid.type != st->qid.type)
return 0;
+
+ if (v9inode->qid.path != st->qid.path)
+ return 0;
return 1;
}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 8b75463cb211..af03c2a901eb 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -94,13 +94,13 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
if (v9ses->cache)
sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE;
- sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
+ sb->s_flags |= SB_ACTIVE | SB_DIRSYNC | SB_NOATIME;
if (!v9ses->cache)
- sb->s_flags |= MS_SYNCHRONOUS;
+ sb->s_flags |= SB_SYNCHRONOUS;
#ifdef CONFIG_9P_FS_POSIX_ACL
if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL)
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
#endif
return 0;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index c9fdfb112933..cfda2c7caedc 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -213,7 +213,7 @@ static int parse_options(struct super_block *sb, char *options)
static int adfs_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_NODIRATIME;
+ *flags |= SB_NODIRATIME;
return parse_options(sb, data);
}
@@ -372,7 +372,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
struct inode *root;
int ret = -EINVAL;
- sb->s_flags |= MS_NODIRATIME;
+ sb->s_flags |= SB_NODIRATIME;
asb = kzalloc(sizeof(*asb), GFP_KERNEL);
if (!asb)
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 185d5ab7e986..0f0e6925e97d 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -453,7 +453,7 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...)
pr_crit("error (device %s): %s(): %pV\n", sb->s_id, function, &vaf);
if (!sb_rdonly(sb))
pr_warn("Remounting filesystem read-only\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
va_end(args);
}
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index 2b1399611d9e..5ba9ef2742f6 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -250,12 +250,12 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
int i, res = 0;
struct affs_sb_info *sbi = AFFS_SB(sb);
- if (*flags & MS_RDONLY)
+ if (*flags & SB_RDONLY)
return 0;
if (!AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag) {
pr_notice("Bitmap invalid - mounting %s read only\n", sb->s_id);
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
return 0;
}
@@ -288,7 +288,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
if (affs_checksum_block(sb, bh)) {
pr_warn("Bitmap %u invalid - mounting %s read only.\n",
bm->bm_key, sb->s_id);
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
goto out;
}
pr_debug("read bitmap block %d: %d\n", blk, bm->bm_key);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 884bedab7266..1117e36134cc 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -356,7 +356,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = AFFS_SUPER_MAGIC;
sb->s_op = &affs_sops;
- sb->s_flags |= MS_NODIRATIME;
+ sb->s_flags |= SB_NODIRATIME;
sbi = kzalloc(sizeof(struct affs_sb_info), GFP_KERNEL);
if (!sbi)
@@ -466,7 +466,7 @@ got_root:
if ((chksum == FS_DCFFS || chksum == MUFS_DCFFS || chksum == FS_DCOFS
|| chksum == MUFS_DCOFS) && !sb_rdonly(sb)) {
pr_notice("Dircache FS - mounting %s read only\n", sb->s_id);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
switch (chksum) {
case MUFS_FS:
@@ -488,7 +488,7 @@ got_root:
/* fall thru */
case FS_OFS:
affs_set_opt(sbi->s_flags, SF_OFS);
- sb->s_flags |= MS_NOEXEC;
+ sb->s_flags |= SB_NOEXEC;
break;
case MUFS_DCOFS:
case MUFS_INTLOFS:
@@ -497,7 +497,7 @@ got_root:
case FS_INTLOFS:
affs_set_opt(sbi->s_flags, SF_INTL);
affs_set_opt(sbi->s_flags, SF_OFS);
- sb->s_flags |= MS_NOEXEC;
+ sb->s_flags |= SB_NOEXEC;
break;
default:
pr_err("Unknown filesystem on device %s: %08X\n",
@@ -513,7 +513,7 @@ got_root:
sig, sig[3] + '0', blocksize);
}
- sb->s_flags |= MS_NODEV | MS_NOSUID;
+ sb->s_flags |= SB_NODEV | SB_NOSUID;
sbi->s_data_blksize = sb->s_blocksize;
if (affs_test_opt(sbi->s_flags, SF_OFS))
@@ -570,7 +570,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
sync_filesystem(sb);
- *flags |= MS_NODIRATIME;
+ *flags |= SB_NODIRATIME;
memcpy(volume, sbi->s_volume, 32);
if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block,
@@ -596,10 +596,10 @@ affs_remount(struct super_block *sb, int *flags, char *data)
memcpy(sbi->s_volume, volume, 32);
spin_unlock(&sbi->symlink_lock);
- if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb))
+ if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
return 0;
- if (*flags & MS_RDONLY)
+ if (*flags & SB_RDONLY)
affs_free_bitmap(sb);
else
res = affs_init_bitmap(sb, flags);
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 1858c91169e4..9bb921d120d0 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -207,13 +207,8 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
rcu_read_lock();
cell = afs_lookup_cell_rcu(net, name, namesz);
rcu_read_unlock();
- if (!IS_ERR(cell)) {
- if (excl) {
- afs_put_cell(net, cell);
- return ERR_PTR(-EEXIST);
- }
+ if (!IS_ERR(cell))
goto wait_for_cell;
- }
}
/* Assume we're probably going to create a cell and preallocate and
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index ab618d32554c..ff8d5bf4354f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -765,6 +765,8 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
if (fc->ac.error < 0)
return;
+ d_drop(new_dentry);
+
inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
newfid, newstatus, newcb, fc->cbi);
if (IS_ERR(inode)) {
@@ -775,9 +777,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
return;
}
- d_instantiate(new_dentry, inode);
- if (d_unhashed(new_dentry))
- d_rehash(new_dentry);
+ d_add(new_dentry, inode);
}
/*
@@ -818,6 +818,8 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_key;
+ } else {
+ goto error_key;
}
key_put(key);
@@ -972,7 +974,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct afs_fs_cursor fc;
struct afs_file_status newstatus;
struct afs_callback newcb;
- struct afs_vnode *dvnode = dvnode = AFS_FS_I(dir);
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
struct afs_fid newfid;
struct key *key;
int ret;
@@ -1006,6 +1008,8 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_key;
+ } else {
+ goto error_key;
}
key_put(key);
@@ -1053,7 +1057,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc);
- return -ERESTARTSYS;
+ goto error_key;
}
while (afs_select_fileserver(&fc)) {
@@ -1071,6 +1075,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_key;
+ } else {
+ goto error_key;
}
key_put(key);
@@ -1130,6 +1136,8 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_key;
+ } else {
+ goto error_key;
}
key_put(key);
@@ -1180,7 +1188,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (orig_dvnode != new_dvnode) {
if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc);
- return -ERESTARTSYS;
+ goto error_key;
}
}
while (afs_select_fileserver(&fc)) {
@@ -1199,14 +1207,9 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto error_key;
}
- key_put(key);
- _leave(" = 0");
- return 0;
-
error_key:
key_put(key);
error:
- d_drop(new_dentry);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 7571a5dfd5a3..c40ba2fe3cbe 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -170,7 +170,7 @@ void afs_lock_work(struct work_struct *work)
{
struct afs_vnode *vnode =
container_of(work, struct afs_vnode, lock_work.work);
- struct file_lock *fl;
+ struct file_lock *fl, *next;
afs_lock_type_t type;
struct key *key;
int ret;
@@ -179,117 +179,136 @@ void afs_lock_work(struct work_struct *work)
spin_lock(&vnode->lock);
- if (test_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) {
+again:
+ _debug("wstate %u for %p", vnode->lock_state, vnode);
+ switch (vnode->lock_state) {
+ case AFS_VNODE_LOCK_NEED_UNLOCK:
_debug("unlock");
+ vnode->lock_state = AFS_VNODE_LOCK_UNLOCKING;
spin_unlock(&vnode->lock);
/* attempt to release the server lock; if it fails, we just
- * wait 5 minutes and it'll time out anyway */
- ret = afs_release_lock(vnode, vnode->unlock_key);
+ * wait 5 minutes and it'll expire anyway */
+ ret = afs_release_lock(vnode, vnode->lock_key);
if (ret < 0)
printk(KERN_WARNING "AFS:"
" Failed to release lock on {%x:%x} error %d\n",
vnode->fid.vid, vnode->fid.vnode, ret);
spin_lock(&vnode->lock);
- key_put(vnode->unlock_key);
- vnode->unlock_key = NULL;
- clear_bit(AFS_VNODE_UNLOCKING, &vnode->flags);
- }
+ key_put(vnode->lock_key);
+ vnode->lock_key = NULL;
+ vnode->lock_state = AFS_VNODE_LOCK_NONE;
+
+ if (list_empty(&vnode->pending_locks)) {
+ spin_unlock(&vnode->lock);
+ return;
+ }
+
+ /* The new front of the queue now owns the state variables. */
+ next = list_entry(vnode->pending_locks.next,
+ struct file_lock, fl_u.afs.link);
+ vnode->lock_key = afs_file_key(next->fl_file);
+ vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+ vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
+ goto again;
- /* if we've got a lock, then it must be time to extend that lock as AFS
- * locks time out after 5 minutes */
- if (!list_empty(&vnode->granted_locks)) {
+ /* If we've already got a lock, then it must be time to extend that
+ * lock as AFS locks time out after 5 minutes.
+ */
+ case AFS_VNODE_LOCK_GRANTED:
_debug("extend");
- if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
- BUG();
- fl = list_entry(vnode->granted_locks.next,
- struct file_lock, fl_u.afs.link);
- key = key_get(afs_file_key(fl->fl_file));
+ ASSERT(!list_empty(&vnode->granted_locks));
+
+ key = key_get(vnode->lock_key);
+ vnode->lock_state = AFS_VNODE_LOCK_EXTENDING;
spin_unlock(&vnode->lock);
- ret = afs_extend_lock(vnode, key);
- clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
+ ret = afs_extend_lock(vnode, key); /* RPC */
key_put(key);
- switch (ret) {
- case 0:
+
+ if (ret < 0)
+ pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n",
+ vnode->fid.vid, vnode->fid.vnode, ret);
+
+ spin_lock(&vnode->lock);
+
+ if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
+ goto again;
+ vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
+
+ if (ret == 0)
afs_schedule_lock_extension(vnode);
- break;
- default:
- /* ummm... we failed to extend the lock - retry
- * extension shortly */
- printk(KERN_WARNING "AFS:"
- " Failed to extend lock on {%x:%x} error %d\n",
- vnode->fid.vid, vnode->fid.vnode, ret);
+ else
queue_delayed_work(afs_lock_manager, &vnode->lock_work,
HZ * 10);
- break;
- }
- _leave(" [extend]");
+ spin_unlock(&vnode->lock);
+ _leave(" [ext]");
return;
- }
- /* if we don't have a granted lock, then we must've been called back by
- * the server, and so if might be possible to get a lock we're
- * currently waiting for */
- if (!list_empty(&vnode->pending_locks)) {
+ /* If we don't have a granted lock, then we must've been called
+ * back by the server, and so if might be possible to get a
+ * lock we're currently waiting for.
+ */
+ case AFS_VNODE_LOCK_WAITING_FOR_CB:
_debug("get");
- if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
- BUG();
- fl = list_entry(vnode->pending_locks.next,
- struct file_lock, fl_u.afs.link);
- key = key_get(afs_file_key(fl->fl_file));
- type = (fl->fl_type == F_RDLCK) ?
- AFS_LOCK_READ : AFS_LOCK_WRITE;
+ key = key_get(vnode->lock_key);
+ type = vnode->lock_type;
+ vnode->lock_state = AFS_VNODE_LOCK_SETTING;
spin_unlock(&vnode->lock);
- ret = afs_set_lock(vnode, key, type);
- clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
+ ret = afs_set_lock(vnode, key, type); /* RPC */
+ key_put(key);
+
+ spin_lock(&vnode->lock);
switch (ret) {
case -EWOULDBLOCK:
_debug("blocked");
break;
case 0:
_debug("acquired");
- if (type == AFS_LOCK_READ)
- set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
- else
- set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
- ret = AFS_LOCK_GRANTED;
+ vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
+ /* Fall through */
default:
- spin_lock(&vnode->lock);
- /* the pending lock may have been withdrawn due to a
- * signal */
- if (list_entry(vnode->pending_locks.next,
- struct file_lock, fl_u.afs.link) == fl) {
- fl->fl_u.afs.state = ret;
- if (ret == AFS_LOCK_GRANTED)
- afs_grant_locks(vnode, fl);
- else
- list_del_init(&fl->fl_u.afs.link);
- wake_up(&fl->fl_wait);
- spin_unlock(&vnode->lock);
- } else {
+ /* Pass the lock or the error onto the first locker in
+ * the list - if they're looking for this type of lock.
+ * If they're not, we assume that whoever asked for it
+ * took a signal.
+ */
+ if (list_empty(&vnode->pending_locks)) {
_debug("withdrawn");
- clear_bit(AFS_VNODE_READLOCKED, &vnode->flags);
- clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
- spin_unlock(&vnode->lock);
- afs_release_lock(vnode, key);
- if (!list_empty(&vnode->pending_locks))
- afs_lock_may_be_available(vnode);
+ vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
+ goto again;
}
- break;
+
+ fl = list_entry(vnode->pending_locks.next,
+ struct file_lock, fl_u.afs.link);
+ type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+ if (vnode->lock_type != type) {
+ _debug("changed");
+ vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
+ goto again;
+ }
+
+ fl->fl_u.afs.state = ret;
+ if (ret == 0)
+ afs_grant_locks(vnode, fl);
+ else
+ list_del_init(&fl->fl_u.afs.link);
+ wake_up(&fl->fl_wait);
+ spin_unlock(&vnode->lock);
+ _leave(" [granted]");
+ return;
}
- key_put(key);
- _leave(" [pend]");
+
+ default:
+ /* Looks like a lock request was withdrawn. */
+ spin_unlock(&vnode->lock);
+ _leave(" [no]");
return;
}
-
- /* looks like the lock request was withdrawn on a signal */
- spin_unlock(&vnode->lock);
- _leave(" [no locks]");
}
/*
@@ -298,15 +317,105 @@ void afs_lock_work(struct work_struct *work)
* AF_RXRPC
* - the caller must hold the vnode lock
*/
-static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
+static void afs_defer_unlock(struct afs_vnode *vnode)
{
- cancel_delayed_work(&vnode->lock_work);
- if (!test_and_clear_bit(AFS_VNODE_READLOCKED, &vnode->flags) &&
- !test_and_clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
- BUG();
- if (test_and_set_bit(AFS_VNODE_UNLOCKING, &vnode->flags))
- BUG();
- vnode->unlock_key = key_get(key);
+ _enter("");
+
+ if (vnode->lock_state == AFS_VNODE_LOCK_GRANTED ||
+ vnode->lock_state == AFS_VNODE_LOCK_EXTENDING) {
+ cancel_delayed_work(&vnode->lock_work);
+
+ vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
+ afs_lock_may_be_available(vnode);
+ }
+}
+
+/*
+ * Check that our view of the file metadata is up to date and check to see
+ * whether we think that we have a locking permit.
+ */
+static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
+ afs_lock_type_t type, bool can_sleep)
+{
+ afs_access_t access;
+ int ret;
+
+ /* Make sure we've got a callback on this file and that our view of the
+ * data version is up to date.
+ */
+ ret = afs_validate(vnode, key);
+ if (ret < 0)
+ return ret;
+
+ /* Check the permission set to see if we're actually going to be
+ * allowed to get a lock on this file.
+ */
+ ret = afs_check_permit(vnode, key, &access);
+ if (ret < 0)
+ return ret;
+
+ /* At a rough estimation, you need LOCK, WRITE or INSERT perm to
+ * read-lock a file and WRITE or INSERT perm to write-lock a file.
+ *
+ * We can't rely on the server to do this for us since if we want to
+ * share a read lock that we already have, we won't go the server.
+ */
+ if (type == AFS_LOCK_READ) {
+ if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK)))
+ return -EACCES;
+ if (vnode->status.lock_count == -1 && !can_sleep)
+ return -EAGAIN; /* Write locked */
+ } else {
+ if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE)))
+ return -EACCES;
+ if (vnode->status.lock_count != 0 && !can_sleep)
+ return -EAGAIN; /* Locked */
+ }
+
+ return 0;
+}
+
+/*
+ * Remove the front runner from the pending queue.
+ * - The caller must hold vnode->lock.
+ */
+static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
+{
+ struct file_lock *next;
+
+ _enter("");
+
+ /* ->lock_type, ->lock_key and ->lock_state only belong to this
+ * file_lock if we're at the front of the pending queue or if we have
+ * the lock granted or if the lock_state is NEED_UNLOCK or UNLOCKING.
+ */
+ if (vnode->granted_locks.next == &fl->fl_u.afs.link &&
+ vnode->granted_locks.prev == &fl->fl_u.afs.link) {
+ list_del_init(&fl->fl_u.afs.link);
+ afs_defer_unlock(vnode);
+ return;
+ }
+
+ if (!list_empty(&vnode->granted_locks) ||
+ vnode->pending_locks.next != &fl->fl_u.afs.link) {
+ list_del_init(&fl->fl_u.afs.link);
+ return;
+ }
+
+ list_del_init(&fl->fl_u.afs.link);
+ key_put(vnode->lock_key);
+ vnode->lock_key = NULL;
+ vnode->lock_state = AFS_VNODE_LOCK_NONE;
+
+ if (list_empty(&vnode->pending_locks))
+ return;
+
+ /* The new front of the queue now owns the state variables. */
+ next = list_entry(vnode->pending_locks.next,
+ struct file_lock, fl_u.afs.link);
+ vnode->lock_key = afs_file_key(next->fl_file);
+ vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+ vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
afs_lock_may_be_available(vnode);
}
@@ -315,7 +424,7 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
*/
static int afs_do_setlk(struct file *file, struct file_lock *fl)
{
- struct inode *inode = file_inode(file);
+ struct inode *inode = locks_inode(file);
struct afs_vnode *vnode = AFS_FS_I(inode);
afs_lock_type_t type;
struct key *key = afs_file_key(file);
@@ -333,165 +442,136 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
- spin_lock(&inode->i_lock);
-
- /* make sure we've got a callback on this file and that our view of the
- * data version is up to date */
- ret = afs_validate(vnode, key);
+ ret = afs_do_setlk_check(vnode, key, type, fl->fl_flags & FL_SLEEP);
if (ret < 0)
- goto error;
-
- if (vnode->status.lock_count != 0 && !(fl->fl_flags & FL_SLEEP)) {
- ret = -EAGAIN;
- goto error;
- }
+ return ret;
spin_lock(&vnode->lock);
- /* if we've already got a readlock on the server then we can instantly
+ /* If we've already got a readlock on the server then we instantly
* grant another readlock, irrespective of whether there are any
- * pending writelocks */
+ * pending writelocks.
+ */
if (type == AFS_LOCK_READ &&
- vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
+ vnode->lock_state == AFS_VNODE_LOCK_GRANTED &&
+ vnode->lock_type == AFS_LOCK_READ) {
_debug("instant readlock");
- ASSERTCMP(vnode->flags &
- ((1 << AFS_VNODE_LOCKING) |
- (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
ASSERT(!list_empty(&vnode->granted_locks));
- goto sharing_existing_lock;
+ goto share_existing_lock;
}
- /* if there's no-one else with a lock on this vnode, then we need to
- * ask the server for a lock */
- if (list_empty(&vnode->pending_locks) &&
- list_empty(&vnode->granted_locks)) {
- _debug("not locked");
- ASSERTCMP(vnode->flags &
- ((1 << AFS_VNODE_LOCKING) |
- (1 << AFS_VNODE_READLOCKED) |
- (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
- list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
- set_bit(AFS_VNODE_LOCKING, &vnode->flags);
- spin_unlock(&vnode->lock);
+ list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
- ret = afs_set_lock(vnode, key, type);
- clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
- switch (ret) {
- case 0:
- _debug("acquired");
- goto acquired_server_lock;
- case -EWOULDBLOCK:
- _debug("would block");
- spin_lock(&vnode->lock);
- ASSERT(list_empty(&vnode->granted_locks));
- ASSERTCMP(vnode->pending_locks.next, ==,
- &fl->fl_u.afs.link);
- goto wait;
- default:
- spin_lock(&vnode->lock);
- list_del_init(&fl->fl_u.afs.link);
- spin_unlock(&vnode->lock);
- goto error;
- }
- }
+ if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
+ goto need_to_wait;
- /* otherwise, we need to wait for a local lock to become available */
- _debug("wait local");
- list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
-wait:
- if (!(fl->fl_flags & FL_SLEEP)) {
- _debug("noblock");
- ret = -EAGAIN;
- goto abort_attempt;
- }
+ /* We don't have a lock on this vnode and we aren't currently waiting
+ * for one either, so ask the server for a lock.
+ *
+ * Note that we need to be careful if we get interrupted by a signal
+ * after dispatching the request as we may still get the lock, even
+ * though we don't wait for the reply (it's not too bad a problem - the
+ * lock will expire in 10 mins anyway).
+ */
+ _debug("not locked");
+ vnode->lock_key = key_get(key);
+ vnode->lock_type = type;
+ vnode->lock_state = AFS_VNODE_LOCK_SETTING;
spin_unlock(&vnode->lock);
- /* now we need to sleep and wait for the lock manager thread to get the
- * lock from the server */
- _debug("sleep");
- ret = wait_event_interruptible(fl->fl_wait,
- fl->fl_u.afs.state <= AFS_LOCK_GRANTED);
- if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
- ret = fl->fl_u.afs.state;
- if (ret < 0)
- goto error;
- spin_lock(&vnode->lock);
- goto given_lock;
- }
-
- /* we were interrupted, but someone may still be in the throes of
- * giving us the lock */
- _debug("intr");
- ASSERTCMP(ret, ==, -ERESTARTSYS);
+ ret = afs_set_lock(vnode, key, type); /* RPC */
spin_lock(&vnode->lock);
- if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
- ret = fl->fl_u.afs.state;
- if (ret < 0) {
- spin_unlock(&vnode->lock);
- goto error;
- }
- goto given_lock;
- }
+ switch (ret) {
+ default:
+ goto abort_attempt;
-abort_attempt:
- /* we aren't going to get the lock, either because we're unwilling to
- * wait, or because some signal happened */
- _debug("abort");
- if (list_empty(&vnode->granted_locks) &&
- vnode->pending_locks.next == &fl->fl_u.afs.link) {
- if (vnode->pending_locks.prev != &fl->fl_u.afs.link) {
- /* kick the next pending lock into having a go */
- list_del_init(&fl->fl_u.afs.link);
- afs_lock_may_be_available(vnode);
- }
- } else {
- list_del_init(&fl->fl_u.afs.link);
+ case -EWOULDBLOCK:
+ /* The server doesn't have a lock-waiting queue, so the client
+ * will have to retry. The server will break the outstanding
+ * callbacks on a file when a lock is released.
+ */
+ _debug("would block");
+ ASSERT(list_empty(&vnode->granted_locks));
+ ASSERTCMP(vnode->pending_locks.next, ==, &fl->fl_u.afs.link);
+ vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
+ goto need_to_wait;
+
+ case 0:
+ _debug("acquired");
+ break;
}
- spin_unlock(&vnode->lock);
- goto error;
-acquired_server_lock:
/* we've acquired a server lock, but it needs to be renewed after 5
* mins */
- spin_lock(&vnode->lock);
+ vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
afs_schedule_lock_extension(vnode);
- if (type == AFS_LOCK_READ)
- set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
- else
- set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
-sharing_existing_lock:
+
+share_existing_lock:
/* the lock has been granted as far as we're concerned... */
fl->fl_u.afs.state = AFS_LOCK_GRANTED;
list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
+
given_lock:
/* ... but we do still need to get the VFS's blessing */
- ASSERT(!(vnode->flags & (1 << AFS_VNODE_LOCKING)));
- ASSERT((vnode->flags & ((1 << AFS_VNODE_READLOCKED) |
- (1 << AFS_VNODE_WRITELOCKED))) != 0);
+ spin_unlock(&vnode->lock);
+
ret = posix_lock_file(file, fl, NULL);
if (ret < 0)
goto vfs_rejected_lock;
- spin_unlock(&vnode->lock);
- /* again, make sure we've got a callback on this file and, again, make
+ /* Again, make sure we've got a callback on this file and, again, make
* sure that our view of the data version is up to date (we ignore
- * errors incurred here and deal with the consequences elsewhere) */
+ * errors incurred here and deal with the consequences elsewhere).
+ */
afs_validate(vnode, key);
+ _leave(" = 0");
+ return 0;
-error:
- spin_unlock(&inode->i_lock);
+need_to_wait:
+ /* We're going to have to wait. Either this client doesn't have a lock
+ * on the server yet and we need to wait for a callback to occur, or
+ * the client does have a lock on the server, but it belongs to some
+ * other process(es) and is incompatible with the lock we want.
+ */
+ ret = -EAGAIN;
+ if (fl->fl_flags & FL_SLEEP) {
+ spin_unlock(&vnode->lock);
+
+ _debug("sleep");
+ ret = wait_event_interruptible(fl->fl_wait,
+ fl->fl_u.afs.state != AFS_LOCK_PENDING);
+
+ spin_lock(&vnode->lock);
+ }
+
+ if (fl->fl_u.afs.state == AFS_LOCK_GRANTED)
+ goto given_lock;
+ if (fl->fl_u.afs.state < 0)
+ ret = fl->fl_u.afs.state;
+
+abort_attempt:
+ /* we aren't going to get the lock, either because we're unwilling to
+ * wait, or because some signal happened */
+ _debug("abort");
+ afs_dequeue_lock(vnode, fl);
+
+error_unlock:
+ spin_unlock(&vnode->lock);
_leave(" = %d", ret);
return ret;
vfs_rejected_lock:
- /* the VFS rejected the lock we just obtained, so we have to discard
- * what we just got */
+ /* The VFS rejected the lock we just obtained, so we have to discard
+ * what we just got. We defer this to the lock manager work item to
+ * deal with.
+ */
_debug("vfs refused %d", ret);
+ spin_lock(&vnode->lock);
list_del_init(&fl->fl_u.afs.link);
if (list_empty(&vnode->granted_locks))
- afs_defer_unlock(vnode, key);
- goto abort_attempt;
+ afs_defer_unlock(vnode);
+ goto error_unlock;
}
/*
@@ -499,34 +579,21 @@ vfs_rejected_lock:
*/
static int afs_do_unlk(struct file *file, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
- struct key *key = afs_file_key(file);
+ struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
int ret;
_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+ /* Flush all pending writes before doing anything with locks. */
+ vfs_fsync(file, 0);
+
/* only whole-file unlocks are supported */
if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
return -EINVAL;
- fl->fl_ops = &afs_lock_ops;
- INIT_LIST_HEAD(&fl->fl_u.afs.link);
- fl->fl_u.afs.state = AFS_LOCK_PENDING;
-
- spin_lock(&vnode->lock);
ret = posix_lock_file(file, fl, NULL);
- if (ret < 0) {
- spin_unlock(&vnode->lock);
- _leave(" = %d [vfs]", ret);
- return ret;
- }
-
- /* discard the server lock only if all granted locks are gone */
- if (list_empty(&vnode->granted_locks))
- afs_defer_unlock(vnode, key);
- spin_unlock(&vnode->lock);
- _leave(" = 0");
- return 0;
+ _leave(" = %d [%u]", ret, vnode->lock_state);
+ return ret;
}
/*
@@ -534,7 +601,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
*/
static int afs_do_getlk(struct file *file, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
+ struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
struct key *key = afs_file_key(file);
int ret, lock_count;
@@ -542,29 +609,25 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
fl->fl_type = F_UNLCK;
- inode_lock(&vnode->vfs_inode);
-
/* check local lock records first */
- ret = 0;
posix_test_lock(file, fl);
if (fl->fl_type == F_UNLCK) {
/* no local locks; consult the server */
ret = afs_fetch_status(vnode, key);
if (ret < 0)
goto error;
- lock_count = vnode->status.lock_count;
- if (lock_count) {
- if (lock_count > 0)
- fl->fl_type = F_RDLCK;
- else
- fl->fl_type = F_WRLCK;
- fl->fl_start = 0;
- fl->fl_end = OFFSET_MAX;
- }
+
+ lock_count = READ_ONCE(vnode->status.lock_count);
+ if (lock_count > 0)
+ fl->fl_type = F_RDLCK;
+ else
+ fl->fl_type = F_WRLCK;
+ fl->fl_start = 0;
+ fl->fl_end = OFFSET_MAX;
}
+ ret = 0;
error:
- inode_unlock(&vnode->vfs_inode);
_leave(" = %d [%hd]", ret, fl->fl_type);
return ret;
}
@@ -574,7 +637,7 @@ error:
*/
int afs_lock(struct file *file, int cmd, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
_enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -597,7 +660,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
*/
int afs_flock(struct file *file, int cmd, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
_enter("{%x:%u},%d,{t=%x,fl=%x}",
vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -627,9 +690,13 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
*/
static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
{
+ struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
+
_enter("");
+ spin_lock(&vnode->lock);
list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link);
+ spin_unlock(&vnode->lock);
}
/*
@@ -638,7 +705,12 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
*/
static void afs_fl_release_private(struct file_lock *fl)
{
+ struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
+
_enter("");
- list_del_init(&fl->fl_u.afs.link);
+ spin_lock(&vnode->lock);
+ afs_dequeue_lock(vnode, fl);
+ _debug("state %u for %p", vnode->lock_state, vnode);
+ spin_unlock(&vnode->lock);
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index bd8dcee7e066..804d1f905622 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -430,8 +430,21 @@ struct afs_volume {
u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
};
+enum afs_lock_state {
+ AFS_VNODE_LOCK_NONE, /* The vnode has no lock on the server */
+ AFS_VNODE_LOCK_WAITING_FOR_CB, /* We're waiting for the server to break the callback */
+ AFS_VNODE_LOCK_SETTING, /* We're asking the server for a lock */
+ AFS_VNODE_LOCK_GRANTED, /* We have a lock on the server */
+ AFS_VNODE_LOCK_EXTENDING, /* We're extending a lock on the server */
+ AFS_VNODE_LOCK_NEED_UNLOCK, /* We need to unlock on the server */
+ AFS_VNODE_LOCK_UNLOCKING, /* We're telling the server to unlock */
+};
+
/*
- * AFS inode private data
+ * AFS inode private data.
+ *
+ * Note that afs_alloc_inode() *must* reset anything that could incorrectly
+ * leak from one inode to another.
*/
struct afs_vnode {
struct inode vfs_inode; /* the VFS's inode record */
@@ -454,18 +467,16 @@ struct afs_vnode {
#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
-#define AFS_VNODE_LOCKING 6 /* set if waiting for lock on vnode */
-#define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */
-#define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */
-#define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */
-#define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */
-#define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */
+#define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */
+#define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */
struct list_head wb_keys; /* List of keys available for writeback */
struct list_head pending_locks; /* locks waiting to be granted */
struct list_head granted_locks; /* locks granted on this file */
struct delayed_work lock_work; /* work to be done in locking */
- struct key *unlock_key; /* key to be used in unlocking */
+ struct key *lock_key; /* Key to be used in lock ops */
+ enum afs_lock_state lock_state : 8;
+ afs_lock_type_t lock_type : 8;
/* outstanding callback notification on this file */
struct afs_cb_interest *cb_interest; /* Server on which this resides */
@@ -843,6 +854,7 @@ extern void afs_clear_permits(struct afs_vnode *);
extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
+extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
extern int afs_permission(struct inode *, int);
extern void __exit afs_clean_up_permit_cache(void);
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index e728ca1776c9..d04511fb3879 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -46,8 +46,7 @@ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode
return false;
}
- if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) ||
- test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
+ if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
return true;
}
@@ -117,7 +116,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
case VSALVAGING: m = "being salvaged"; break;
default: m = "busy"; break;
}
-
+
pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
}
@@ -438,24 +437,67 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
_enter("");
- if (!cbi) {
- fc->ac.error = -ESTALE;
+ switch (fc->ac.error) {
+ case SHRT_MAX:
+ if (!cbi) {
+ fc->ac.error = -ESTALE;
+ fc->flags |= AFS_FS_CURSOR_STOP;
+ return false;
+ }
+
+ fc->cbi = afs_get_cb_interest(vnode->cb_interest);
+
+ read_lock(&cbi->server->fs_lock);
+ alist = rcu_dereference_protected(cbi->server->addresses,
+ lockdep_is_held(&cbi->server->fs_lock));
+ afs_get_addrlist(alist);
+ read_unlock(&cbi->server->fs_lock);
+ if (!alist) {
+ fc->ac.error = -ESTALE;
+ fc->flags |= AFS_FS_CURSOR_STOP;
+ return false;
+ }
+
+ fc->ac.alist = alist;
+ fc->ac.addr = NULL;
+ fc->ac.start = READ_ONCE(alist->index);
+ fc->ac.index = fc->ac.start;
+ fc->ac.error = 0;
+ fc->ac.begun = false;
+ goto iterate_address;
+
+ case 0:
+ default:
+ /* Success or local failure. Stop. */
fc->flags |= AFS_FS_CURSOR_STOP;
+ _leave(" = f [okay/local %d]", fc->ac.error);
return false;
- }
- read_lock(&cbi->server->fs_lock);
- alist = afs_get_addrlist(cbi->server->addresses);
- read_unlock(&cbi->server->fs_lock);
- if (!alist) {
- fc->ac.error = -ESTALE;
+ case -ECONNABORTED:
fc->flags |= AFS_FS_CURSOR_STOP;
+ _leave(" = f [abort]");
return false;
+
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ case -ETIMEDOUT:
+ case -ETIME:
+ _debug("no conn");
+ goto iterate_address;
}
- fc->ac.alist = alist;
- fc->ac.error = 0;
- return true;
+iterate_address:
+ /* Iterate over the current server's address list to try and find an
+ * address on which it will respond to us.
+ */
+ if (afs_iterate_addresses(&fc->ac)) {
+ _leave(" = t");
+ return true;
+ }
+
+ afs_end_cursor(&fc->ac);
+ return false;
}
/*
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 46a881a4d08f..b88b7d45fdaa 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -120,7 +120,7 @@ static void afs_hash_permits(struct afs_permits *permits)
void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
unsigned int cb_break)
{
- struct afs_permits *permits, *xpermits, *replacement, *new = NULL;
+ struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL;
afs_access_t caller_access = READ_ONCE(vnode->status.caller_access);
size_t size = 0;
bool changed = false;
@@ -204,7 +204,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
new = kzalloc(sizeof(struct afs_permits) +
sizeof(struct afs_permit) * size, GFP_NOFS);
if (!new)
- return;
+ goto out_put;
refcount_set(&new->usage, 1);
new->nr_permits = size;
@@ -229,8 +229,6 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
afs_hash_permits(new);
- afs_put_permits(permits);
-
/* Now see if the permit list we want is actually already available */
spin_lock(&afs_permits_lock);
@@ -262,11 +260,15 @@ found:
kfree(new);
spin_lock(&vnode->lock);
- if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) ||
- permits != rcu_access_pointer(vnode->permit_cache))
- goto someone_else_changed_it_unlock;
- rcu_assign_pointer(vnode->permit_cache, replacement);
+ zap = rcu_access_pointer(vnode->permit_cache);
+ if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) &&
+ zap == permits)
+ rcu_assign_pointer(vnode->permit_cache, replacement);
+ else
+ zap = replacement;
spin_unlock(&vnode->lock);
+ afs_put_permits(zap);
+out_put:
afs_put_permits(permits);
return;
@@ -284,8 +286,8 @@ someone_else_changed_it:
* permitted to be accessed with this authorisation, and if so, what access it
* is granted
*/
-static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
- afs_access_t *_access)
+int afs_check_permit(struct afs_vnode *vnode, struct key *key,
+ afs_access_t *_access)
{
struct afs_permits *permits;
bool valid = false;
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 26bad7032bba..0ab3f8457839 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -17,7 +17,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
{
int i;
- if (refcount_dec_and_test(&slist->usage)) {
+ if (slist && refcount_dec_and_test(&slist->usage)) {
for (i = 0; i < slist->nr_servers; i++) {
afs_put_cb_interest(net, slist->servers[i].cb_interest);
afs_put_server(net, slist->servers[i].server);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 875b5eb02242..1037dd41a622 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -496,10 +496,10 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
if (ret < 0)
goto error_sb;
as = NULL;
- sb->s_flags |= MS_ACTIVE;
+ sb->s_flags |= SB_ACTIVE;
} else {
_debug("reuse");
- ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
+ ASSERTCMP(sb->s_flags, &, SB_ACTIVE);
afs_destroy_sbi(as);
as = NULL;
}
@@ -536,7 +536,9 @@ static void afs_kill_super(struct super_block *sb)
}
/*
- * initialise an inode cache slab element prior to any use
+ * Initialise an inode cache slab element prior to any use. Note that
+ * afs_alloc_inode() *must* reset anything that could incorrectly leak from one
+ * inode to another.
*/
static void afs_i_init_once(void *_vnode)
{
@@ -568,11 +570,21 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
atomic_inc(&afs_count_active_inodes);
+ /* Reset anything that shouldn't leak from one inode to the next. */
memset(&vnode->fid, 0, sizeof(vnode->fid));
memset(&vnode->status, 0, sizeof(vnode->status));
vnode->volume = NULL;
+ vnode->lock_key = NULL;
+ vnode->permit_cache = NULL;
+ vnode->cb_interest = NULL;
+#ifdef CONFIG_AFS_FSCACHE
+ vnode->cache = NULL;
+#endif
+
vnode->flags = 1 << AFS_VNODE_UNSET;
+ vnode->cb_type = 0;
+ vnode->lock_state = AFS_VNODE_LOCK_NONE;
_leave(" = %p", &vnode->vfs_inode);
return &vnode->vfs_inode;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 18e46e31523c..cb5f8a3df577 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -119,6 +119,11 @@ try_again:
}
if (f != t) {
+ if (PageWriteback(page)) {
+ trace_afs_page_dirty(vnode, tracepoint_string("alrdy"),
+ page->index, priv);
+ goto flush_conflicting_write;
+ }
if (to < f || from > t)
goto flush_conflicting_write;
if (from < f)
diff --git a/fs/aio.c b/fs/aio.c
index e6de7715228c..a062d75109cb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1297,20 +1297,10 @@ static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
static long read_events(struct kioctx *ctx, long min_nr, long nr,
struct io_event __user *event,
- struct timespec __user *timeout)
+ ktime_t until)
{
- ktime_t until = KTIME_MAX;
long ret = 0;
- if (timeout) {
- struct timespec ts;
-
- if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
- return -EFAULT;
-
- until = timespec_to_ktime(ts);
- }
-
/*
* Note that aio_read_events() is being called as the conditional - i.e.
* we're calling it after prepare_to_wait() has set task state to
@@ -1826,6 +1816,25 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
return ret;
}
+static long do_io_getevents(aio_context_t ctx_id,
+ long min_nr,
+ long nr,
+ struct io_event __user *events,
+ struct timespec64 *ts)
+{
+ ktime_t until = ts ? timespec64_to_ktime(*ts) : KTIME_MAX;
+ struct kioctx *ioctx = lookup_ioctx(ctx_id);
+ long ret = -EINVAL;
+
+ if (likely(ioctx)) {
+ if (likely(min_nr <= nr && min_nr >= 0))
+ ret = read_events(ioctx, min_nr, nr, events, until);
+ percpu_ref_put(&ioctx->users);
+ }
+
+ return ret;
+}
+
/* io_getevents:
* Attempts to read at least min_nr events and up to nr events from
* the completion queue for the aio_context specified by ctx_id. If
@@ -1844,15 +1853,14 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
struct io_event __user *, events,
struct timespec __user *, timeout)
{
- struct kioctx *ioctx = lookup_ioctx(ctx_id);
- long ret = -EINVAL;
+ struct timespec64 ts;
- if (likely(ioctx)) {
- if (likely(min_nr <= nr && min_nr >= 0))
- ret = read_events(ioctx, min_nr, nr, events, timeout);
- percpu_ref_put(&ioctx->users);
+ if (timeout) {
+ if (unlikely(get_timespec64(&ts, timeout)))
+ return -EFAULT;
}
- return ret;
+
+ return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
}
#ifdef CONFIG_COMPAT
@@ -1862,17 +1870,14 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
struct io_event __user *, events,
struct compat_timespec __user *, timeout)
{
- struct timespec t;
- struct timespec __user *ut = NULL;
+ struct timespec64 t;
if (timeout) {
- if (compat_get_timespec(&t, timeout))
+ if (compat_get_timespec64(&t, timeout))
return -EFAULT;
- ut = compat_alloc_user_space(sizeof(*ut));
- if (copy_to_user(ut, &t, sizeof(t)))
- return -EFAULT;
}
- return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
+
+ return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
}
#endif
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d79ced925861..82e8f6edfb48 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk)
pr_debug("waiting for mount name=%pd\n", path->dentry);
status = autofs4_wait(sbi, path, NFY_MOUNT);
pr_debug("mount wait done status=%d\n", status);
- ino->last_used = jiffies;
}
+ ino->last_used = jiffies;
return status;
}
@@ -321,21 +321,16 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
*/
if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
struct dentry *parent = dentry->d_parent;
+ struct autofs_info *ino;
struct dentry *new;
new = d_lookup(parent, &dentry->d_name);
if (!new)
return NULL;
- if (new == dentry)
- dput(new);
- else {
- struct autofs_info *ino;
-
- ino = autofs4_dentry_ino(new);
- ino->last_used = jiffies;
- dput(path->dentry);
- path->dentry = new;
- }
+ ino = autofs4_dentry_ino(new);
+ ino->last_used = jiffies;
+ dput(path->dentry);
+ path->dentry = new;
}
return path->dentry;
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 4ac49d038bf3..8fc41705c7cd 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -81,7 +81,8 @@ static int autofs4_write(struct autofs_sb_info *sbi,
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
- return (bytes > 0);
+ /* if 'wr' returned 0 (impossible) we assume -EIO (safe) */
+ return bytes == 0 ? 0 : wr < 0 ? wr : -EIO;
}
static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
@@ -95,6 +96,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
} pkt;
struct file *pipe = NULL;
size_t pktsz;
+ int ret;
pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n",
(unsigned long) wq->wait_queue_token,
@@ -169,7 +171,18 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
mutex_unlock(&sbi->wq_mutex);
if (autofs4_write(sbi, pipe, &pkt, pktsz))
+ switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
+ case 0:
+ break;
+ case -ENOMEM:
+ case -ERESTARTSYS:
+ /* Just fail this one */
+ autofs4_wait_release(sbi, wq->wait_queue_token, ret);
+ break;
+ default:
autofs4_catatonic_mode(sbi);
+ break;
+ }
fput(pipe);
}
diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog
index 75a461cfaca6..16f2dfe8c2f7 100644
--- a/fs/befs/ChangeLog
+++ b/fs/befs/ChangeLog
@@ -365,7 +365,7 @@ Version 0.4 (2001-10-28)
(fs/befs/super.c)
* Tell the kernel to only mount befs read-only.
- By setting the MS_RDONLY flag in befs_read_super().
+ By setting the SB_RDONLY flag in befs_read_super().
Not that it was possible to write before. But now the kernel won't even try.
(fs/befs/super.c)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index a92355cc453b..ee236231cafa 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -841,7 +841,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
if (!sb_rdonly(sb)) {
befs_warning(sb,
"No write support. Marking filesystem read-only");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
/*
@@ -948,7 +948,7 @@ static int
befs_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- if (!(*flags & MS_RDONLY))
+ if (!(*flags & SB_RDONLY))
return -EINVAL;
return 0;
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 5429b035e249..429326b6e2e7 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1498,7 +1498,9 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
struct vm_area_struct *vma;
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
+#ifdef CONFIG_MMU
unsigned long addr;
+#endif
if (!maydump(vma, cprm->mm_flags))
continue;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b35ce16b3df3..5982c8a71f02 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -295,7 +295,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
- unsigned long nr_pages)
+ unsigned long nr_pages,
+ unsigned int write_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct bio *bio = NULL;
@@ -327,7 +328,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bdev = fs_info->fs_devices->latest_bdev;
bio = btrfs_bio_alloc(bdev, first_byte);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
refcount_set(&cb->pending_bios, 1);
@@ -374,7 +375,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_put(bio);
bio = btrfs_bio_alloc(bdev, first_byte);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
bio_add_page(bio, page, PAGE_SIZE, 0);
@@ -1528,5 +1529,5 @@ unsigned int btrfs_compress_str2level(const char *str)
if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)
return str[5] - '0';
- return 0;
+ return BTRFS_ZLIB_DEFAULT_LEVEL;
}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index da20755ebf21..0868cc554f14 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -34,6 +34,8 @@
/* Maximum size of data before compression */
#define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
+#define BTRFS_ZLIB_DEFAULT_LEVEL 3
+
struct compressed_bio {
/* number of bios pending for this compressed extent */
refcount_t pending_bios;
@@ -91,7 +93,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
- unsigned long nr_pages);
+ unsigned long nr_pages,
+ unsigned int write_flags);
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f7df5536ab61..13c260b525a1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2957,7 +2957,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
*/
static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
{
- return fs_info->sb->s_flags & MS_RDONLY || btrfs_fs_closing(fs_info);
+ return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info);
}
static inline void free_fs_info(struct btrfs_fs_info *fs_info)
@@ -3180,6 +3180,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
int nr);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
+ unsigned int extra_bits,
struct extent_state **cached_state, int dedupe);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index efce9a2fa9be..10a2a579cc7f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -610,7 +610,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
- if (found_level == 0 && btrfs_check_leaf(root, eb)) {
+ if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
@@ -3848,7 +3848,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
buf->len,
fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) {
+ /*
+ * Since btrfs_mark_buffer_dirty() can be called with item pointer set
+ * but item data not updated.
+ * So here we should only check item pointers, not item data.
+ */
+ if (btrfs_header_level(buf) == 0 &&
+ btrfs_check_leaf_relaxed(root, buf)) {
btrfs_print_leaf(buf);
ASSERT(0);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7208ecef7088..4497f937e8fb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3502,13 +3502,6 @@ again:
goto again;
}
- /* We've already setup this transaction, go ahead and exit */
- if (block_group->cache_generation == trans->transid &&
- i_size_read(inode)) {
- dcs = BTRFS_DC_SETUP;
- goto out_put;
- }
-
/*
* We want to set the generation to 0, that way if anything goes wrong
* from here on out we know not to trust this cache when we load up next
@@ -3532,6 +3525,13 @@ again:
}
WARN_ON(ret);
+ /* We've already setup this transaction, go ahead and exit */
+ if (block_group->cache_generation == trans->transid &&
+ i_size_read(inode)) {
+ dcs = BTRFS_DC_SETUP;
+ goto out_put;
+ }
+
if (i_size_read(inode) > 0) {
ret = btrfs_check_trunc_cache_free_space(fs_info,
&fs_info->global_block_rsv);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 16045ea86fc1..012d63870b99 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1984,7 +1984,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
struct btrfs_bio *bbio = NULL;
int ret;
- ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
+ ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
BUG_ON(!mirror_num);
bio = btrfs_io_bio_alloc(1);
@@ -3253,7 +3253,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
delalloc_start,
delalloc_end,
&page_started,
- nr_written);
+ nr_written, wbc);
/* File system has been set read-only */
if (ret) {
SetPageError(page);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4a8861379d3e..93dcae0c3183 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -116,7 +116,8 @@ struct extent_io_ops {
*/
int (*fill_delalloc)(void *private_data, struct page *locked_page,
u64 start, u64 end, int *page_started,
- unsigned long *nr_written);
+ unsigned long *nr_written,
+ struct writeback_control *wbc);
int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
@@ -365,10 +366,11 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state);
static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached_state)
+ u64 end, unsigned int extra_bits,
+ struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_UPTODATE,
+ EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
NULL, cached_state, GFP_NOFS);
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f80254d82f40..eb1bac7c8553 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -477,6 +477,47 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
}
}
+static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
+ const u64 start,
+ const u64 len,
+ struct extent_state **cached_state)
+{
+ u64 search_start = start;
+ const u64 end = start + len - 1;
+
+ while (search_start < end) {
+ const u64 search_len = end - search_start + 1;
+ struct extent_map *em;
+ u64 em_len;
+ int ret = 0;
+
+ em = btrfs_get_extent(inode, NULL, 0, search_start,
+ search_len, 0);
+ if (IS_ERR(em))
+ return PTR_ERR(em);
+
+ if (em->block_start != EXTENT_MAP_HOLE)
+ goto next;
+
+ em_len = em->len;
+ if (em->start < search_start)
+ em_len -= search_start - em->start;
+ if (em_len > search_len)
+ em_len = search_len;
+
+ ret = set_extent_bit(&inode->io_tree, search_start,
+ search_start + em_len - 1,
+ EXTENT_DELALLOC_NEW,
+ NULL, cached_state, GFP_NOFS);
+next:
+ search_start = extent_map_end(em);
+ free_extent_map(em);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
/*
* after copy_from_user, pages need to be dirtied and we need to make
* sure holes are created between the current EOF and the start of
@@ -497,14 +538,34 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
u64 end_of_last_block;
u64 end_pos = pos + write_bytes;
loff_t isize = i_size_read(inode);
+ unsigned int extra_bits = 0;
start_pos = pos & ~((u64) fs_info->sectorsize - 1);
num_bytes = round_up(write_bytes + pos - start_pos,
fs_info->sectorsize);
end_of_last_block = start_pos + num_bytes - 1;
+
+ if (!btrfs_is_free_space_inode(BTRFS_I(inode))) {
+ if (start_pos >= isize &&
+ !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) {
+ /*
+ * There can't be any extents following eof in this case
+ * so just set the delalloc new bit for the range
+ * directly.
+ */
+ extra_bits |= EXTENT_DELALLOC_NEW;
+ } else {
+ err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode),
+ start_pos,
+ num_bytes, cached);
+ if (err)
+ return err;
+ }
+ }
+
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
- cached, 0);
+ extra_bits, cached, 0);
if (err)
return err;
@@ -1404,47 +1465,6 @@ fail:
}
-static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
- const u64 start,
- const u64 len,
- struct extent_state **cached_state)
-{
- u64 search_start = start;
- const u64 end = start + len - 1;
-
- while (search_start < end) {
- const u64 search_len = end - search_start + 1;
- struct extent_map *em;
- u64 em_len;
- int ret = 0;
-
- em = btrfs_get_extent(inode, NULL, 0, search_start,
- search_len, 0);
- if (IS_ERR(em))
- return PTR_ERR(em);
-
- if (em->block_start != EXTENT_MAP_HOLE)
- goto next;
-
- em_len = em->len;
- if (em->start < search_start)
- em_len -= search_start - em->start;
- if (em_len > search_len)
- em_len = search_len;
-
- ret = set_extent_bit(&inode->io_tree, search_start,
- search_start + em_len - 1,
- EXTENT_DELALLOC_NEW,
- NULL, cached_state, GFP_NOFS);
-next:
- search_start = extent_map_end(em);
- free_extent_map(em);
- if (ret)
- return ret;
- }
- return 0;
-}
-
/*
* This function locks the extent and properly waits for data=ordered extents
* to finish before allowing the pages to be modified if need.
@@ -1473,10 +1493,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
+ round_up(pos + write_bytes - start_pos,
fs_info->sectorsize) - 1;
- if (start_pos < inode->vfs_inode.i_size ||
- (inode->flags & BTRFS_INODE_PREALLOC)) {
+ if (start_pos < inode->vfs_inode.i_size) {
struct btrfs_ordered_extent *ordered;
- unsigned int clear_bits;
lock_extent_bits(&inode->io_tree, start_pos, last_pos,
cached_state);
@@ -1498,19 +1516,10 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
}
if (ordered)
btrfs_put_ordered_extent(ordered);
- ret = btrfs_find_new_delalloc_bytes(inode, start_pos,
- last_pos - start_pos + 1,
- cached_state);
- clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG;
- if (ret)
- clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED;
- clear_extent_bit(&inode->io_tree, start_pos,
- last_pos, clear_bits,
- (clear_bits & EXTENT_LOCKED) ? 1 : 0,
- 0, cached_state, GFP_NOFS);
- if (ret)
- return ret;
+ clear_extent_bit(&inode->io_tree, start_pos, last_pos,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
+ 0, 0, cached_state, GFP_NOFS);
*lockstart = start_pos;
*lockend = last_pos;
ret = 1;
@@ -2048,6 +2057,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
len = (u64)end - (u64)start + 1;
trace_btrfs_sync_file(file, datasync);
+ btrfs_init_log_ctx(&ctx, inode);
+
/*
* We write the dirty pages in the range and wait until they complete
* out of the ->i_mutex. If so, we can flush the dirty pages by
@@ -2194,8 +2205,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
trans->sync = true;
- btrfs_init_log_ctx(&ctx, inode);
-
ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
@@ -2253,6 +2262,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = btrfs_end_transaction(trans);
}
out:
+ ASSERT(list_empty(&ctx.list));
err = file_check_and_advance_wb_err(file);
if (!ret)
ret = err;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index cdc9f4015ec3..4426d1c73e50 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1264,7 +1264,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
/* Lock all pages first so we can lock the extent safely. */
ret = io_ctl_prepare_pages(io_ctl, inode, 0);
if (ret)
- goto out;
+ goto out_unlock;
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
&cached_state);
@@ -1358,6 +1358,7 @@ out_nospc_locked:
out_nospc:
cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
+out_unlock:
if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
up_write(&block_group->data_rwsem);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b93fe05a39c7..993061f83067 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -378,6 +378,7 @@ struct async_cow {
struct page *locked_page;
u64 start;
u64 end;
+ unsigned int write_flags;
struct list_head extents;
struct btrfs_work work;
};
@@ -857,7 +858,8 @@ retry:
async_extent->ram_size,
ins.objectid,
ins.offset, async_extent->pages,
- async_extent->nr_pages)) {
+ async_extent->nr_pages,
+ async_cow->write_flags)) {
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct page *p = async_extent->pages[0];
const u64 start = async_extent->start;
@@ -1191,7 +1193,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
static int cow_file_range_async(struct inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started,
- unsigned long *nr_written)
+ unsigned long *nr_written,
+ unsigned int write_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct async_cow *async_cow;
@@ -1208,6 +1211,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
async_cow->root = root;
async_cow->locked_page = locked_page;
async_cow->start = start;
+ async_cow->write_flags = write_flags;
if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
!btrfs_test_opt(fs_info, FORCE_COMPRESS))
@@ -1577,11 +1581,13 @@ static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
*/
static int run_delalloc_range(void *private_data, struct page *locked_page,
u64 start, u64 end, int *page_started,
- unsigned long *nr_written)
+ unsigned long *nr_written,
+ struct writeback_control *wbc)
{
struct inode *inode = private_data;
int ret;
int force_cow = need_force_cow(inode, start, end);
+ unsigned int write_flags = wbc_to_write_flags(wbc);
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
@@ -1596,7 +1602,8 @@ static int run_delalloc_range(void *private_data, struct page *locked_page,
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags);
ret = cow_file_range_async(inode, locked_page, start, end,
- page_started, nr_written);
+ page_started, nr_written,
+ write_flags);
}
if (ret)
btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
@@ -2025,11 +2032,12 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
}
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
+ unsigned int extra_bits,
struct extent_state **cached_state, int dedupe)
{
WARN_ON((end & (PAGE_SIZE - 1)) == 0);
return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
- cached_state);
+ extra_bits, cached_state);
}
/* see btrfs_writepage_start_hook for details on why this is required */
@@ -2090,7 +2098,7 @@ again:
goto out;
}
- btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state,
+ btrfs_set_extent_delalloc(inode, page_start, page_end, 0, &cached_state,
0);
ClearPageChecked(page);
set_page_dirty(page);
@@ -4790,7 +4798,7 @@ again:
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
- ret = btrfs_set_extent_delalloc(inode, block_start, block_end,
+ ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
&cached_state, 0);
if (ret) {
unlock_extent_cached(io_tree, block_start, block_end,
@@ -5438,6 +5446,14 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
goto out_err;
btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
+ if (location->type != BTRFS_INODE_ITEM_KEY &&
+ location->type != BTRFS_ROOT_ITEM_KEY) {
+ btrfs_warn(root->fs_info,
+"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
+ __func__, name, btrfs_ino(BTRFS_I(dir)),
+ location->objectid, location->type, location->offset);
+ goto out_err;
+ }
out:
btrfs_free_path(path);
return ret;
@@ -5754,8 +5770,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
return inode;
}
- BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
-
index = srcu_read_lock(&fs_info->subvol_srcu);
ret = fixup_tree_root_location(fs_info, dir, dentry,
&location, &sub_root);
@@ -9150,7 +9164,7 @@ again:
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
- ret = btrfs_set_extent_delalloc(inode, page_start, end,
+ ret = btrfs_set_extent_delalloc(inode, page_start, end, 0,
&cached_state, 0);
if (ret) {
unlock_extent_cached(io_tree, page_start, page_end,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fd172a93d11a..d748ad1c3620 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1172,7 +1172,7 @@ again:
if (!i_done || ret)
goto out;
- if (!(inode->i_sb->s_flags & MS_ACTIVE))
+ if (!(inode->i_sb->s_flags & SB_ACTIVE))
goto out;
/*
@@ -1333,7 +1333,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
* make sure we stop running if someone unmounts
* the FS
*/
- if (!(inode->i_sb->s_flags & MS_ACTIVE))
+ if (!(inode->i_sb->s_flags & SB_ACTIVE))
break;
if (btrfs_defrag_cancelled(fs_info)) {
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 4cf2eb67eba6..f0c3f00e97cb 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3268,7 +3268,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
nr++;
}
- btrfs_set_extent_delalloc(inode, page_start, page_end, NULL, 0);
+ btrfs_set_extent_delalloc(inode, page_start, page_end, 0, NULL,
+ 0);
set_page_dirty(page);
unlock_extent(&BTRFS_I(inode)->io_tree,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c10e4c70f02d..20d3300bd268 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3521,7 +3521,40 @@ out:
}
/*
- * Check if ino ino1 is an ancestor of inode ino2 in the given root.
+ * Check if inode ino2, or any of its ancestors, is inode ino1.
+ * Return 1 if true, 0 if false and < 0 on error.
+ */
+static int check_ino_in_path(struct btrfs_root *root,
+ const u64 ino1,
+ const u64 ino1_gen,
+ const u64 ino2,
+ const u64 ino2_gen,
+ struct fs_path *fs_path)
+{
+ u64 ino = ino2;
+
+ if (ino1 == ino2)
+ return ino1_gen == ino2_gen;
+
+ while (ino > BTRFS_FIRST_FREE_OBJECTID) {
+ u64 parent;
+ u64 parent_gen;
+ int ret;
+
+ fs_path_reset(fs_path);
+ ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
+ if (ret < 0)
+ return ret;
+ if (parent == ino1)
+ return parent_gen == ino1_gen;
+ ino = parent;
+ }
+ return 0;
+}
+
+/*
+ * Check if ino ino1 is an ancestor of inode ino2 in the given root for any
+ * possible path (in case ino2 is not a directory and has multiple hard links).
* Return 1 if true, 0 if false and < 0 on error.
*/
static int is_ancestor(struct btrfs_root *root,
@@ -3530,36 +3563,91 @@ static int is_ancestor(struct btrfs_root *root,
const u64 ino2,
struct fs_path *fs_path)
{
- u64 ino = ino2;
- bool free_path = false;
+ bool free_fs_path = false;
int ret = 0;
+ struct btrfs_path *path = NULL;
+ struct btrfs_key key;
if (!fs_path) {
fs_path = fs_path_alloc();
if (!fs_path)
return -ENOMEM;
- free_path = true;
+ free_fs_path = true;
}
- while (ino > BTRFS_FIRST_FREE_OBJECTID) {
- u64 parent;
- u64 parent_gen;
+ path = alloc_path_for_send();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fs_path_reset(fs_path);
- ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
- if (ret < 0) {
- if (ret == -ENOENT && ino == ino2)
- ret = 0;
- goto out;
+ key.objectid = ino2;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ while (true) {
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ u32 cur_offset = 0;
+ u32 item_size;
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ if (ret > 0)
+ break;
+ continue;
}
- if (parent == ino1) {
- ret = parent_gen == ino1_gen ? 1 : 0;
- goto out;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != ino2)
+ break;
+ if (key.type != BTRFS_INODE_REF_KEY &&
+ key.type != BTRFS_INODE_EXTREF_KEY)
+ break;
+
+ item_size = btrfs_item_size_nr(leaf, slot);
+ while (cur_offset < item_size) {
+ u64 parent;
+ u64 parent_gen;
+
+ if (key.type == BTRFS_INODE_EXTREF_KEY) {
+ unsigned long ptr;
+ struct btrfs_inode_extref *extref;
+
+ ptr = btrfs_item_ptr_offset(leaf, slot);
+ extref = (struct btrfs_inode_extref *)
+ (ptr + cur_offset);
+ parent = btrfs_inode_extref_parent(leaf,
+ extref);
+ cur_offset += sizeof(*extref);
+ cur_offset += btrfs_inode_extref_name_len(leaf,
+ extref);
+ } else {
+ parent = key.offset;
+ cur_offset = item_size;
+ }
+
+ ret = get_inode_info(root, parent, NULL, &parent_gen,
+ NULL, NULL, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ ret = check_ino_in_path(root, ino1, ino1_gen,
+ parent, parent_gen, fs_path);
+ if (ret)
+ goto out;
}
- ino = parent;
+ path->slots[0]++;
}
+ ret = 0;
out:
- if (free_path)
+ btrfs_free_path(path);
+ if (free_fs_path)
fs_path_free(fs_path);
return ret;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 65af029559b5..3a4dce153645 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -107,7 +107,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
return;
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
btrfs_info(fs_info, "forced readonly");
/*
* Note that a running device replace operation is not
@@ -137,7 +137,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
/*
* Special case: if the error is EROFS, and we're already
- * under MS_RDONLY, then it is safe here.
+ * under SB_RDONLY, then it is safe here.
*/
if (errno == -EROFS && sb_rdonly(sb))
return;
@@ -168,7 +168,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
/* Don't go through full error handling during mount */
- if (sb->s_flags & MS_BORN)
+ if (sb->s_flags & SB_BORN)
btrfs_handle_error(fs_info);
}
@@ -507,9 +507,18 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
token == Opt_compress_force ||
strncmp(args[0].from, "zlib", 4) == 0) {
compress_type = "zlib";
+
info->compress_type = BTRFS_COMPRESS_ZLIB;
- info->compress_level =
- btrfs_compress_str2level(args[0].from);
+ info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
+ /*
+ * args[0] contains uninitialized data since
+ * for these tokens we don't expect any
+ * parameter.
+ */
+ if (token != Opt_compress &&
+ token != Opt_compress_force)
+ info->compress_level =
+ btrfs_compress_str2level(args[0].from);
btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
@@ -625,7 +634,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_acl:
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
- info->sb->s_flags |= MS_POSIXACL;
+ info->sb->s_flags |= SB_POSIXACL;
break;
#else
btrfs_err(info, "support for ACL not compiled in!");
@@ -633,7 +642,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
goto out;
#endif
case Opt_noacl:
- info->sb->s_flags &= ~MS_POSIXACL;
+ info->sb->s_flags &= ~SB_POSIXACL;
break;
case Opt_notreelog:
btrfs_set_and_info(info, NOTREELOG,
@@ -851,7 +860,7 @@ check:
/*
* Extra check for current option against current flag
*/
- if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
+ if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & SB_RDONLY)) {
btrfs_err(info,
"nologreplay must be used with ro mount option");
ret = -EINVAL;
@@ -1147,7 +1156,7 @@ static int btrfs_fill_super(struct super_block *sb,
sb->s_xattr = btrfs_xattr_handlers;
sb->s_time_gran = 1;
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
#endif
sb->s_flags |= SB_I_VERSION;
sb->s_iflags |= SB_I_CGROUPWB;
@@ -1180,7 +1189,7 @@ static int btrfs_fill_super(struct super_block *sb,
}
cleancache_init_fs(sb);
- sb->s_flags |= MS_ACTIVE;
+ sb->s_flags |= SB_ACTIVE;
return 0;
fail_close:
@@ -1277,7 +1286,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",flushoncommit");
if (btrfs_test_opt(info, DISCARD))
seq_puts(seq, ",discard");
- if (!(info->sb->s_flags & MS_POSIXACL))
+ if (!(info->sb->s_flags & SB_POSIXACL))
seq_puts(seq, ",noacl");
if (btrfs_test_opt(info, SPACE_CACHE))
seq_puts(seq, ",space_cache");
@@ -1409,11 +1418,11 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
- if (flags & MS_RDONLY) {
- mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY,
+ if (flags & SB_RDONLY) {
+ mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY,
device_name, newargs);
} else {
- mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY,
+ mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY,
device_name, newargs);
if (IS_ERR(mnt)) {
root = ERR_CAST(mnt);
@@ -1565,7 +1574,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
u64 subvol_objectid = 0;
int error = 0;
- if (!(flags & MS_RDONLY))
+ if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
error = btrfs_parse_early_options(data, mode, fs_type,
@@ -1619,13 +1628,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
if (error)
goto error_fs_info;
- if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
+ if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
error = -EACCES;
goto error_close_devices;
}
bdev = fs_devices->latest_bdev;
- s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC,
+ s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
fs_info);
if (IS_ERR(s)) {
error = PTR_ERR(s);
@@ -1635,7 +1644,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
if (s->s_root) {
btrfs_close_devices(fs_devices);
free_fs_info(fs_info);
- if ((flags ^ s->s_flags) & MS_RDONLY)
+ if ((flags ^ s->s_flags) & SB_RDONLY)
error = -EBUSY;
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
@@ -1702,11 +1711,11 @@ static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
{
if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
(!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
- (flags & MS_RDONLY))) {
+ (flags & SB_RDONLY))) {
/* wait for any defraggers to finish */
wait_event(fs_info->transaction_wait,
(atomic_read(&fs_info->defrag_running) == 0));
- if (flags & MS_RDONLY)
+ if (flags & SB_RDONLY)
sync_filesystem(fs_info->sb);
}
}
@@ -1766,10 +1775,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
- if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb))
+ if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
goto out;
- if (*flags & MS_RDONLY) {
+ if (*flags & SB_RDONLY) {
/*
* this also happens on 'umount -rf' or on shutdown, when
* the filesystem is busy.
@@ -1781,10 +1790,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
/* avoid complains from lockdep et al. */
up(&fs_info->uuid_tree_rescan_sem);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
/*
- * Setting MS_RDONLY will put the cleaner thread to
+ * Setting SB_RDONLY will put the cleaner thread to
* sleep at the next loop if it's already active.
* If it's already asleep, we'll leave unused block
* groups on disk until we're mounted read-write again
@@ -1856,7 +1865,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore;
}
}
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
set_bit(BTRFS_FS_OPEN, &fs_info->flags);
}
@@ -1866,9 +1875,9 @@ out:
return 0;
restore:
- /* We've hit an error - don't reset MS_RDONLY */
+ /* We've hit an error - don't reset SB_RDONLY */
if (sb_rdonly(sb))
- old_flags |= MS_RDONLY;
+ old_flags |= SB_RDONLY;
sb->s_flags = old_flags;
fs_info->mount_opt = old_opts;
fs_info->compress_type = old_compress_type;
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index d06b1c931d05..2e7f64a3b22b 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -114,7 +114,7 @@ static int test_find_delalloc(u32 sectorsize)
* |--- delalloc ---|
* |--- search ---|
*/
- set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL);
+ set_extent_delalloc(&tmp, 0, sectorsize - 1, 0, NULL);
start = 0;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -145,7 +145,7 @@ static int test_find_delalloc(u32 sectorsize)
test_msg("Couldn't find the locked page\n");
goto out_bits;
}
- set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL);
+ set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -200,7 +200,7 @@ static int test_find_delalloc(u32 sectorsize)
*
* We are re-using our test_start from above since it works out well.
*/
- set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL);
+ set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, 0, NULL);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index f797642c013d..30affb60da51 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -968,7 +968,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
btrfs_test_inode_set_ops(inode);
/* [BTRFS_MAX_EXTENT_SIZE] */
- ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
+ ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, 0,
NULL, 0);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
@@ -984,7 +984,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
- NULL, 0);
+ 0, NULL, 0);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -1018,7 +1018,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1)
+ sectorsize - 1,
- NULL, 0);
+ 0, NULL, 0);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -1036,7 +1036,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
ret = btrfs_set_extent_delalloc(inode,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
- NULL, 0);
+ 0, NULL, 0);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -1053,7 +1053,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
*/
ret = btrfs_set_extent_delalloc(inode,
BTRFS_MAX_EXTENT_SIZE + sectorsize,
- BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0);
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -1089,7 +1089,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
*/
ret = btrfs_set_extent_delalloc(inode,
BTRFS_MAX_EXTENT_SIZE + sectorsize,
- BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0);
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 114fc5f0ecc5..ce4ed6ec8f39 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -242,7 +242,8 @@ static int check_leaf_item(struct btrfs_root *root,
return ret;
}
-int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf)
+static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
+ bool check_item_data)
{
struct btrfs_fs_info *fs_info = root->fs_info;
/* No valid key type is 0, so all key should be larger than this key */
@@ -361,10 +362,15 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf)
return -EUCLEAN;
}
- /* Check if the item size and content meet other criteria */
- ret = check_leaf_item(root, leaf, &key, slot);
- if (ret < 0)
- return ret;
+ if (check_item_data) {
+ /*
+ * Check if the item size and content meet other
+ * criteria
+ */
+ ret = check_leaf_item(root, leaf, &key, slot);
+ if (ret < 0)
+ return ret;
+ }
prev_key.objectid = key.objectid;
prev_key.type = key.type;
@@ -374,6 +380,17 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf)
return 0;
}
+int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
+{
+ return check_leaf(root, leaf, true);
+}
+
+int btrfs_check_leaf_relaxed(struct btrfs_root *root,
+ struct extent_buffer *leaf)
+{
+ return check_leaf(root, leaf, false);
+}
+
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
{
unsigned long nr = btrfs_header_nritems(node);
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
index 96c486e95d70..3d53e8d6fda0 100644
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -20,7 +20,19 @@
#include "ctree.h"
#include "extent_io.h"
-int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf);
+/*
+ * Comprehensive leaf checker.
+ * Will check not only the item pointers, but also every possible member
+ * in item data.
+ */
+int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);
+
+/*
+ * Less strict leaf checker.
+ * Will only check item pointers, not reading item data.
+ */
+int btrfs_check_leaf_relaxed(struct btrfs_root *root,
+ struct extent_buffer *leaf);
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index aa7c71cff575..7bf9b31561db 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4102,7 +4102,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
if (ordered_io_err) {
ctx->io_err = -EIO;
- return 0;
+ return ctx->io_err;
}
btrfs_init_map_token(&token);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f1ecb938ba4d..49810b70afd3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -189,6 +189,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
struct btrfs_device, dev_list);
list_del(&device->dev_list);
rcu_string_free(device->name);
+ bio_put(device->flush_bio);
kfree(device);
}
kfree(fs_devices);
@@ -578,6 +579,7 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev)
fs_devs->num_devices--;
list_del(&dev->dev_list);
rcu_string_free(dev->name);
+ bio_put(dev->flush_bio);
kfree(dev);
}
break;
@@ -630,6 +632,7 @@ static noinline int device_list_add(const char *path,
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
+ bio_put(device->flush_bio);
kfree(device);
return -ENOMEM;
}
@@ -742,6 +745,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
name = rcu_string_strdup(orig_dev->name->str,
GFP_KERNEL);
if (!name) {
+ bio_put(device->flush_bio);
kfree(device);
goto error;
}
@@ -807,6 +811,7 @@ again:
list_del_init(&device->dev_list);
fs_devices->num_devices--;
rcu_string_free(device->name);
+ bio_put(device->flush_bio);
kfree(device);
}
@@ -1750,20 +1755,24 @@ static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info,
key.offset = device->devid;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0)
- goto out;
-
- if (ret > 0) {
- ret = -ENOENT;
+ if (ret) {
+ if (ret > 0)
+ ret = -ENOENT;
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
goto out;
}
ret = btrfs_del_item(trans, root, path);
- if (ret)
- goto out;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ }
+
out:
btrfs_free_path(path);
- btrfs_commit_transaction(trans);
+ if (!ret)
+ ret = btrfs_commit_transaction(trans);
return ret;
}
@@ -1993,7 +2002,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
fs_devices = srcdev->fs_devices;
list_del_rcu(&srcdev->dev_list);
- list_del_rcu(&srcdev->dev_alloc_list);
+ list_del(&srcdev->dev_alloc_list);
fs_devices->num_devices--;
if (srcdev->missing)
fs_devices->missing_devices--;
@@ -2349,6 +2358,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
name = rcu_string_strdup(device_path, GFP_KERNEL);
if (!name) {
+ bio_put(device->flush_bio);
kfree(device);
ret = -ENOMEM;
goto error;
@@ -2358,6 +2368,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
rcu_string_free(device->name);
+ bio_put(device->flush_bio);
kfree(device);
ret = PTR_ERR(trans);
goto error;
@@ -2384,7 +2395,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
if (seeding_dev) {
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
ret = btrfs_prepare_sprout(fs_info);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -2497,10 +2508,11 @@ error_sysfs:
btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
error_trans:
if (seeding_dev)
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
if (trans)
btrfs_end_transaction(trans);
rcu_string_free(device->name);
+ bio_put(device->flush_bio);
kfree(device);
error:
blkdev_put(bdev, FMODE_EXCL);
@@ -2567,6 +2579,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
name = rcu_string_strdup(device_path, GFP_KERNEL);
if (!name) {
+ bio_put(device->flush_bio);
kfree(device);
ret = -ENOMEM;
goto error;
@@ -6284,6 +6297,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
ret = find_next_devid(fs_info, &tmp);
if (ret) {
+ bio_put(dev->flush_bio);
kfree(dev);
return ERR_PTR(ret);
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ff5d32cf9578..a14b2c974c9e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1160,7 +1160,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
struct ceph_inode_info *ci = cap->ci;
struct inode *inode = &ci->vfs_inode;
struct cap_msg_args arg;
- int held, revoking, dropping;
+ int held, revoking;
int wake = 0;
int delayed = 0;
int ret;
@@ -1168,7 +1168,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
held = cap->issued | cap->implemented;
revoking = cap->implemented & ~cap->issued;
retain &= ~revoking;
- dropping = cap->issued & ~retain;
dout("__send_cap %p cap %p session %p %s -> %s (revoking %s)\n",
inode, cap, cap->session,
@@ -1712,7 +1711,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
/* if we are unmounting, flush any unused caps immediately. */
if (mdsc->stopping)
- is_delayed = 1;
+ is_delayed = true;
spin_lock(&ci->i_ceph_lock);
@@ -3189,8 +3188,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
int dirty = le32_to_cpu(m->dirty);
int cleaned = 0;
bool drop = false;
- bool wake_ci = 0;
- bool wake_mdsc = 0;
+ bool wake_ci = false;
+ bool wake_mdsc = false;
list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
if (cf->tid == flush_tid)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index f2550a076edc..ab81652198c4 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -493,6 +493,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_wb_ref = 0;
ci->i_wrbuffer_ref = 0;
ci->i_wrbuffer_ref_head = 0;
+ atomic_set(&ci->i_filelock_ref, 0);
ci->i_shared_gen = 0;
ci->i_rdcache_gen = 0;
ci->i_rdcache_revoking = 0;
@@ -786,7 +787,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
/* update inode */
ci->i_version = le64_to_cpu(info->version);
- inode->i_version++;
inode->i_rdev = le32_to_cpu(info->rdev);
inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
@@ -1185,6 +1185,7 @@ retry_lookup:
ceph_snap(d_inode(dn)) != tvino.snap)) {
dout(" dn %p points to wrong inode %p\n",
dn, d_inode(dn));
+ ceph_dir_clear_ordered(dir);
d_delete(dn);
dput(dn);
goto retry_lookup;
@@ -1322,6 +1323,7 @@ retry_lookup:
dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
dn, d_inode(dn), ceph_vinop(d_inode(dn)),
ceph_vinop(in));
+ ceph_dir_clear_ordered(dir);
d_invalidate(dn);
have_lease = false;
}
@@ -1573,6 +1575,7 @@ retry_lookup:
ceph_snap(d_inode(dn)) != tvino.snap)) {
dout(" dn %p points to wrong inode %p\n",
dn, d_inode(dn));
+ __ceph_dir_clear_ordered(ci);
d_delete(dn);
dput(dn);
goto retry_lookup;
@@ -1597,7 +1600,9 @@ retry_lookup:
&req->r_caps_reservation);
if (ret < 0) {
pr_err("fill_inode badness on %p\n", in);
- if (d_really_is_negative(dn))
+ if (d_really_is_positive(dn))
+ __ceph_dir_clear_ordered(ci);
+ else
iput(in);
d_drop(dn);
err = ret;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index e7cce412f2cf..9e66f69ee8a5 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -30,19 +30,52 @@ void __init ceph_flock_init(void)
get_random_bytes(&lock_secret, sizeof(lock_secret));
}
+static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
+{
+ struct inode *inode = file_inode(src->fl_file);
+ atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+}
+
+static void ceph_fl_release_lock(struct file_lock *fl)
+{
+ struct inode *inode = file_inode(fl->fl_file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ if (atomic_dec_and_test(&ci->i_filelock_ref)) {
+ /* clear error when all locks are released */
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
+ spin_unlock(&ci->i_ceph_lock);
+ }
+}
+
+static const struct file_lock_operations ceph_fl_lock_ops = {
+ .fl_copy_lock = ceph_fl_copy_lock,
+ .fl_release_private = ceph_fl_release_lock,
+};
+
/**
* Implement fcntl and flock locking functions.
*/
-static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
+static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
int cmd, u8 wait, struct file_lock *fl)
{
- struct inode *inode = file_inode(file);
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req;
int err;
u64 length = 0;
u64 owner;
+ if (operation == CEPH_MDS_OP_SETFILELOCK) {
+ /*
+ * increasing i_filelock_ref closes race window between
+ * handling request reply and adding file_lock struct to
+ * inode. Otherwise, auth caps may get trimmed in the
+ * window. Caller function will decrease the counter.
+ */
+ fl->fl_ops = &ceph_fl_lock_ops;
+ atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+ }
+
if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
wait = 0;
@@ -180,10 +213,12 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
*/
int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
{
- u8 lock_cmd;
- int err;
- u8 wait = 0;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int err = 0;
u16 op = CEPH_MDS_OP_SETFILELOCK;
+ u8 wait = 0;
+ u8 lock_cmd;
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
@@ -199,6 +234,26 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
else if (IS_SETLKW(cmd))
wait = 1;
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
+ err = -EIO;
+ } else if (op == CEPH_MDS_OP_SETFILELOCK) {
+ /*
+ * increasing i_filelock_ref closes race window between
+ * handling request reply and adding file_lock struct to
+ * inode. Otherwise, i_auth_cap may get trimmed in the
+ * window. Caller function will decrease the counter.
+ */
+ fl->fl_ops = &ceph_fl_lock_ops;
+ atomic_inc(&ci->i_filelock_ref);
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ if (err < 0) {
+ if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type)
+ posix_lock_file(file, fl, NULL);
+ return err;
+ }
+
if (F_RDLCK == fl->fl_type)
lock_cmd = CEPH_LOCK_SHARED;
else if (F_WRLCK == fl->fl_type)
@@ -206,16 +261,16 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
else
lock_cmd = CEPH_LOCK_UNLOCK;
- err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
+ err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
if (!err) {
- if (op != CEPH_MDS_OP_GETFILELOCK) {
+ if (op == CEPH_MDS_OP_SETFILELOCK) {
dout("mds locked, locking locally");
err = posix_lock_file(file, fl, NULL);
- if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
+ if (err) {
/* undo! This should only happen if
* the kernel detects local
* deadlock. */
- ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on posix_lock_file, undid lock",
err);
@@ -227,9 +282,11 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
{
- u8 lock_cmd;
- int err;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int err = 0;
u8 wait = 0;
+ u8 lock_cmd;
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
@@ -239,6 +296,21 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
dout("ceph_flock, fl_file: %p", fl->fl_file);
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
+ err = -EIO;
+ } else {
+ /* see comment in ceph_lock */
+ fl->fl_ops = &ceph_fl_lock_ops;
+ atomic_inc(&ci->i_filelock_ref);
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ if (err < 0) {
+ if (F_UNLCK == fl->fl_type)
+ locks_lock_file_wait(file, fl);
+ return err;
+ }
+
if (IS_SETLKW(cmd))
wait = 1;
@@ -250,13 +322,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
lock_cmd = CEPH_LOCK_UNLOCK;
err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
- file, lock_cmd, wait, fl);
+ inode, lock_cmd, wait, fl);
if (!err) {
err = locks_lock_file_wait(file, fl);
if (err) {
ceph_lock_message(CEPH_LOCK_FLOCK,
CEPH_MDS_OP_SETFILELOCK,
- file, CEPH_LOCK_UNLOCK, 0, fl);
+ inode, CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on locks_lock_file_wait, undid lock", err);
}
}
@@ -288,6 +360,37 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
*flock_count, *fcntl_count);
}
+/*
+ * Given a pointer to a lock, convert it to a ceph filelock
+ */
+static int lock_to_ceph_filelock(struct file_lock *lock,
+ struct ceph_filelock *cephlock)
+{
+ int err = 0;
+ cephlock->start = cpu_to_le64(lock->fl_start);
+ cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
+ cephlock->client = cpu_to_le64(0);
+ cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
+ cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
+
+ switch (lock->fl_type) {
+ case F_RDLCK:
+ cephlock->type = CEPH_LOCK_SHARED;
+ break;
+ case F_WRLCK:
+ cephlock->type = CEPH_LOCK_EXCL;
+ break;
+ case F_UNLCK:
+ cephlock->type = CEPH_LOCK_UNLOCK;
+ break;
+ default:
+ dout("Have unknown lock type %d", lock->fl_type);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
/**
* Encode the flock and fcntl locks for the given inode into the ceph_filelock
* array. Must be called with inode->i_lock already held.
@@ -356,50 +459,22 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
if (err)
goto out_fail;
- err = ceph_pagelist_append(pagelist, flocks,
- num_fcntl_locks * sizeof(*flocks));
- if (err)
- goto out_fail;
+ if (num_fcntl_locks > 0) {
+ err = ceph_pagelist_append(pagelist, flocks,
+ num_fcntl_locks * sizeof(*flocks));
+ if (err)
+ goto out_fail;
+ }
nlocks = cpu_to_le32(num_flock_locks);
err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
if (err)
goto out_fail;
- err = ceph_pagelist_append(pagelist,
- &flocks[num_fcntl_locks],
- num_flock_locks * sizeof(*flocks));
-out_fail:
- return err;
-}
-
-/*
- * Given a pointer to a lock, convert it to a ceph filelock
- */
-int lock_to_ceph_filelock(struct file_lock *lock,
- struct ceph_filelock *cephlock)
-{
- int err = 0;
- cephlock->start = cpu_to_le64(lock->fl_start);
- cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
- cephlock->client = cpu_to_le64(0);
- cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
- cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
-
- switch (lock->fl_type) {
- case F_RDLCK:
- cephlock->type = CEPH_LOCK_SHARED;
- break;
- case F_WRLCK:
- cephlock->type = CEPH_LOCK_EXCL;
- break;
- case F_UNLCK:
- cephlock->type = CEPH_LOCK_UNLOCK;
- break;
- default:
- dout("Have unknown lock type %d", lock->fl_type);
- err = -EINVAL;
+ if (num_flock_locks > 0) {
+ err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks],
+ num_flock_locks * sizeof(*flocks));
}
-
+out_fail:
return err;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0687ab3c3267..ab69dcb70e8a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1039,22 +1039,23 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
* session caps
*/
-/* caller holds s_cap_lock, we drop it */
-static void cleanup_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
- __releases(session->s_cap_lock)
+static void detach_cap_releases(struct ceph_mds_session *session,
+ struct list_head *target)
{
- LIST_HEAD(tmp_list);
- list_splice_init(&session->s_cap_releases, &tmp_list);
+ lockdep_assert_held(&session->s_cap_lock);
+
+ list_splice_init(&session->s_cap_releases, target);
session->s_num_cap_releases = 0;
- spin_unlock(&session->s_cap_lock);
+ dout("dispose_cap_releases mds%d\n", session->s_mds);
+}
- dout("cleanup_cap_releases mds%d\n", session->s_mds);
- while (!list_empty(&tmp_list)) {
+static void dispose_cap_releases(struct ceph_mds_client *mdsc,
+ struct list_head *dispose)
+{
+ while (!list_empty(dispose)) {
struct ceph_cap *cap;
/* zero out the in-progress message */
- cap = list_first_entry(&tmp_list,
- struct ceph_cap, session_caps);
+ cap = list_first_entry(dispose, struct ceph_cap, session_caps);
list_del(&cap->session_caps);
ceph_put_cap(mdsc, cap);
}
@@ -1215,6 +1216,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
}
spin_unlock(&mdsc->cap_dirty_lock);
+ if (atomic_read(&ci->i_filelock_ref) > 0) {
+ /* make further file lock syscall return -EIO */
+ ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
+ pr_warn_ratelimited(" dropping file locks for %p %lld\n",
+ inode, ceph_ino(inode));
+ }
+
if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
@@ -1244,6 +1252,8 @@ static void remove_session_caps(struct ceph_mds_session *session)
{
struct ceph_fs_client *fsc = session->s_mdsc->fsc;
struct super_block *sb = fsc->sb;
+ LIST_HEAD(dispose);
+
dout("remove_session_caps on %p\n", session);
iterate_session_caps(session, remove_session_caps_cb, fsc);
@@ -1278,10 +1288,12 @@ static void remove_session_caps(struct ceph_mds_session *session)
}
// drop cap expires and unlock s_cap_lock
- cleanup_cap_releases(session->s_mdsc, session);
+ detach_cap_releases(session, &dispose);
BUG_ON(session->s_nr_caps > 0);
BUG_ON(!list_empty(&session->s_cap_flushing));
+ spin_unlock(&session->s_cap_lock);
+ dispose_cap_releases(session->s_mdsc, &dispose);
}
/*
@@ -1462,6 +1474,11 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
goto out;
if ((used | wanted) & CEPH_CAP_ANY_WR)
goto out;
+ /* Note: it's possible that i_filelock_ref becomes non-zero
+ * after dropping auth caps. It doesn't hurt because reply
+ * of lock mds request will re-add auth caps. */
+ if (atomic_read(&ci->i_filelock_ref) > 0)
+ goto out;
}
/* The inode has cached pages, but it's no longer used.
* we can safely drop it */
@@ -2827,7 +2844,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_mds_cap_reconnect v2;
struct ceph_mds_cap_reconnect_v1 v1;
} rec;
- struct ceph_inode_info *ci;
+ struct ceph_inode_info *ci = cap->ci;
struct ceph_reconnect_state *recon_state = arg;
struct ceph_pagelist *pagelist = recon_state->pagelist;
char *path;
@@ -2836,8 +2853,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
u64 snap_follows;
struct dentry *dentry;
- ci = cap->ci;
-
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
inode, ceph_vinop(inode), cap, cap->cap_id,
ceph_cap_string(cap->issued));
@@ -2870,7 +2885,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
rec.v2.issued = cpu_to_le32(cap->issued);
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
rec.v2.pathbase = cpu_to_le64(pathbase);
- rec.v2.flock_len = 0;
+ rec.v2.flock_len = (__force __le32)
+ ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
} else {
rec.v1.cap_id = cpu_to_le64(cap->cap_id);
rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
@@ -2894,26 +2910,37 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
if (recon_state->msg_version >= 2) {
int num_fcntl_locks, num_flock_locks;
- struct ceph_filelock *flocks;
+ struct ceph_filelock *flocks = NULL;
size_t struct_len, total_len = 0;
u8 struct_v = 0;
encode_again:
- ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
- flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
- sizeof(struct ceph_filelock), GFP_NOFS);
- if (!flocks) {
- err = -ENOMEM;
- goto out_free;
+ if (rec.v2.flock_len) {
+ ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+ } else {
+ num_fcntl_locks = 0;
+ num_flock_locks = 0;
}
- err = ceph_encode_locks_to_buffer(inode, flocks,
- num_fcntl_locks,
- num_flock_locks);
- if (err) {
+ if (num_fcntl_locks + num_flock_locks > 0) {
+ flocks = kmalloc((num_fcntl_locks + num_flock_locks) *
+ sizeof(struct ceph_filelock), GFP_NOFS);
+ if (!flocks) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+ err = ceph_encode_locks_to_buffer(inode, flocks,
+ num_fcntl_locks,
+ num_flock_locks);
+ if (err) {
+ kfree(flocks);
+ flocks = NULL;
+ if (err == -ENOSPC)
+ goto encode_again;
+ goto out_free;
+ }
+ } else {
kfree(flocks);
- if (err == -ENOSPC)
- goto encode_again;
- goto out_free;
+ flocks = NULL;
}
if (recon_state->msg_version >= 3) {
@@ -2993,6 +3020,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
int s_nr_caps;
struct ceph_pagelist *pagelist;
struct ceph_reconnect_state recon_state;
+ LIST_HEAD(dispose);
pr_info("mds%d reconnect start\n", mds);
@@ -3026,7 +3054,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
*/
session->s_cap_reconnect = 1;
/* drop old cap expires; we're about to reestablish that state */
- cleanup_cap_releases(mdsc, session);
+ detach_cap_releases(session, &dispose);
+ spin_unlock(&session->s_cap_lock);
+ dispose_cap_releases(mdsc, &dispose);
/* trim unused caps to reduce MDS's cache rejoin time */
if (mdsc->fsc->sb->s_root)
@@ -3857,14 +3887,14 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
goto err_out;
}
return;
+
bad:
pr_err("error decoding fsmap\n");
err_out:
mutex_lock(&mdsc->mutex);
- mdsc->mdsmap_err = -ENOENT;
+ mdsc->mdsmap_err = err;
__wake_requests(mdsc, &mdsc->waiting_for_map);
mutex_unlock(&mdsc->mutex);
- return;
}
/*
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index e4082afedcb1..a62d2a9841dc 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -84,8 +84,9 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_ffree = -1;
buf->f_namelen = NAME_MAX;
- /* leave fsid little-endian, regardless of host endianness */
- fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
+ /* Must convert the fsid, for consistent values across arches */
+ fsid = le64_to_cpu(*(__le64 *)(&monmap->fsid)) ^
+ le64_to_cpu(*((__le64 *)&monmap->fsid + 1));
buf->f_fsid.val[0] = fsid & 0xffffffff;
buf->f_fsid.val[1] = fsid >> 32;
@@ -330,11 +331,11 @@ static int parse_fsopt_token(char *c, void *private)
break;
#ifdef CONFIG_CEPH_FS_POSIX_ACL
case Opt_acl:
- fsopt->sb_flags |= MS_POSIXACL;
+ fsopt->sb_flags |= SB_POSIXACL;
break;
#endif
case Opt_noacl:
- fsopt->sb_flags &= ~MS_POSIXACL;
+ fsopt->sb_flags &= ~SB_POSIXACL;
break;
default:
BUG_ON(token);
@@ -519,7 +520,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",nopoolperm");
#ifdef CONFIG_CEPH_FS_POSIX_ACL
- if (fsopt->sb_flags & MS_POSIXACL)
+ if (fsopt->sb_flags & SB_POSIXACL)
seq_puts(m, ",acl");
else
seq_puts(m, ",noacl");
@@ -987,7 +988,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
dout("ceph_mount\n");
#ifdef CONFIG_CEPH_FS_POSIX_ACL
- flags |= MS_POSIXACL;
+ flags |= SB_POSIXACL;
#endif
err = parse_mount_options(&fsopt, &opt, flags, data, dev_name);
if (err < 0) {
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3e27a28aa44a..2beeec07fa76 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -352,6 +352,7 @@ struct ceph_inode_info {
int i_pin_ref;
int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
int i_wrbuffer_ref, i_wrbuffer_ref_head;
+ atomic_t i_filelock_ref;
u32 i_shared_gen; /* increment each time we get FILE_SHARED */
u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
@@ -487,6 +488,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
#define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */
#define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */
#define CEPH_I_ERROR_WRITE (1 << 11) /* have seen write errors */
+#define CEPH_I_ERROR_FILELOCK (1 << 12) /* have seen file lock errors */
+
/*
* We set the ERROR_WRITE bit when we start seeing write errors on an inode
@@ -1011,7 +1014,6 @@ extern int ceph_encode_locks_to_buffer(struct inode *inode,
extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
struct ceph_pagelist *pagelist,
int num_fcntl_locks, int num_flock_locks);
-extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
/* debugfs.c */
extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index cbd216b57239..350fa55a1bf7 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -42,7 +42,7 @@
#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */
#define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */
#define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */
-#define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */
+#define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of SB_POSIXACL in mnt_cifs_flags */
#define CIFS_MOUNT_CIFS_BACKUPUID 0x200000 /* backup intent bit for a user */
#define CIFS_MOUNT_CIFS_BACKUPGID 0x400000 /* backup intent bit for a group */
#define CIFS_MOUNT_MAP_SFM_CHR 0x800000 /* SFM/MAC mapping for illegal chars */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8c8b75d33f31..31b7565b1617 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -125,7 +125,7 @@ cifs_read_super(struct super_block *sb)
tcon = cifs_sb_master_tcon(cifs_sb);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL)
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files)
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -497,7 +497,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",cifsacl");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
seq_puts(s, ",dynperm");
- if (root->d_sb->s_flags & MS_POSIXACL)
+ if (root->d_sb->s_flags & SB_POSIXACL)
seq_puts(s, ",acl");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
seq_puts(s, ",mfsymlinks");
@@ -573,7 +573,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
static int cifs_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_NODIRATIME;
+ *flags |= SB_NODIRATIME;
return 0;
}
@@ -708,7 +708,7 @@ cifs_do_mount(struct file_system_type *fs_type,
rc = cifs_mount(cifs_sb, volume_info);
if (rc) {
- if (!(flags & MS_SILENT))
+ if (!(flags & SB_SILENT))
cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n",
rc);
root = ERR_PTR(rc);
@@ -720,7 +720,7 @@ cifs_do_mount(struct file_system_type *fs_type,
mnt_data.flags = flags;
/* BB should we make this contingent on mount parm? */
- flags |= MS_NODIRATIME | MS_NOATIME;
+ flags |= SB_NODIRATIME | SB_NOATIME;
sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data);
if (IS_ERR(sb)) {
@@ -739,7 +739,7 @@ cifs_do_mount(struct file_system_type *fs_type,
goto out_super;
}
- sb->s_flags |= MS_ACTIVE;
+ sb->s_flags |= SB_ACTIVE;
}
root = cifs_get_root(volume_info, sb);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e185b2853eab..b16583594d1a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -559,8 +559,8 @@ struct smb_vol {
CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO | \
CIFS_MOUNT_CIFS_BACKUPUID | CIFS_MOUNT_CIFS_BACKUPGID)
-#define CIFS_MS_MASK (MS_RDONLY | MS_MANDLOCK | MS_NOEXEC | MS_NOSUID | \
- MS_NODEV | MS_SYNCHRONOUS)
+#define CIFS_MS_MASK (SB_RDONLY | SB_MANDLOCK | SB_NOEXEC | SB_NOSUID | \
+ SB_NODEV | SB_SYNCHRONOUS)
struct cifs_mnt_data {
struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7c732cb44164..ecb99079363a 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -985,7 +985,7 @@ retry_iget5_locked:
}
cifs_fattr_to_inode(inode, fattr);
- if (sb->s_flags & MS_NOATIME)
+ if (sb->s_flags & SB_NOATIME)
inode->i_flags |= S_NOATIME | S_NOCMTIME;
if (inode->i_state & I_NEW) {
inode->i_ino = hash;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 52f975d848a0..316af84674f1 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -117,7 +117,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
#ifdef CONFIG_CIFS_POSIX
if (!value)
goto out;
- if (sb->s_flags & MS_POSIXACL)
+ if (sb->s_flags & SB_POSIXACL)
rc = CIFSSMBSetPosixACL(xid, pTcon, full_path,
value, (const int)size,
ACL_TYPE_ACCESS, cifs_sb->local_nls,
@@ -129,7 +129,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
#ifdef CONFIG_CIFS_POSIX
if (!value)
goto out;
- if (sb->s_flags & MS_POSIXACL)
+ if (sb->s_flags & SB_POSIXACL)
rc = CIFSSMBSetPosixACL(xid, pTcon, full_path,
value, (const int)size,
ACL_TYPE_DEFAULT, cifs_sb->local_nls,
@@ -266,7 +266,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
case XATTR_ACL_ACCESS:
#ifdef CONFIG_CIFS_POSIX
- if (sb->s_flags & MS_POSIXACL)
+ if (sb->s_flags & SB_POSIXACL)
rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
value, size, ACL_TYPE_ACCESS,
cifs_sb->local_nls,
@@ -276,7 +276,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
case XATTR_ACL_DEFAULT:
#ifdef CONFIG_CIFS_POSIX
- if (sb->s_flags & MS_POSIXACL)
+ if (sb->s_flags & SB_POSIXACL)
rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
value, size, ACL_TYPE_DEFAULT,
cifs_sb->local_nls,
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 6f0a6a4d5faa..97424cf206c0 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -96,7 +96,7 @@ void coda_destroy_inodecache(void)
static int coda_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_NOATIME;
+ *flags |= SB_NOATIME;
return 0;
}
@@ -188,7 +188,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
mutex_unlock(&vc->vc_mutex);
sb->s_fs_info = vc;
- sb->s_flags |= MS_NOATIME;
+ sb->s_flags |= SB_NOATIME;
sb->s_blocksize = 4096; /* XXXXX what do we put here?? */
sb->s_blocksize_bits = 12;
sb->s_magic = CODA_SUPER_MAGIC;
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index a37f003530d7..1175a1722411 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -447,8 +447,7 @@ int venus_fsync(struct super_block *sb, struct CodaFid *fid)
UPARG(CODA_FSYNC);
inp->coda_fsync.VFid = *fid;
- error = coda_upcall(coda_vcp(sb), sizeof(union inputArgs),
- &outsize, inp);
+ error = coda_upcall(coda_vcp(sb), insize, &outsize, inp);
CODA_FREE(inp, insize);
return error;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index bd5d91e119ca..5fc5dc660600 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -54,8 +54,6 @@
#include <linux/if_tun.h>
#include <linux/ctype.h>
#include <linux/syscalls.h>
-#include <linux/i2c.h>
-#include <linux/i2c-dev.h>
#include <linux/atalk.h>
#include <linux/gfp.h>
#include <linux/cec.h>
@@ -137,22 +135,6 @@ static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return vfs_ioctl(file, cmd, arg);
}
-static int w_long(struct file *file,
- unsigned int cmd, compat_ulong_t __user *argp)
-{
- int err;
- unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp));
-
- if (valp == NULL)
- return -EFAULT;
- err = do_ioctl(file, cmd, (unsigned long)valp);
- if (err)
- return err;
- if (convert_in_user(valp, argp))
- return -EFAULT;
- return 0;
-}
-
struct compat_video_event {
int32_t type;
compat_time_t timestamp;
@@ -671,96 +653,6 @@ static int serial_struct_ioctl(struct file *file,
return err;
}
-/*
- * I2C layer ioctls
- */
-
-struct i2c_msg32 {
- u16 addr;
- u16 flags;
- u16 len;
- compat_caddr_t buf;
-};
-
-struct i2c_rdwr_ioctl_data32 {
- compat_caddr_t msgs; /* struct i2c_msg __user *msgs */
- u32 nmsgs;
-};
-
-struct i2c_smbus_ioctl_data32 {
- u8 read_write;
- u8 command;
- u32 size;
- compat_caddr_t data; /* union i2c_smbus_data *data */
-};
-
-struct i2c_rdwr_aligned {
- struct i2c_rdwr_ioctl_data cmd;
- struct i2c_msg msgs[0];
-};
-
-static int do_i2c_rdwr_ioctl(struct file *file,
- unsigned int cmd, struct i2c_rdwr_ioctl_data32 __user *udata)
-{
- struct i2c_rdwr_aligned __user *tdata;
- struct i2c_msg __user *tmsgs;
- struct i2c_msg32 __user *umsgs;
- compat_caddr_t datap;
- u32 nmsgs;
- int i;
-
- if (get_user(nmsgs, &udata->nmsgs))
- return -EFAULT;
- if (nmsgs > I2C_RDWR_IOCTL_MAX_MSGS)
- return -EINVAL;
-
- if (get_user(datap, &udata->msgs))
- return -EFAULT;
- umsgs = compat_ptr(datap);
-
- tdata = compat_alloc_user_space(sizeof(*tdata) +
- nmsgs * sizeof(struct i2c_msg));
- tmsgs = &tdata->msgs[0];
-
- if (put_user(nmsgs, &tdata->cmd.nmsgs) ||
- put_user(tmsgs, &tdata->cmd.msgs))
- return -EFAULT;
-
- for (i = 0; i < nmsgs; i++) {
- if (copy_in_user(&tmsgs[i].addr, &umsgs[i].addr, 3*sizeof(u16)))
- return -EFAULT;
- if (get_user(datap, &umsgs[i].buf) ||
- put_user(compat_ptr(datap), &tmsgs[i].buf))
- return -EFAULT;
- }
- return do_ioctl(file, cmd, (unsigned long)tdata);
-}
-
-static int do_i2c_smbus_ioctl(struct file *file,
- unsigned int cmd, struct i2c_smbus_ioctl_data32 __user *udata)
-{
- struct i2c_smbus_ioctl_data __user *tdata;
- union {
- /* beginnings of those have identical layouts */
- struct i2c_smbus_ioctl_data32 data32;
- struct i2c_smbus_ioctl_data data;
- } v;
-
- tdata = compat_alloc_user_space(sizeof(*tdata));
- if (tdata == NULL)
- return -ENOMEM;
-
- memset(&v, 0, sizeof(v));
- if (copy_from_user(&v.data32, udata, sizeof(v.data32)))
- return -EFAULT;
- v.data.data = compat_ptr(v.data32.data);
-
- if (copy_to_user(tdata, &v.data, sizeof(v.data)))
- return -EFAULT;
-
- return do_ioctl(file, cmd, (unsigned long)tdata);
-}
-
#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t)
#define RTC_IRQP_SET32 _IOW('p', 0x0c, compat_ulong_t)
#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t)
@@ -1283,13 +1175,6 @@ COMPATIBLE_IOCTL(PCIIOC_CONTROLLER)
COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO)
COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM)
COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE)
-/* i2c */
-COMPATIBLE_IOCTL(I2C_SLAVE)
-COMPATIBLE_IOCTL(I2C_SLAVE_FORCE)
-COMPATIBLE_IOCTL(I2C_TENBIT)
-COMPATIBLE_IOCTL(I2C_PEC)
-COMPATIBLE_IOCTL(I2C_RETRIES)
-COMPATIBLE_IOCTL(I2C_TIMEOUT)
/* hiddev */
COMPATIBLE_IOCTL(HIDIOCGVERSION)
COMPATIBLE_IOCTL(HIDIOCAPPLICATION)
@@ -1464,13 +1349,6 @@ static long do_ioctl_trans(unsigned int cmd,
case TIOCGSERIAL:
case TIOCSSERIAL:
return serial_struct_ioctl(file, cmd, argp);
- /* i2c */
- case I2C_FUNCS:
- return w_long(file, cmd, argp);
- case I2C_RDWR:
- return do_i2c_rdwr_ioctl(file, cmd, argp);
- case I2C_SMBUS:
- return do_i2c_smbus_ioctl(file, cmd, argp);
/* Not implemented in the native kernel */
case RTC_IRQP_READ32:
case RTC_IRQP_SET32:
@@ -1580,6 +1458,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
case FICLONE:
case FICLONERANGE:
case FIDEDUPERANGE:
+ case FS_IOC_FIEMAP:
goto do_ioctl;
case FIBMAP:
diff --git a/fs/coredump.c b/fs/coredump.c
index 52c63d6c9143..1e2c87acac9b 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -680,16 +680,11 @@ void do_coredump(const siginfo_t *siginfo)
* privs and don't want to unlink another user's coredump.
*/
if (!need_suid_safe) {
- mm_segment_t old_fs;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
/*
* If it doesn't exist, that's fine. If there's some
* other problem, we'll catch it at the filp_open().
*/
- (void) sys_unlink((const char __user *)cn.corename);
- set_fs(old_fs);
+ do_unlinkat(AT_FDCWD, getname_kernel(cn.corename));
}
/*
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index 11b29d491b7c..f937082f3244 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -1,6 +1,5 @@
config CRAMFS
- tristate "Compressed ROM file system support (cramfs) (OBSOLETE)"
- depends on BLOCK
+ tristate "Compressed ROM file system support (cramfs)"
select ZLIB_INFLATE
help
Saying Y here includes support for CramFs (Compressed ROM File
@@ -16,7 +15,39 @@ config CRAMFS
cramfs. Note that the root file system (the one containing the
directory /) cannot be compiled as a module.
- This filesystem is obsoleted by SquashFS, which is much better
- in terms of performance and features.
+ This filesystem is limited in capabilities and performance on
+ purpose to remain small and low on RAM usage. It is most suitable
+ for small embedded systems. If you have ample RAM to spare, you may
+ consider a more capable compressed filesystem such as SquashFS
+ which is much better in terms of performance and features.
+
+ If unsure, say N.
+
+config CRAMFS_BLOCKDEV
+ bool "Support CramFs image over a regular block device" if EXPERT
+ depends on CRAMFS && BLOCK
+ default y
+ help
+ This option allows the CramFs driver to load data from a regular
+ block device such a disk partition or a ramdisk.
+
+config CRAMFS_MTD
+ bool "Support CramFs image directly mapped in physical memory"
+ depends on CRAMFS && MTD
+ default y if !CRAMFS_BLOCKDEV
+ help
+ This option allows the CramFs driver to load data directly from
+ a linear adressed memory range (usually non volatile memory
+ like flash) instead of going through the block device layer.
+ This saves some memory since no intermediate buffering is
+ necessary.
+
+ The location of the CramFs image is determined by a
+ MTD device capable of direct memory mapping e.g. from
+ the 'physmap' map driver or a resulting MTD partition.
+ For example, this would mount the cramfs image stored in
+ the MTD partition named "xip_fs" on the /mnt mountpoint:
+
+ mount -t cramfs mtd:xip_fs /mnt
If unsure, say N.
diff --git a/fs/cramfs/README b/fs/cramfs/README
index 9d4e7ea311f4..d71b27e0ff15 100644
--- a/fs/cramfs/README
+++ b/fs/cramfs/README
@@ -49,17 +49,46 @@ same as the start of the (i+1)'th <block> if there is one). The first
<block> immediately follows the last <block_pointer> for the file.
<block_pointer>s are each 32 bits long.
+When the CRAMFS_FLAG_EXT_BLOCK_POINTERS capability bit is set, each
+<block_pointer>'s top bits may contain special flags as follows:
+
+CRAMFS_BLK_FLAG_UNCOMPRESSED (bit 31):
+ The block data is not compressed and should be copied verbatim.
+
+CRAMFS_BLK_FLAG_DIRECT_PTR (bit 30):
+ The <block_pointer> stores the actual block start offset and not
+ its end, shifted right by 2 bits. The block must therefore be
+ aligned to a 4-byte boundary. The block size is either blksize
+ if CRAMFS_BLK_FLAG_UNCOMPRESSED is also specified, otherwise
+ the compressed data length is included in the first 2 bytes of
+ the block data. This is used to allow discontiguous data layout
+ and specific data block alignments e.g. for XIP applications.
+
+
The order of <file_data>'s is a depth-first descent of the directory
tree, i.e. the same order as `find -size +0 \( -type f -o -type l \)
-print'.
<block>: The i'th <block> is the output of zlib's compress function
-applied to the i'th blksize-sized chunk of the input data.
+applied to the i'th blksize-sized chunk of the input data if the
+corresponding CRAMFS_BLK_FLAG_UNCOMPRESSED <block_ptr> bit is not set,
+otherwise it is the input data directly.
(For the last <block> of the file, the input may of course be smaller.)
Each <block> may be a different size. (See <block_pointer> above.)
+
<block>s are merely byte-aligned, not generally u32-aligned.
+When CRAMFS_BLK_FLAG_DIRECT_PTR is specified then the corresponding
+<block> may be located anywhere and not necessarily contiguous with
+the previous/next blocks. In that case it is minimally u32-aligned.
+If CRAMFS_BLK_FLAG_UNCOMPRESSED is also specified then the size is always
+blksize except for the last block which is limited by the file length.
+If CRAMFS_BLK_FLAG_DIRECT_PTR is set and CRAMFS_BLK_FLAG_UNCOMPRESSED
+is not set then the first 2 bytes of the block contains the size of the
+remaining block data as this cannot be determined from the placement of
+logically adjacent blocks.
+
Holes
-----
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 7919967488cb..017b0ab19bc4 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -15,10 +15,15 @@
#include <linux/module.h>
#include <linux/fs.h>
+#include <linux/file.h>
#include <linux/pagemap.h>
+#include <linux/pfn_t.h>
+#include <linux/ramfs.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/blkdev.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/super.h>
#include <linux/slab.h>
#include <linux/vfs.h>
#include <linux/mutex.h>
@@ -36,6 +41,9 @@ struct cramfs_sb_info {
unsigned long blocks;
unsigned long files;
unsigned long flags;
+ void *linear_virt_addr;
+ resource_size_t linear_phys_addr;
+ size_t mtd_point_size;
};
static inline struct cramfs_sb_info *CRAMFS_SB(struct super_block *sb)
@@ -46,6 +54,7 @@ static inline struct cramfs_sb_info *CRAMFS_SB(struct super_block *sb)
static const struct super_operations cramfs_ops;
static const struct inode_operations cramfs_dir_inode_operations;
static const struct file_operations cramfs_directory_operations;
+static const struct file_operations cramfs_physmem_fops;
static const struct address_space_operations cramfs_aops;
static DEFINE_MUTEX(read_mutex);
@@ -93,6 +102,10 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
case S_IFREG:
inode->i_fop = &generic_ro_fops;
inode->i_data.a_ops = &cramfs_aops;
+ if (IS_ENABLED(CONFIG_CRAMFS_MTD) &&
+ CRAMFS_SB(sb)->flags & CRAMFS_FLAG_EXT_BLOCK_POINTERS &&
+ CRAMFS_SB(sb)->linear_phys_addr)
+ inode->i_fop = &cramfs_physmem_fops;
break;
case S_IFDIR:
inode->i_op = &cramfs_dir_inode_operations;
@@ -140,6 +153,9 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
* BLKS_PER_BUF*PAGE_SIZE, so that the caller doesn't need to
* worry about end-of-buffer issues even when decompressing a full
* page cache.
+ *
+ * Note: This is all optimized away at compile time when
+ * CONFIG_CRAMFS_BLOCKDEV=n.
*/
#define READ_BUFFERS (2)
/* NEXT_BUFFER(): Loop over [0..(READ_BUFFERS-1)]. */
@@ -160,10 +176,10 @@ static struct super_block *buffer_dev[READ_BUFFERS];
static int next_buffer;
/*
- * Returns a pointer to a buffer containing at least LEN bytes of
- * filesystem starting at byte offset OFFSET into the filesystem.
+ * Populate our block cache and return a pointer to it.
*/
-static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned int len)
+static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset,
+ unsigned int len)
{
struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
struct page *pages[BLKS_PER_BUF];
@@ -239,49 +255,278 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
return read_buffers[buffer] + offset;
}
+/*
+ * Return a pointer to the linearly addressed cramfs image in memory.
+ */
+static void *cramfs_direct_read(struct super_block *sb, unsigned int offset,
+ unsigned int len)
+{
+ struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+
+ if (!len)
+ return NULL;
+ if (len > sbi->size || offset > sbi->size - len)
+ return page_address(ZERO_PAGE(0));
+ return sbi->linear_virt_addr + offset;
+}
+
+/*
+ * Returns a pointer to a buffer containing at least LEN bytes of
+ * filesystem starting at byte offset OFFSET into the filesystem.
+ */
+static void *cramfs_read(struct super_block *sb, unsigned int offset,
+ unsigned int len)
+{
+ struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+
+ if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sbi->linear_virt_addr)
+ return cramfs_direct_read(sb, offset, len);
+ else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV))
+ return cramfs_blkdev_read(sb, offset, len);
+ else
+ return NULL;
+}
+
+/*
+ * For a mapping to be possible, we need a range of uncompressed and
+ * contiguous blocks. Return the offset for the first block and number of
+ * valid blocks for which that is true, or zero otherwise.
+ */
+static u32 cramfs_get_block_range(struct inode *inode, u32 pgoff, u32 *pages)
+{
+ struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb);
+ int i;
+ u32 *blockptrs, first_block_addr;
+
+ /*
+ * We can dereference memory directly here as this code may be
+ * reached only when there is a direct filesystem image mapping
+ * available in memory.
+ */
+ blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode) + pgoff * 4);
+ first_block_addr = blockptrs[0] & ~CRAMFS_BLK_FLAGS;
+ i = 0;
+ do {
+ u32 block_off = i * (PAGE_SIZE >> CRAMFS_BLK_DIRECT_PTR_SHIFT);
+ u32 expect = (first_block_addr + block_off) |
+ CRAMFS_BLK_FLAG_DIRECT_PTR |
+ CRAMFS_BLK_FLAG_UNCOMPRESSED;
+ if (blockptrs[i] != expect) {
+ pr_debug("range: block %d/%d got %#x expects %#x\n",
+ pgoff+i, pgoff + *pages - 1,
+ blockptrs[i], expect);
+ if (i == 0)
+ return 0;
+ break;
+ }
+ } while (++i < *pages);
+
+ *pages = i;
+ return first_block_addr << CRAMFS_BLK_DIRECT_PTR_SHIFT;
+}
+
+#ifdef CONFIG_MMU
+
+/*
+ * Return true if the last page of a file in the filesystem image contains
+ * some other data that doesn't belong to that file. It is assumed that the
+ * last block is CRAMFS_BLK_FLAG_DIRECT_PTR | CRAMFS_BLK_FLAG_UNCOMPRESSED
+ * (verified by cramfs_get_block_range() and directly accessible in memory.
+ */
+static bool cramfs_last_page_is_shared(struct inode *inode)
+{
+ struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb);
+ u32 partial, last_page, blockaddr, *blockptrs;
+ char *tail_data;
+
+ partial = offset_in_page(inode->i_size);
+ if (!partial)
+ return false;
+ last_page = inode->i_size >> PAGE_SHIFT;
+ blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode));
+ blockaddr = blockptrs[last_page] & ~CRAMFS_BLK_FLAGS;
+ blockaddr <<= CRAMFS_BLK_DIRECT_PTR_SHIFT;
+ tail_data = sbi->linear_virt_addr + blockaddr + partial;
+ return memchr_inv(tail_data, 0, PAGE_SIZE - partial) ? true : false;
+}
+
+static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct inode *inode = file_inode(file);
+ struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb);
+ unsigned int pages, max_pages, offset;
+ unsigned long address, pgoff = vma->vm_pgoff;
+ char *bailout_reason;
+ int ret;
+
+ ret = generic_file_readonly_mmap(file, vma);
+ if (ret)
+ return ret;
+
+ /*
+ * Now try to pre-populate ptes for this vma with a direct
+ * mapping avoiding memory allocation when possible.
+ */
+
+ /* Could COW work here? */
+ bailout_reason = "vma is writable";
+ if (vma->vm_flags & VM_WRITE)
+ goto bailout;
+
+ max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ bailout_reason = "beyond file limit";
+ if (pgoff >= max_pages)
+ goto bailout;
+ pages = min(vma_pages(vma), max_pages - pgoff);
+
+ offset = cramfs_get_block_range(inode, pgoff, &pages);
+ bailout_reason = "unsuitable block layout";
+ if (!offset)
+ goto bailout;
+ address = sbi->linear_phys_addr + offset;
+ bailout_reason = "data is not page aligned";
+ if (!PAGE_ALIGNED(address))
+ goto bailout;
+
+ /* Don't map the last page if it contains some other data */
+ if (pgoff + pages == max_pages && cramfs_last_page_is_shared(inode)) {
+ pr_debug("mmap: %s: last page is shared\n",
+ file_dentry(file)->d_name.name);
+ pages--;
+ }
+
+ if (!pages) {
+ bailout_reason = "no suitable block remaining";
+ goto bailout;
+ }
+
+ if (pages == vma_pages(vma)) {
+ /*
+ * The entire vma is mappable. remap_pfn_range() will
+ * make it distinguishable from a non-direct mapping
+ * in /proc/<pid>/maps by substituting the file offset
+ * with the actual physical address.
+ */
+ ret = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
+ pages * PAGE_SIZE, vma->vm_page_prot);
+ } else {
+ /*
+ * Let's create a mixed map if we can't map it all.
+ * The normal paging machinery will take care of the
+ * unpopulated ptes via cramfs_readpage().
+ */
+ int i;
+ vma->vm_flags |= VM_MIXEDMAP;
+ for (i = 0; i < pages && !ret; i++) {
+ unsigned long off = i * PAGE_SIZE;
+ pfn_t pfn = phys_to_pfn_t(address + off, PFN_DEV);
+ ret = vm_insert_mixed(vma, vma->vm_start + off, pfn);
+ }
+ }
+
+ if (!ret)
+ pr_debug("mapped %s[%lu] at 0x%08lx (%u/%lu pages) "
+ "to vma 0x%08lx, page_prot 0x%llx\n",
+ file_dentry(file)->d_name.name, pgoff,
+ address, pages, vma_pages(vma), vma->vm_start,
+ (unsigned long long)pgprot_val(vma->vm_page_prot));
+ return ret;
+
+bailout:
+ pr_debug("%s[%lu]: direct mmap impossible: %s\n",
+ file_dentry(file)->d_name.name, pgoff, bailout_reason);
+ /* Didn't manage any direct map, but normal paging is still possible */
+ return 0;
+}
+
+#else /* CONFIG_MMU */
+
+static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS;
+}
+
+static unsigned long cramfs_physmem_get_unmapped_area(struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct inode *inode = file_inode(file);
+ struct super_block *sb = inode->i_sb;
+ struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+ unsigned int pages, block_pages, max_pages, offset;
+
+ pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (pgoff >= max_pages || pages > max_pages - pgoff)
+ return -EINVAL;
+ block_pages = pages;
+ offset = cramfs_get_block_range(inode, pgoff, &block_pages);
+ if (!offset || block_pages != pages)
+ return -ENOSYS;
+ addr = sbi->linear_phys_addr + offset;
+ pr_debug("get_unmapped for %s ofs %#lx siz %lu at 0x%08lx\n",
+ file_dentry(file)->d_name.name, pgoff*PAGE_SIZE, len, addr);
+ return addr;
+}
+
+static unsigned int cramfs_physmem_mmap_capabilities(struct file *file)
+{
+ return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT |
+ NOMMU_MAP_READ | NOMMU_MAP_EXEC;
+}
+
+#endif /* CONFIG_MMU */
+
+static const struct file_operations cramfs_physmem_fops = {
+ .llseek = generic_file_llseek,
+ .read_iter = generic_file_read_iter,
+ .splice_read = generic_file_splice_read,
+ .mmap = cramfs_physmem_mmap,
+#ifndef CONFIG_MMU
+ .get_unmapped_area = cramfs_physmem_get_unmapped_area,
+ .mmap_capabilities = cramfs_physmem_mmap_capabilities,
+#endif
+};
+
static void cramfs_kill_sb(struct super_block *sb)
{
struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
- kill_block_super(sb);
+ if (IS_ENABLED(CCONFIG_CRAMFS_MTD) && sb->s_mtd) {
+ if (sbi && sbi->mtd_point_size)
+ mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
+ kill_mtd_super(sb);
+ } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
+ kill_block_super(sb);
+ }
kfree(sbi);
}
static int cramfs_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
return 0;
}
-static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
+static int cramfs_read_super(struct super_block *sb,
+ struct cramfs_super *super, int silent)
{
- int i;
- struct cramfs_super super;
+ struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
unsigned long root_offset;
- struct cramfs_sb_info *sbi;
- struct inode *root;
-
- sb->s_flags |= MS_RDONLY;
-
- sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
- if (!sbi)
- return -ENOMEM;
- sb->s_fs_info = sbi;
- /* Invalidate the read buffers on mount: think disk change.. */
- mutex_lock(&read_mutex);
- for (i = 0; i < READ_BUFFERS; i++)
- buffer_blocknr[i] = -1;
+ /* We don't know the real size yet */
+ sbi->size = PAGE_SIZE;
/* Read the first block and get the superblock from it */
- memcpy(&super, cramfs_read(sb, 0, sizeof(super)), sizeof(super));
+ mutex_lock(&read_mutex);
+ memcpy(super, cramfs_read(sb, 0, sizeof(*super)), sizeof(*super));
mutex_unlock(&read_mutex);
/* Do sanity checks on the superblock */
- if (super.magic != CRAMFS_MAGIC) {
+ if (super->magic != CRAMFS_MAGIC) {
/* check for wrong endianness */
- if (super.magic == CRAMFS_MAGIC_WEND) {
+ if (super->magic == CRAMFS_MAGIC_WEND) {
if (!silent)
pr_err("wrong endianness\n");
return -EINVAL;
@@ -289,10 +534,12 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
/* check at 512 byte offset */
mutex_lock(&read_mutex);
- memcpy(&super, cramfs_read(sb, 512, sizeof(super)), sizeof(super));
+ memcpy(super,
+ cramfs_read(sb, 512, sizeof(*super)),
+ sizeof(*super));
mutex_unlock(&read_mutex);
- if (super.magic != CRAMFS_MAGIC) {
- if (super.magic == CRAMFS_MAGIC_WEND && !silent)
+ if (super->magic != CRAMFS_MAGIC) {
+ if (super->magic == CRAMFS_MAGIC_WEND && !silent)
pr_err("wrong endianness\n");
else if (!silent)
pr_err("wrong magic\n");
@@ -301,34 +548,34 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
}
/* get feature flags first */
- if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) {
+ if (super->flags & ~CRAMFS_SUPPORTED_FLAGS) {
pr_err("unsupported filesystem features\n");
return -EINVAL;
}
/* Check that the root inode is in a sane state */
- if (!S_ISDIR(super.root.mode)) {
+ if (!S_ISDIR(super->root.mode)) {
pr_err("root is not a directory\n");
return -EINVAL;
}
/* correct strange, hard-coded permissions of mkcramfs */
- super.root.mode |= (S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
+ super->root.mode |= 0555;
- root_offset = super.root.offset << 2;
- if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) {
- sbi->size = super.size;
- sbi->blocks = super.fsid.blocks;
- sbi->files = super.fsid.files;
+ root_offset = super->root.offset << 2;
+ if (super->flags & CRAMFS_FLAG_FSID_VERSION_2) {
+ sbi->size = super->size;
+ sbi->blocks = super->fsid.blocks;
+ sbi->files = super->fsid.files;
} else {
sbi->size = 1<<28;
sbi->blocks = 0;
sbi->files = 0;
}
- sbi->magic = super.magic;
- sbi->flags = super.flags;
+ sbi->magic = super->magic;
+ sbi->flags = super->flags;
if (root_offset == 0)
pr_info("empty filesystem");
- else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
+ else if (!(super->flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
((root_offset != sizeof(struct cramfs_super)) &&
(root_offset != 512 + sizeof(struct cramfs_super))))
{
@@ -336,9 +583,18 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
return -EINVAL;
}
+ return 0;
+}
+
+static int cramfs_finalize_super(struct super_block *sb,
+ struct cramfs_inode *cramfs_root)
+{
+ struct inode *root;
+
/* Set it all up.. */
+ sb->s_flags |= SB_RDONLY;
sb->s_op = &cramfs_ops;
- root = get_cramfs_inode(sb, &super.root, 0);
+ root = get_cramfs_inode(sb, cramfs_root, 0);
if (IS_ERR(root))
return PTR_ERR(root);
sb->s_root = d_make_root(root);
@@ -347,10 +603,79 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
+static int cramfs_blkdev_fill_super(struct super_block *sb, void *data,
+ int silent)
+{
+ struct cramfs_sb_info *sbi;
+ struct cramfs_super super;
+ int i, err;
+
+ sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+ sb->s_fs_info = sbi;
+
+ /* Invalidate the read buffers on mount: think disk change.. */
+ for (i = 0; i < READ_BUFFERS; i++)
+ buffer_blocknr[i] = -1;
+
+ err = cramfs_read_super(sb, &super, silent);
+ if (err)
+ return err;
+ return cramfs_finalize_super(sb, &super.root);
+}
+
+static int cramfs_mtd_fill_super(struct super_block *sb, void *data,
+ int silent)
+{
+ struct cramfs_sb_info *sbi;
+ struct cramfs_super super;
+ int err;
+
+ sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+ sb->s_fs_info = sbi;
+
+ /* Map only one page for now. Will remap it when fs size is known. */
+ err = mtd_point(sb->s_mtd, 0, PAGE_SIZE, &sbi->mtd_point_size,
+ &sbi->linear_virt_addr, &sbi->linear_phys_addr);
+ if (err || sbi->mtd_point_size != PAGE_SIZE) {
+ pr_err("unable to get direct memory access to mtd:%s\n",
+ sb->s_mtd->name);
+ return err ? : -ENODATA;
+ }
+
+ pr_info("checking physical address %pap for linear cramfs image\n",
+ &sbi->linear_phys_addr);
+ err = cramfs_read_super(sb, &super, silent);
+ if (err)
+ return err;
+
+ /* Remap the whole filesystem now */
+ pr_info("linear cramfs image on mtd:%s appears to be %lu KB in size\n",
+ sb->s_mtd->name, sbi->size/1024);
+ mtd_unpoint(sb->s_mtd, 0, PAGE_SIZE);
+ err = mtd_point(sb->s_mtd, 0, sbi->size, &sbi->mtd_point_size,
+ &sbi->linear_virt_addr, &sbi->linear_phys_addr);
+ if (err || sbi->mtd_point_size != sbi->size) {
+ pr_err("unable to get direct memory access to mtd:%s\n",
+ sb->s_mtd->name);
+ return err ? : -ENODATA;
+ }
+
+ return cramfs_finalize_super(sb, &super.root);
+}
+
static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
- u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+ u64 id = 0;
+
+ if (sb->s_bdev)
+ id = huge_encode_dev(sb->s_bdev->bd_dev);
+ else if (sb->s_dev)
+ id = huge_encode_dev(sb->s_dev);
buf->f_type = CRAMFS_MAGIC;
buf->f_bsize = PAGE_SIZE;
@@ -502,34 +827,86 @@ static int cramfs_readpage(struct file *file, struct page *page)
if (page->index < maxblock) {
struct super_block *sb = inode->i_sb;
- u32 blkptr_offset = OFFSET(inode) + page->index*4;
- u32 start_offset, compr_len;
+ u32 blkptr_offset = OFFSET(inode) + page->index * 4;
+ u32 block_ptr, block_start, block_len;
+ bool uncompressed, direct;
- start_offset = OFFSET(inode) + maxblock*4;
mutex_lock(&read_mutex);
- if (page->index)
- start_offset = *(u32 *) cramfs_read(sb, blkptr_offset-4,
- 4);
- compr_len = (*(u32 *) cramfs_read(sb, blkptr_offset, 4) -
- start_offset);
- mutex_unlock(&read_mutex);
+ block_ptr = *(u32 *) cramfs_read(sb, blkptr_offset, 4);
+ uncompressed = (block_ptr & CRAMFS_BLK_FLAG_UNCOMPRESSED);
+ direct = (block_ptr & CRAMFS_BLK_FLAG_DIRECT_PTR);
+ block_ptr &= ~CRAMFS_BLK_FLAGS;
+
+ if (direct) {
+ /*
+ * The block pointer is an absolute start pointer,
+ * shifted by 2 bits. The size is included in the
+ * first 2 bytes of the data block when compressed,
+ * or PAGE_SIZE otherwise.
+ */
+ block_start = block_ptr << CRAMFS_BLK_DIRECT_PTR_SHIFT;
+ if (uncompressed) {
+ block_len = PAGE_SIZE;
+ /* if last block: cap to file length */
+ if (page->index == maxblock - 1)
+ block_len =
+ offset_in_page(inode->i_size);
+ } else {
+ block_len = *(u16 *)
+ cramfs_read(sb, block_start, 2);
+ block_start += 2;
+ }
+ } else {
+ /*
+ * The block pointer indicates one past the end of
+ * the current block (start of next block). If this
+ * is the first block then it starts where the block
+ * pointer table ends, otherwise its start comes
+ * from the previous block's pointer.
+ */
+ block_start = OFFSET(inode) + maxblock * 4;
+ if (page->index)
+ block_start = *(u32 *)
+ cramfs_read(sb, blkptr_offset - 4, 4);
+ /* Beware... previous ptr might be a direct ptr */
+ if (unlikely(block_start & CRAMFS_BLK_FLAG_DIRECT_PTR)) {
+ /* See comments on earlier code. */
+ u32 prev_start = block_start;
+ block_start = prev_start & ~CRAMFS_BLK_FLAGS;
+ block_start <<= CRAMFS_BLK_DIRECT_PTR_SHIFT;
+ if (prev_start & CRAMFS_BLK_FLAG_UNCOMPRESSED) {
+ block_start += PAGE_SIZE;
+ } else {
+ block_len = *(u16 *)
+ cramfs_read(sb, block_start, 2);
+ block_start += 2 + block_len;
+ }
+ }
+ block_start &= ~CRAMFS_BLK_FLAGS;
+ block_len = block_ptr - block_start;
+ }
- if (compr_len == 0)
+ if (block_len == 0)
; /* hole */
- else if (unlikely(compr_len > (PAGE_SIZE << 1))) {
- pr_err("bad compressed blocksize %u\n",
- compr_len);
+ else if (unlikely(block_len > 2*PAGE_SIZE ||
+ (uncompressed && block_len > PAGE_SIZE))) {
+ mutex_unlock(&read_mutex);
+ pr_err("bad data blocksize %u\n", block_len);
goto err;
+ } else if (uncompressed) {
+ memcpy(pgdata,
+ cramfs_read(sb, block_start, block_len),
+ block_len);
+ bytes_filled = block_len;
} else {
- mutex_lock(&read_mutex);
bytes_filled = cramfs_uncompress_block(pgdata,
PAGE_SIZE,
- cramfs_read(sb, start_offset, compr_len),
- compr_len);
- mutex_unlock(&read_mutex);
- if (unlikely(bytes_filled < 0))
- goto err;
+ cramfs_read(sb, block_start, block_len),
+ block_len);
}
+ mutex_unlock(&read_mutex);
+ if (unlikely(bytes_filled < 0))
+ goto err;
}
memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled);
@@ -573,10 +950,22 @@ static const struct super_operations cramfs_ops = {
.statfs = cramfs_statfs,
};
-static struct dentry *cramfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static struct dentry *cramfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
- return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
+ struct dentry *ret = ERR_PTR(-ENOPROTOOPT);
+
+ if (IS_ENABLED(CONFIG_CRAMFS_MTD)) {
+ ret = mount_mtd(fs_type, flags, dev_name, data,
+ cramfs_mtd_fill_super);
+ if (!IS_ERR(ret))
+ return ret;
+ }
+ if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV)) {
+ ret = mount_bdev(fs_type, flags, dev_name, data,
+ cramfs_blkdev_fill_super);
+ }
+ return ret;
}
static struct file_system_type cramfs_fs_type = {
diff --git a/fs/dax.c b/fs/dax.c
index 3652b26a0048..78b72c48374e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -526,13 +526,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
static void *dax_insert_mapping_entry(struct address_space *mapping,
struct vm_fault *vmf,
void *entry, sector_t sector,
- unsigned long flags)
+ unsigned long flags, bool dirty)
{
struct radix_tree_root *page_tree = &mapping->page_tree;
void *new_entry;
pgoff_t index = vmf->pgoff;
- if (vmf->flags & FAULT_FLAG_WRITE)
+ if (dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
@@ -569,7 +569,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
entry = new_entry;
}
- if (vmf->flags & FAULT_FLAG_WRITE)
+ if (dirty)
radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
spin_unlock_irq(&mapping->tree_lock);
@@ -627,7 +627,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
if (pfn != pmd_pfn(*pmdp))
goto unlock_pmd;
- if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
+ if (!pmd_dirty(*pmdp)
+ && !pmd_access_permitted(*pmdp, WRITE))
goto unlock_pmd;
flush_cache_page(vma, address, pfn);
@@ -825,38 +826,42 @@ out:
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
-static int dax_insert_mapping(struct address_space *mapping,
- struct block_device *bdev, struct dax_device *dax_dev,
- sector_t sector, size_t size, void *entry,
- struct vm_area_struct *vma, struct vm_fault *vmf)
+static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
- unsigned long vaddr = vmf->address;
- void *ret, *kaddr;
+ return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
+}
+
+static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
+ pfn_t *pfnp)
+{
+ const sector_t sector = dax_iomap_sector(iomap, pos);
pgoff_t pgoff;
+ void *kaddr;
int id, rc;
- pfn_t pfn;
+ long length;
- rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
+ rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
if (rc)
return rc;
-
id = dax_read_lock();
- rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn);
- if (rc < 0) {
- dax_read_unlock(id);
- return rc;
+ length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
+ &kaddr, pfnp);
+ if (length < 0) {
+ rc = length;
+ goto out;
}
+ rc = -EINVAL;
+ if (PFN_PHYS(length) < size)
+ goto out;
+ if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
+ goto out;
+ /* For larger pages we need devmap */
+ if (length > 1 && !pfn_t_devmap(*pfnp))
+ goto out;
+ rc = 0;
+out:
dax_read_unlock(id);
-
- ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, 0);
- if (IS_ERR(ret))
- return PTR_ERR(ret);
-
- trace_dax_insert_mapping(mapping->host, vmf, ret);
- if (vmf->flags & FAULT_FLAG_WRITE)
- return vm_insert_mixed_mkwrite(vma, vaddr, pfn);
- else
- return vm_insert_mixed(vma, vaddr, pfn);
+ return rc;
}
/*
@@ -882,7 +887,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
}
entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0,
- RADIX_DAX_ZERO_PAGE);
+ RADIX_DAX_ZERO_PAGE, false);
if (IS_ERR(entry2)) {
ret = VM_FAULT_SIGBUS;
goto out;
@@ -941,11 +946,6 @@ int __dax_zero_page_range(struct block_device *bdev,
}
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
-static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
-{
- return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
-}
-
static loff_t
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct iomap *iomap)
@@ -1085,19 +1085,33 @@ static int dax_fault_return(int error)
return VM_FAULT_SIGBUS;
}
-static int dax_iomap_pte_fault(struct vm_fault *vmf,
+/*
+ * MAP_SYNC on a dax mapping guarantees dirty metadata is
+ * flushed on write-faults (non-cow), but not read-faults.
+ */
+static bool dax_fault_is_synchronous(unsigned long flags,
+ struct vm_area_struct *vma, struct iomap *iomap)
+{
+ return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
+ && (iomap->flags & IOMAP_F_DIRTY);
+}
+
+static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
const struct iomap_ops *ops)
{
- struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+ struct vm_area_struct *vma = vmf->vma;
+ struct address_space *mapping = vma->vm_file->f_mapping;
struct inode *inode = mapping->host;
unsigned long vaddr = vmf->address;
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
- sector_t sector;
struct iomap iomap = { 0 };
unsigned flags = IOMAP_FAULT;
int error, major = 0;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
+ bool sync;
int vmf_ret = 0;
void *entry;
+ pfn_t pfn;
trace_dax_pte_fault(inode, vmf, vmf_ret);
/*
@@ -1110,7 +1124,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
goto out;
}
- if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+ if (write && !vmf->cow_page)
flags |= IOMAP_WRITE;
entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
@@ -1145,9 +1159,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
goto error_finish_iomap;
}
- sector = dax_iomap_sector(&iomap, pos);
-
if (vmf->cow_page) {
+ sector_t sector = dax_iomap_sector(&iomap, pos);
+
switch (iomap.type) {
case IOMAP_HOLE:
case IOMAP_UNWRITTEN:
@@ -1173,22 +1187,55 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
goto finish_iomap;
}
+ sync = dax_fault_is_synchronous(flags, vma, &iomap);
+
switch (iomap.type) {
case IOMAP_MAPPED:
if (iomap.flags & IOMAP_F_NEW) {
count_vm_event(PGMAJFAULT);
- count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
+ count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
major = VM_FAULT_MAJOR;
}
- error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
- sector, PAGE_SIZE, entry, vmf->vma, vmf);
+ error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
+ if (error < 0)
+ goto error_finish_iomap;
+
+ entry = dax_insert_mapping_entry(mapping, vmf, entry,
+ dax_iomap_sector(&iomap, pos),
+ 0, write && !sync);
+ if (IS_ERR(entry)) {
+ error = PTR_ERR(entry);
+ goto error_finish_iomap;
+ }
+
+ /*
+ * If we are doing synchronous page fault and inode needs fsync,
+ * we can insert PTE into page tables only after that happens.
+ * Skip insertion for now and return the pfn so that caller can
+ * insert it after fsync is done.
+ */
+ if (sync) {
+ if (WARN_ON_ONCE(!pfnp)) {
+ error = -EIO;
+ goto error_finish_iomap;
+ }
+ *pfnp = pfn;
+ vmf_ret = VM_FAULT_NEEDDSYNC | major;
+ goto finish_iomap;
+ }
+ trace_dax_insert_mapping(inode, vmf, entry);
+ if (write)
+ error = vm_insert_mixed_mkwrite(vma, vaddr, pfn);
+ else
+ error = vm_insert_mixed(vma, vaddr, pfn);
+
/* -EBUSY is fine, somebody else faulted on the same PTE */
if (error == -EBUSY)
error = 0;
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
- if (!(vmf->flags & FAULT_FLAG_WRITE)) {
+ if (!write) {
vmf_ret = dax_load_hole(mapping, entry, vmf);
goto finish_iomap;
}
@@ -1223,53 +1270,11 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
}
#ifdef CONFIG_FS_DAX_PMD
-static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
- loff_t pos, void *entry)
-{
- struct address_space *mapping = vmf->vma->vm_file->f_mapping;
- const sector_t sector = dax_iomap_sector(iomap, pos);
- struct dax_device *dax_dev = iomap->dax_dev;
- struct block_device *bdev = iomap->bdev;
- struct inode *inode = mapping->host;
- const size_t size = PMD_SIZE;
- void *ret = NULL, *kaddr;
- long length = 0;
- pgoff_t pgoff;
- pfn_t pfn = {};
- int id;
-
- if (bdev_dax_pgoff(bdev, sector, size, &pgoff) != 0)
- goto fallback;
-
- id = dax_read_lock();
- length = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn);
- if (length < 0)
- goto unlock_fallback;
- length = PFN_PHYS(length);
-
- if (length < size)
- goto unlock_fallback;
- if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR)
- goto unlock_fallback;
- if (!pfn_t_devmap(pfn))
- goto unlock_fallback;
- dax_read_unlock(id);
-
- ret = dax_insert_mapping_entry(mapping, vmf, entry, sector,
- RADIX_DAX_PMD);
- if (IS_ERR(ret))
- goto fallback;
-
- trace_dax_pmd_insert_mapping(inode, vmf, length, pfn, ret);
- return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
- pfn, vmf->flags & FAULT_FLAG_WRITE);
-
-unlock_fallback:
- dax_read_unlock(id);
-fallback:
- trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, pfn, ret);
- return VM_FAULT_FALLBACK;
-}
+/*
+ * The 'colour' (ie low bits) within a PMD of a page offset. This comes up
+ * more often than one might expect in the below functions.
+ */
+#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
void *entry)
@@ -1288,7 +1293,7 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
goto fallback;
ret = dax_insert_mapping_entry(mapping, vmf, entry, 0,
- RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE);
+ RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
if (IS_ERR(ret))
goto fallback;
@@ -1310,13 +1315,14 @@ fallback:
return VM_FAULT_FALLBACK;
}
-static int dax_iomap_pmd_fault(struct vm_fault *vmf,
+static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
const struct iomap_ops *ops)
{
struct vm_area_struct *vma = vmf->vma;
struct address_space *mapping = vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
bool write = vmf->flags & FAULT_FLAG_WRITE;
+ bool sync;
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
struct inode *inode = mapping->host;
int result = VM_FAULT_FALLBACK;
@@ -1325,6 +1331,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
void *entry;
loff_t pos;
int error;
+ pfn_t pfn;
/*
* Check whether offset isn't beyond end of file now. Caller is
@@ -1332,7 +1339,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
* this is a reliable test.
*/
pgoff = linear_page_index(vma, pmd_addr);
- max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+ max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
@@ -1356,13 +1363,13 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
goto fallback;
- if (pgoff > max_pgoff) {
+ if (pgoff >= max_pgoff) {
result = VM_FAULT_SIGBUS;
goto out;
}
/* If the PMD would extend beyond the file size */
- if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
+ if ((pgoff | PG_PMD_COLOUR) >= max_pgoff)
goto fallback;
/*
@@ -1400,9 +1407,37 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
if (iomap.offset + iomap.length < pos + PMD_SIZE)
goto finish_iomap;
+ sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap);
+
switch (iomap.type) {
case IOMAP_MAPPED:
- result = dax_pmd_insert_mapping(vmf, &iomap, pos, entry);
+ error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
+ if (error < 0)
+ goto finish_iomap;
+
+ entry = dax_insert_mapping_entry(mapping, vmf, entry,
+ dax_iomap_sector(&iomap, pos),
+ RADIX_DAX_PMD, write && !sync);
+ if (IS_ERR(entry))
+ goto finish_iomap;
+
+ /*
+ * If we are doing synchronous page fault and inode needs fsync,
+ * we can insert PMD into page tables only after that happens.
+ * Skip insertion for now and return the pfn so that caller can
+ * insert it after fsync is done.
+ */
+ if (sync) {
+ if (WARN_ON_ONCE(!pfnp))
+ goto finish_iomap;
+ *pfnp = pfn;
+ result = VM_FAULT_NEEDDSYNC;
+ goto finish_iomap;
+ }
+
+ trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
+ result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
+ write);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
@@ -1442,7 +1477,7 @@ out:
return result;
}
#else
-static int dax_iomap_pmd_fault(struct vm_fault *vmf,
+static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
const struct iomap_ops *ops)
{
return VM_FAULT_FALLBACK;
@@ -1452,7 +1487,9 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
/**
* dax_iomap_fault - handle a page fault on a DAX file
* @vmf: The description of the fault
- * @ops: iomap ops passed from the file system
+ * @pe_size: Size of the page to fault in
+ * @pfnp: PFN to insert for synchronous faults if fsync is required
+ * @ops: Iomap ops passed from the file system
*
* When a page fault occurs, filesystems may call this helper in
* their fault handler for DAX files. dax_iomap_fault() assumes the caller
@@ -1460,15 +1497,98 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
* successfully.
*/
int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
- const struct iomap_ops *ops)
+ pfn_t *pfnp, const struct iomap_ops *ops)
{
switch (pe_size) {
case PE_SIZE_PTE:
- return dax_iomap_pte_fault(vmf, ops);
+ return dax_iomap_pte_fault(vmf, pfnp, ops);
case PE_SIZE_PMD:
- return dax_iomap_pmd_fault(vmf, ops);
+ return dax_iomap_pmd_fault(vmf, pfnp, ops);
default:
return VM_FAULT_FALLBACK;
}
}
EXPORT_SYMBOL_GPL(dax_iomap_fault);
+
+/**
+ * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
+ * @vmf: The description of the fault
+ * @pe_size: Size of entry to be inserted
+ * @pfn: PFN to insert
+ *
+ * This function inserts writeable PTE or PMD entry into page tables for mmaped
+ * DAX file. It takes care of marking corresponding radix tree entry as dirty
+ * as well.
+ */
+static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
+ enum page_entry_size pe_size,
+ pfn_t pfn)
+{
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+ void *entry, **slot;
+ pgoff_t index = vmf->pgoff;
+ int vmf_ret, error;
+
+ spin_lock_irq(&mapping->tree_lock);
+ entry = get_unlocked_mapping_entry(mapping, index, &slot);
+ /* Did we race with someone splitting entry or so? */
+ if (!entry ||
+ (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
+ (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
+ put_unlocked_mapping_entry(mapping, index, entry);
+ spin_unlock_irq(&mapping->tree_lock);
+ trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
+ VM_FAULT_NOPAGE);
+ return VM_FAULT_NOPAGE;
+ }
+ radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
+ entry = lock_slot(mapping, slot);
+ spin_unlock_irq(&mapping->tree_lock);
+ switch (pe_size) {
+ case PE_SIZE_PTE:
+ error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
+ vmf_ret = dax_fault_return(error);
+ break;
+#ifdef CONFIG_FS_DAX_PMD
+ case PE_SIZE_PMD:
+ vmf_ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
+ pfn, true);
+ break;
+#endif
+ default:
+ vmf_ret = VM_FAULT_FALLBACK;
+ }
+ put_locked_mapping_entry(mapping, index);
+ trace_dax_insert_pfn_mkwrite(mapping->host, vmf, vmf_ret);
+ return vmf_ret;
+}
+
+/**
+ * dax_finish_sync_fault - finish synchronous page fault
+ * @vmf: The description of the fault
+ * @pe_size: Size of entry to be inserted
+ * @pfn: PFN to insert
+ *
+ * This function ensures that the file range touched by the page fault is
+ * stored persistently on the media and handles inserting of appropriate page
+ * table entry.
+ */
+int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+ pfn_t pfn)
+{
+ int err;
+ loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
+ size_t len = 0;
+
+ if (pe_size == PE_SIZE_PTE)
+ len = PAGE_SIZE;
+ else if (pe_size == PE_SIZE_PMD)
+ len = PMD_SIZE;
+ else
+ WARN_ON_ONCE(1);
+ err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
+ if (err)
+ return VM_FAULT_SIGBUS;
+ return dax_insert_pfn_mkwrite(vmf, pe_size, pfn);
+}
+EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e5e29f8c920b..846ca150d52e 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -36,27 +36,13 @@
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
+#include <linux/kernel.h>
#include "ecryptfs_kernel.h"
#define DECRYPT 0
#define ENCRYPT 1
/**
- * ecryptfs_to_hex
- * @dst: Buffer to take hex character representation of contents of
- * src; must be at least of size (src_size * 2)
- * @src: Buffer to be converted to a hex string representation
- * @src_size: number of bytes to convert
- */
-void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
-{
- int x;
-
- for (x = 0; x < src_size; x++)
- sprintf(&dst[x * 2], "%.2x", (unsigned char)src[x]);
-}
-
-/**
* ecryptfs_from_hex
* @dst: Buffer to take the bytes from src hex; must be at least of
* size (src_size / 2)
@@ -899,8 +885,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
u32 flags;
flags = get_unaligned_be32(page_virt);
- for (i = 0; i < ((sizeof(ecryptfs_flag_map)
- / sizeof(struct ecryptfs_flag_map_elem))); i++)
+ for (i = 0; i < ARRAY_SIZE(ecryptfs_flag_map); i++)
if (flags & ecryptfs_flag_map[i].file_flag) {
crypt_stat->flags |= ecryptfs_flag_map[i].local_flag;
} else
@@ -937,8 +922,7 @@ void ecryptfs_write_crypt_stat_flags(char *page_virt,
u32 flags = 0;
int i;
- for (i = 0; i < ((sizeof(ecryptfs_flag_map)
- / sizeof(struct ecryptfs_flag_map_elem))); i++)
+ for (i = 0; i < ARRAY_SIZE(ecryptfs_flag_map); i++)
if (crypt_stat->flags & ecryptfs_flag_map[i].local_flag)
flags |= ecryptfs_flag_map[i].file_flag;
/* Version is in top 8 bits of the 32-bit flag vector */
@@ -1434,8 +1418,6 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
page_virt = kmem_cache_alloc(ecryptfs_header_cache, GFP_USER);
if (!page_virt) {
rc = -ENOMEM;
- printk(KERN_ERR "%s: Unable to allocate page_virt\n",
- __func__);
goto out;
}
rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
@@ -1522,9 +1504,6 @@ ecryptfs_encrypt_filename(struct ecryptfs_filename *filename,
filename->encrypted_filename =
kmalloc(filename->encrypted_filename_size, GFP_KERNEL);
if (!filename->encrypted_filename) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kmalloc [%zd] bytes\n", __func__,
- filename->encrypted_filename_size);
rc = -ENOMEM;
goto out;
}
@@ -1669,12 +1648,10 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name,
BUG_ON(!mutex_is_locked(&key_tfm_list_mutex));
tmp_tfm = kmem_cache_alloc(ecryptfs_key_tfm_cache, GFP_KERNEL);
- if (key_tfm != NULL)
+ if (key_tfm)
(*key_tfm) = tmp_tfm;
if (!tmp_tfm) {
rc = -ENOMEM;
- printk(KERN_ERR "Error attempting to allocate from "
- "ecryptfs_key_tfm_cache\n");
goto out;
}
mutex_init(&tmp_tfm->key_tfm_mutex);
@@ -1690,7 +1667,7 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name,
"cipher with name = [%s]; rc = [%d]\n",
tmp_tfm->cipher_name, rc);
kmem_cache_free(ecryptfs_key_tfm_cache, tmp_tfm);
- if (key_tfm != NULL)
+ if (key_tfm)
(*key_tfm) = NULL;
goto out;
}
@@ -1881,7 +1858,7 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
size_t src_byte_offset = 0;
size_t dst_byte_offset = 0;
- if (dst == NULL) {
+ if (!dst) {
(*dst_size) = ecryptfs_max_decoded_size(src_size);
goto out;
}
@@ -1949,9 +1926,6 @@ int ecryptfs_encrypt_and_encode_filename(
filename = kzalloc(sizeof(*filename), GFP_KERNEL);
if (!filename) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kzalloc [%zd] bytes\n", __func__,
- sizeof(*filename));
rc = -ENOMEM;
goto out;
}
@@ -1980,9 +1954,6 @@ int ecryptfs_encrypt_and_encode_filename(
+ encoded_name_no_prefix_size);
(*encoded_name) = kmalloc((*encoded_name_size) + 1, GFP_KERNEL);
if (!(*encoded_name)) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kzalloc [%zd] bytes\n", __func__,
- (*encoded_name_size));
rc = -ENOMEM;
kfree(filename->encrypted_filename);
kfree(filename);
@@ -2064,9 +2035,6 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
name, name_size);
decoded_name = kmalloc(decoded_name_size, GFP_KERNEL);
if (!decoded_name) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kmalloc [%zd] bytes\n", __func__,
- decoded_name_size);
rc = -ENOMEM;
goto out;
}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 3fbc0ff79699..e74cb2a0b299 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -31,6 +31,7 @@
#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <keys/encrypted-type.h>
+#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/fs_stack.h>
#include <linux/namei.h>
@@ -51,7 +52,13 @@
#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
-extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
+static inline void
+ecryptfs_to_hex(char *dst, char *src, size_t src_size)
+{
+ char *end = bin2hex(dst, src, src_size);
+ *end = '\0';
+}
+
extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
struct ecryptfs_key_record {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index efc2db42d175..847904aa63a9 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -64,7 +64,6 @@ static int ecryptfs_inode_set(struct inode *inode, void *opaque)
/* i_size will be overwritten for encrypted regular files */
fsstack_copy_inode_size(inode, lower_inode);
inode->i_ino = lower_inode->i_ino;
- inode->i_version++;
inode->i_mapping->a_ops = &ecryptfs_aops;
if (S_ISLNK(inode->i_mode))
@@ -334,9 +333,6 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
if (!dentry_info) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to allocate ecryptfs_dentry_info struct\n",
- __func__);
dput(lower_dentry);
return ERR_PTR(-ENOMEM);
}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index fa218cd64f74..c89a58cfc991 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -639,11 +639,9 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
int rc = 0;
s = kzalloc(sizeof(*s), GFP_KERNEL);
- if (!s) {
- printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
- "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
+ if (!s)
return -ENOMEM;
- }
+
(*packet_size) = 0;
rc = ecryptfs_find_auth_tok_for_sig(
&auth_tok_key,
@@ -687,7 +685,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
* separator, and then the filename */
s->max_packet_size = (ECRYPTFS_TAG_70_MAX_METADATA_SIZE
+ s->block_aligned_filename_size);
- if (dest == NULL) {
+ if (!dest) {
(*packet_size) = s->max_packet_size;
goto out_unlock;
}
@@ -714,9 +712,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
s->block_aligned_filename = kzalloc(s->block_aligned_filename_size,
GFP_KERNEL);
if (!s->block_aligned_filename) {
- printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
- "kzalloc [%zd] bytes\n", __func__,
- s->block_aligned_filename_size);
rc = -ENOMEM;
goto out_unlock;
}
@@ -769,10 +764,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
s->hash_desc = kmalloc(sizeof(*s->hash_desc) +
crypto_shash_descsize(s->hash_tfm), GFP_KERNEL);
if (!s->hash_desc) {
- printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
- "kmalloc [%zd] bytes\n", __func__,
- sizeof(*s->hash_desc) +
- crypto_shash_descsize(s->hash_tfm));
rc = -ENOMEM;
goto out_release_free_unlock;
}
@@ -925,11 +916,9 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
(*filename_size) = 0;
(*filename) = NULL;
s = kzalloc(sizeof(*s), GFP_KERNEL);
- if (!s) {
- printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
- "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
+ if (!s)
return -ENOMEM;
- }
+
if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) {
printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
"at least [%d]\n", __func__, max_packet_size,
@@ -1015,9 +1004,6 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
s->decrypted_filename = kmalloc(s->block_aligned_filename_size,
GFP_KERNEL);
if (!s->decrypted_filename) {
- printk(KERN_ERR "%s: Out of memory whilst attempting to "
- "kmalloc [%zd] bytes\n", __func__,
- s->block_aligned_filename_size);
rc = -ENOMEM;
goto out_unlock;
}
@@ -1097,9 +1083,6 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
}
(*filename) = kmalloc(((*filename_size) + 1), GFP_KERNEL);
if (!(*filename)) {
- printk(KERN_ERR "%s: Out of memory whilst attempting to "
- "kmalloc [%zd] bytes\n", __func__,
- ((*filename_size) + 1));
rc = -ENOMEM;
goto out_free_unlock;
}
@@ -1333,7 +1316,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
if ((*new_auth_tok)->session_key.encrypted_key_size
> ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
printk(KERN_WARNING "Tag 1 packet contains key larger "
- "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES");
+ "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n");
rc = -EINVAL;
goto out;
}
@@ -2525,11 +2508,9 @@ int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig)
struct ecryptfs_key_sig *new_key_sig;
new_key_sig = kmem_cache_alloc(ecryptfs_key_sig_cache, GFP_KERNEL);
- if (!new_key_sig) {
- printk(KERN_ERR
- "Error allocating from ecryptfs_key_sig_cache\n");
+ if (!new_key_sig)
return -ENOMEM;
- }
+
memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX);
new_key_sig->keysig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
/* Caller must hold keysig_list_mutex */
@@ -2545,16 +2526,12 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
char *sig, u32 global_auth_tok_flags)
{
struct ecryptfs_global_auth_tok *new_auth_tok;
- int rc = 0;
new_auth_tok = kmem_cache_zalloc(ecryptfs_global_auth_tok_cache,
GFP_KERNEL);
- if (!new_auth_tok) {
- rc = -ENOMEM;
- printk(KERN_ERR "Error allocating from "
- "ecryptfs_global_auth_tok_cache\n");
- goto out;
- }
+ if (!new_auth_tok)
+ return -ENOMEM;
+
memcpy(new_auth_tok->sig, sig, ECRYPTFS_SIG_SIZE_HEX);
new_auth_tok->flags = global_auth_tok_flags;
new_auth_tok->sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
@@ -2562,7 +2539,6 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
list_add(&new_auth_tok->mount_crypt_stat_list,
&mount_crypt_stat->global_auth_tok_list);
mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
-out:
- return rc;
+ return 0;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 25aeaa7328ba..025d66a705db 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -426,7 +426,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
mount_crypt_stat->global_default_cipher_key_size);
if (!cipher_code) {
ecryptfs_printk(KERN_ERR,
- "eCryptfs doesn't support cipher: %s",
+ "eCryptfs doesn't support cipher: %s\n",
mount_crypt_stat->global_default_cipher_name);
rc = -EINVAL;
goto out;
@@ -560,8 +560,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
* Set the POSIX ACL flag based on whether they're enabled in the lower
* mount.
*/
- s->s_flags = flags & ~MS_POSIXACL;
- s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
+ s->s_flags = flags & ~SB_POSIXACL;
+ s->s_flags |= path.dentry->d_sb->s_flags & SB_POSIXACL;
/**
* Force a read-only eCryptfs mount when:
@@ -569,7 +569,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
* 2) The ecryptfs_encrypted_view mount option is specified
*/
if (sb_rdonly(path.dentry->d_sb) || mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
- s->s_flags |= MS_RDONLY;
+ s->s_flags |= SB_RDONLY;
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
@@ -602,7 +602,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
ecryptfs_set_dentry_private(s->s_root, root_info);
root_info->lower_path = path;
- s->s_flags |= MS_ACTIVE;
+ s->s_flags |= SB_ACTIVE;
return dget(s->s_root);
out_free:
@@ -781,7 +781,7 @@ static struct attribute *attributes[] = {
NULL,
};
-static struct attribute_group attr_group = {
+static const struct attribute_group attr_group = {
.attrs = attributes,
};
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 286f10b0363b..9fdd5bcf4564 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -147,8 +147,6 @@ ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file)
(*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL);
if (!(*daemon)) {
rc = -ENOMEM;
- printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of "
- "GFP_KERNEL memory\n", __func__, sizeof(**daemon));
goto out;
}
(*daemon)->file = file;
@@ -250,8 +248,6 @@ int ecryptfs_process_response(struct ecryptfs_daemon *daemon,
msg_ctx->msg = kmemdup(msg, msg_size, GFP_KERNEL);
if (!msg_ctx->msg) {
rc = -ENOMEM;
- printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of "
- "GFP_KERNEL memory\n", __func__, msg_size);
goto unlock;
}
msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE;
@@ -386,7 +382,6 @@ int __init ecryptfs_init_messaging(void)
GFP_KERNEL);
if (!ecryptfs_daemon_hash) {
rc = -ENOMEM;
- printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
mutex_unlock(&ecryptfs_daemon_hash_mux);
goto out;
}
@@ -398,7 +393,6 @@ int __init ecryptfs_init_messaging(void)
GFP_KERNEL);
if (!ecryptfs_msg_ctx_arr) {
rc = -ENOMEM;
- printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
goto out;
}
mutex_init(&ecryptfs_msg_ctx_lists_mux);
@@ -442,15 +436,16 @@ void ecryptfs_release_messaging(void)
}
if (ecryptfs_daemon_hash) {
struct ecryptfs_daemon *daemon;
+ struct hlist_node *n;
int i;
mutex_lock(&ecryptfs_daemon_hash_mux);
for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
int rc;
- hlist_for_each_entry(daemon,
- &ecryptfs_daemon_hash[i],
- euid_chain) {
+ hlist_for_each_entry_safe(daemon, n,
+ &ecryptfs_daemon_hash[i],
+ euid_chain) {
rc = ecryptfs_exorcise_daemon(daemon);
if (rc)
printk(KERN_ERR "%s: Error whilst "
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index e4141f257495..f09cacaf8c80 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -163,12 +163,8 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
struct ecryptfs_message *msg;
msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL);
- if (!msg) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
- (sizeof(*msg) + data_size));
+ if (!msg)
return -ENOMEM;
- }
mutex_lock(&msg_ctx->mux);
msg_ctx->msg = msg;
@@ -383,7 +379,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
goto memdup;
} else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) {
printk(KERN_WARNING "%s: Acceptable packet size range is "
- "[%d-%zu], but amount of data written is [%zu].",
+ "[%d-%zu], but amount of data written is [%zu].\n",
__func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count);
return -EINVAL;
}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 1f0c471b4ba3..cdf358b209d9 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -431,8 +431,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
}
xattr_virt = kmem_cache_alloc(ecryptfs_xattr_cache, GFP_KERNEL);
if (!xattr_virt) {
- printk(KERN_ERR "Out of memory whilst attempting to write "
- "inode size to xattr\n");
rc = -ENOMEM;
goto out;
}
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 65b59009555b..6ffb7ba1547a 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -116,7 +116,7 @@ static void destroy_inodecache(void)
static int efs_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
return 0;
}
@@ -311,7 +311,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
#ifdef DEBUG
pr_info("forcing read-only mode\n");
#endif
- s->s_flags |= MS_RDONLY;
+ s->s_flags |= SB_RDONLY;
}
s->s_op = &efs_superblock_operations;
s->s_export_op = &efs_export_ops;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 2fabd19cdeea..afd548ebc328 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -276,12 +276,6 @@ static DEFINE_MUTEX(epmutex);
/* Used to check for epoll file descriptor inclusion loops */
static struct nested_calls poll_loop_ncalls;
-/* Used for safe wake up implementation */
-static struct nested_calls poll_safewake_ncalls;
-
-/* Used to call file's f_op->poll() under the nested calls boundaries */
-static struct nested_calls poll_readywalk_ncalls;
-
/* Slab cache used to allocate "struct epitem" */
static struct kmem_cache *epi_cache __read_mostly;
@@ -551,40 +545,21 @@ out_unlock:
* this special case of epoll.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
- unsigned long events, int subclass)
+
+static struct nested_calls poll_safewake_ncalls;
+
+static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
{
unsigned long flags;
+ wait_queue_head_t *wqueue = (wait_queue_head_t *)cookie;
- spin_lock_irqsave_nested(&wqueue->lock, flags, subclass);
- wake_up_locked_poll(wqueue, events);
+ spin_lock_irqsave_nested(&wqueue->lock, flags, call_nests + 1);
+ wake_up_locked_poll(wqueue, POLLIN);
spin_unlock_irqrestore(&wqueue->lock, flags);
-}
-#else
-static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
- unsigned long events, int subclass)
-{
- wake_up_poll(wqueue, events);
-}
-#endif
-static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
-{
- ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN,
- 1 + call_nests);
return 0;
}
-/*
- * Perform a safe wake up of the poll wait list. The problem is that
- * with the new callback'd wake up system, it is possible that the
- * poll callback is reentered from inside the call to wake_up() done
- * on the poll wait queue head. The rule is that we cannot reenter the
- * wake up code from the same task more than EP_MAX_NESTS times,
- * and we cannot reenter the same wait queue head at all. This will
- * enable to have a hierarchy of epoll file descriptor of no more than
- * EP_MAX_NESTS deep.
- */
static void ep_poll_safewake(wait_queue_head_t *wq)
{
int this_cpu = get_cpu();
@@ -595,6 +570,15 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
put_cpu();
}
+#else
+
+static void ep_poll_safewake(wait_queue_head_t *wq)
+{
+ wake_up_poll(wq, POLLIN);
+}
+
+#endif
+
static void ep_remove_wait_queue(struct eppoll_entry *pwq)
{
wait_queue_head_t *whead;
@@ -880,11 +864,33 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file)
return 0;
}
-static inline unsigned int ep_item_poll(struct epitem *epi, poll_table *pt)
+static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
+ void *priv);
+static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
+ poll_table *pt);
+
+/*
+ * Differs from ep_eventpoll_poll() in that internal callers already have
+ * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested()
+ * is correctly annotated.
+ */
+static unsigned int ep_item_poll(struct epitem *epi, poll_table *pt, int depth)
{
+ struct eventpoll *ep;
+ bool locked;
+
pt->_key = epi->event.events;
+ if (!is_file_epoll(epi->ffd.file))
+ return epi->ffd.file->f_op->poll(epi->ffd.file, pt) &
+ epi->event.events;
+
+ ep = epi->ffd.file->private_data;
+ poll_wait(epi->ffd.file, &ep->poll_wait, pt);
+ locked = pt && (pt->_qproc == ep_ptable_queue_proc);
- return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & epi->event.events;
+ return ep_scan_ready_list(epi->ffd.file->private_data,
+ ep_read_events_proc, &depth, depth,
+ locked) & epi->event.events;
}
static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
@@ -892,13 +898,15 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
{
struct epitem *epi, *tmp;
poll_table pt;
+ int depth = *(int *)priv;
init_poll_funcptr(&pt, NULL);
+ depth++;
list_for_each_entry_safe(epi, tmp, head, rdllink) {
- if (ep_item_poll(epi, &pt))
+ if (ep_item_poll(epi, &pt, depth)) {
return POLLIN | POLLRDNORM;
- else {
+ } else {
/*
* Item has been dropped into the ready list by the poll
* callback, but it's not actually ready, as far as
@@ -912,48 +920,20 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
return 0;
}
-static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
- poll_table *pt);
-
-struct readyevents_arg {
- struct eventpoll *ep;
- bool locked;
-};
-
-static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
-{
- struct readyevents_arg *arg = priv;
-
- return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL,
- call_nests + 1, arg->locked);
-}
-
static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
{
- int pollflags;
struct eventpoll *ep = file->private_data;
- struct readyevents_arg arg;
-
- /*
- * During ep_insert() we already hold the ep->mtx for the tfile.
- * Prevent re-aquisition.
- */
- arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc);
- arg.ep = ep;
+ int depth = 0;
/* Insert inside our poll wait queue */
poll_wait(file, &ep->poll_wait, wait);
/*
* Proceed to find out if wanted events are really available inside
- * the ready list. This need to be done under ep_call_nested()
- * supervision, since the call to f_op->poll() done on listed files
- * could re-enter here.
+ * the ready list.
*/
- pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
- ep_poll_readyevents_proc, &arg, ep, current);
-
- return pollflags != -1 ? pollflags : 0;
+ return ep_scan_ready_list(ep, ep_read_events_proc,
+ &depth, depth, false);
}
#ifdef CONFIG_PROC_FS
@@ -1472,7 +1452,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
* this operation completes, the poll callback can start hitting
* the new item.
*/
- revents = ep_item_poll(epi, &epq.pt);
+ revents = ep_item_poll(epi, &epq.pt, 1);
/*
* We have to check if something went wrong during the poll wait queue
@@ -1606,7 +1586,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
* Get current event bits. We can safely use the file* here because
* its usage count has been increased by the caller of this function.
*/
- revents = ep_item_poll(epi, &pt);
+ revents = ep_item_poll(epi, &pt, 1);
/*
* If the item is "hot" and it is not registered inside the ready
@@ -1674,7 +1654,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
list_del_init(&epi->rdllink);
- revents = ep_item_poll(epi, &pt);
+ revents = ep_item_poll(epi, &pt, 1);
/*
* If the event mask intersect the caller-requested one,
@@ -2259,7 +2239,6 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
compat_size_t, sigsetsize)
{
long err;
- compat_sigset_t csigmask;
sigset_t ksigmask, sigsaved;
/*
@@ -2269,9 +2248,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
if (sigmask) {
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
- if (copy_from_user(&csigmask, sigmask, sizeof(csigmask)))
+ if (get_compat_sigset(&ksigmask, sigmask))
return -EFAULT;
- sigset_from_compat(&ksigmask, &csigmask);
sigsaved = current->blocked;
set_current_blocked(&ksigmask);
}
@@ -2315,11 +2293,10 @@ static int __init eventpoll_init(void)
*/
ep_nested_calls_init(&poll_loop_ncalls);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Initialize the structure used to perform safe poll wait head wake ups */
ep_nested_calls_init(&poll_safewake_ncalls);
-
- /* Initialize the structure used to perform file's f_op->poll() calls */
- ep_nested_calls_init(&poll_readywalk_ncalls);
+#endif
/*
* We can have many thousands of epitems, so prevent this from
@@ -2329,11 +2306,11 @@ static int __init eventpoll_init(void)
/* Allocates slab cache used to allocate "struct epitem" items */
epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
/* Allocates slab cache used to allocate "struct eppoll_entry" */
pwq_cache = kmem_cache_create("eventpoll_pwq",
- sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL);
+ sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);
return 0;
}
diff --git a/fs/exec.c b/fs/exec.c
index 1d6243d9f2b6..6be2aa0ab26f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1340,10 +1340,15 @@ void setup_new_exec(struct linux_binprm * bprm)
* avoid bad behavior from the prior rlimits. This has to
* happen before arch_pick_mmap_layout(), which examines
* RLIMIT_STACK, but after the point of no return to avoid
- * needing to clean up the change on failure.
+ * races from other threads changing the limits. This also
+ * must be protected from races with prlimit() calls.
*/
+ task_lock(current->group_leader);
if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+ if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
+ current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
+ task_unlock(current->group_leader);
}
arch_pick_mmap_layout(current->mm);
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index e1b3724bebf2..33db13365c5e 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -548,7 +548,7 @@ do_more:
}
mark_buffer_dirty(bitmap_bh);
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
sync_dirty_buffer(bitmap_bh);
group_adjust_blocks(sb, block_group, desc, bh2, group_freed);
@@ -1424,7 +1424,7 @@ allocated:
percpu_counter_sub(&sbi->s_freeblocks_counter, num);
mark_buffer_dirty(bitmap_bh);
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
sync_dirty_buffer(bitmap_bh);
*errp = 0;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index c67b486488fd..2da67699dc33 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -100,7 +100,7 @@ static int ext2_dax_fault(struct vm_fault *vmf)
}
down_read(&ei->dax_sem);
- ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &ext2_iomap_ops);
+ ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, &ext2_iomap_ops);
up_read(&ei->dax_sem);
if (vmf->flags & FAULT_FLAG_WRITE)
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index a1fc3dabca41..6484199b35d1 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -145,7 +145,7 @@ void ext2_free_inode (struct inode * inode)
else
ext2_release_inode(sb, block_group, is_directory);
mark_buffer_dirty(bitmap_bh);
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
sync_dirty_buffer(bitmap_bh);
brelse(bitmap_bh);
@@ -517,7 +517,7 @@ repeat_in_this_group:
goto fail;
got:
mark_buffer_dirty(bitmap_bh);
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
sync_dirty_buffer(bitmap_bh);
brelse(bitmap_bh);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index e2b6be03e69b..7646818ab266 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -75,7 +75,7 @@ void ext2_error(struct super_block *sb, const char *function,
if (test_opt(sb, ERRORS_RO)) {
ext2_msg(sb, KERN_CRIT,
"error: remounting filesystem read-only");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
}
@@ -656,7 +656,7 @@ static int ext2_setup_super (struct super_block * sb,
ext2_msg(sb, KERN_ERR,
"error: revision level too high, "
"forcing read-only mode");
- res = MS_RDONLY;
+ res = SB_RDONLY;
}
if (read_only)
return res;
@@ -924,9 +924,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_resuid = opts.s_resuid;
sbi->s_resgid = opts.s_resgid;
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
- MS_POSIXACL : 0);
+ SB_POSIXACL : 0);
sb->s_iflags |= SB_I_CGROUPWB;
if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
@@ -1178,7 +1178,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
ext2_msg(sb, KERN_WARNING,
"warning: mounting ext3 filesystem as ext2");
if (ext2_setup_super (sb, es, sb_rdonly(sb)))
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
ext2_write_super(sb);
return 0;
@@ -1341,9 +1341,9 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
"dax flag with busy inodes while remounting");
new_opts.s_mount_opt ^= EXT2_MOUNT_DAX;
}
- if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb))
+ if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
goto out_set;
- if (*flags & MS_RDONLY) {
+ if (*flags & SB_RDONLY) {
if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
!(sbi->s_mount_state & EXT2_VALID_FS))
goto out_set;
@@ -1379,7 +1379,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
*/
sbi->s_mount_state = le16_to_cpu(es->s_state);
if (!ext2_setup_super (sb, es, 0))
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
spin_unlock(&sbi->s_lock);
ext2_write_super(sb);
@@ -1392,8 +1392,8 @@ out_set:
sbi->s_mount_opt = new_opts.s_mount_opt;
sbi->s_resuid = new_opts.s_resuid;
sbi->s_resgid = new_opts.s_resgid;
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0);
spin_unlock(&sbi->s_lock);
return 0;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ad204d2724ac..a0ae27b1bc66 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -28,6 +28,7 @@
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
+#include <linux/mman.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -297,6 +298,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
*/
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
(vmf->vma->vm_flags & VM_SHARED);
+ pfn_t pfn;
if (write) {
sb_start_pagefault(sb);
@@ -304,16 +306,20 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb));
+ if (IS_ERR(handle)) {
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+ sb_end_pagefault(sb);
+ return VM_FAULT_SIGBUS;
+ }
} else {
down_read(&EXT4_I(inode)->i_mmap_sem);
}
- if (!IS_ERR(handle))
- result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
- else
- result = VM_FAULT_SIGBUS;
+ result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops);
if (write) {
- if (!IS_ERR(handle))
- ext4_journal_stop(handle);
+ ext4_journal_stop(handle);
+ /* Handling synchronous page fault? */
+ if (result & VM_FAULT_NEEDDSYNC)
+ result = dax_finish_sync_fault(vmf, pe_size, pfn);
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
} else {
@@ -351,6 +357,13 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
+ /*
+ * We don't support synchronous mappings for non-DAX files. At least
+ * until someone comes with a sensible use case.
+ */
+ if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
+ return -EOPNOTSUPP;
+
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
@@ -469,6 +482,7 @@ const struct file_operations ext4_file_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.mmap = ext4_file_mmap,
+ .mmap_supported_flags = MAP_SYNC,
.open = ext4_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8d2b582fb141..7df2c5644e59 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2742,7 +2742,7 @@ static int ext4_writepages(struct address_space *mapping,
* If the filesystem has aborted, it is read-only, so return
* right away instead of dumping stack traces later on that
* will obscure the real source of the problem. We test
- * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
+ * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because
* the latter could be true if the filesystem is mounted
* read-only, and in that case, ext4_writepages should
* *never* be called, so if that ever happens, we would want
@@ -3384,6 +3384,19 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
+static bool ext4_inode_datasync_dirty(struct inode *inode)
+{
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+
+ if (journal)
+ return !jbd2_transaction_committed(journal,
+ EXT4_I(inode)->i_datasync_tid);
+ /* Any metadata buffers to write? */
+ if (!list_empty(&inode->i_mapping->private_list))
+ return true;
+ return inode->i_state & I_DIRTY_DATASYNC;
+}
+
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap)
{
@@ -3497,6 +3510,8 @@ retry:
}
iomap->flags = 0;
+ if (ext4_inode_datasync_dirty(inode))
+ iomap->flags |= IOMAP_F_DIRTY;
iomap->bdev = inode->i_sb->s_bdev;
iomap->dax_dev = sbi->s_daxdev;
iomap->offset = first_block << blkbits;
@@ -5168,7 +5183,7 @@ static int ext4_do_update_inode(handle_t *handle,
ext4_inode_csum_set(inode, raw_inode, ei);
spin_unlock(&ei->i_raw_lock);
- if (inode->i_sb->s_flags & MS_LAZYTIME)
+ if (inode->i_sb->s_flags & SB_LAZYTIME)
ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
bh->b_data);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index b7558f292420..1eec25014f62 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -592,6 +592,44 @@ static int ext4_ioc_getfsmap(struct super_block *sb,
return 0;
}
+static long ext4_ioctl_group_add(struct file *file,
+ struct ext4_new_group_data *input)
+{
+ struct super_block *sb = file_inode(file)->i_sb;
+ int err, err2=0;
+
+ err = ext4_resize_begin(sb);
+ if (err)
+ return err;
+
+ if (ext4_has_feature_bigalloc(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "Online resizing not supported with bigalloc");
+ err = -EOPNOTSUPP;
+ goto group_add_out;
+ }
+
+ err = mnt_want_write_file(file);
+ if (err)
+ goto group_add_out;
+
+ err = ext4_group_add(sb, input);
+ if (EXT4_SB(sb)->s_journal) {
+ jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ }
+ if (err == 0)
+ err = err2;
+ mnt_drop_write_file(file);
+ if (!err && ext4_has_group_desc_csum(sb) &&
+ test_opt(sb, INIT_INODE_TABLE))
+ err = ext4_register_li_request(sb, input->group);
+group_add_out:
+ ext4_resize_end(sb);
+ return err;
+}
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -776,44 +814,12 @@ mext_out:
case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input;
- int err, err2=0;
-
- err = ext4_resize_begin(sb);
- if (err)
- return err;
if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
- sizeof(input))) {
- err = -EFAULT;
- goto group_add_out;
- }
-
- if (ext4_has_feature_bigalloc(sb)) {
- ext4_msg(sb, KERN_ERR,
- "Online resizing not supported with bigalloc");
- err = -EOPNOTSUPP;
- goto group_add_out;
- }
-
- err = mnt_want_write_file(filp);
- if (err)
- goto group_add_out;
+ sizeof(input)))
+ return -EFAULT;
- err = ext4_group_add(sb, &input);
- if (EXT4_SB(sb)->s_journal) {
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
- }
- if (err == 0)
- err = err2;
- mnt_drop_write_file(filp);
- if (!err && ext4_has_group_desc_csum(sb) &&
- test_opt(sb, INIT_INODE_TABLE))
- err = ext4_register_li_request(sb, input.group);
-group_add_out:
- ext4_resize_end(sb);
- return err;
+ return ext4_ioctl_group_add(filp, &input);
}
case EXT4_IOC_MIGRATE:
@@ -1078,8 +1084,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
break;
case EXT4_IOC32_GROUP_ADD: {
struct compat_ext4_new_group_input __user *uinput;
- struct ext4_new_group_input input;
- mm_segment_t old_fs;
+ struct ext4_new_group_data input;
int err;
uinput = compat_ptr(arg);
@@ -1092,12 +1097,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
&uinput->reserved_blocks);
if (err)
return -EFAULT;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD,
- (unsigned long) &input);
- set_fs(old_fs);
- return err;
+ return ext4_ioctl_group_add(file, &input);
}
case EXT4_IOC_MOVE_EXT:
case EXT4_IOC_RESIZE_FS:
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0556cd036b69..7c46693a14d7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -422,7 +422,7 @@ static void ext4_handle_error(struct super_block *sb)
* before ->s_flags update
*/
smp_wmb();
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
if (test_opt(sb, ERRORS_PANIC)) {
if (EXT4_SB(sb)->s_journal &&
@@ -635,7 +635,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
* before ->s_flags update
*/
smp_wmb();
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
save_error_info(sb, function, line);
@@ -1682,10 +1682,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
sb->s_flags |= SB_I_VERSION;
return 1;
case Opt_lazytime:
- sb->s_flags |= MS_LAZYTIME;
+ sb->s_flags |= SB_LAZYTIME;
return 1;
case Opt_nolazytime:
- sb->s_flags &= ~MS_LAZYTIME;
+ sb->s_flags &= ~SB_LAZYTIME;
return 1;
}
@@ -2116,7 +2116,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
ext4_msg(sb, KERN_ERR, "revision level too high, "
"forcing read-only mode");
- res = MS_RDONLY;
+ res = SB_RDONLY;
}
if (read_only)
goto done;
@@ -2429,7 +2429,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
/* don't clear list on RO mount w/ errors */
- if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
+ if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
"clearing orphan list.\n");
es->s_last_orphan = 0;
@@ -2438,19 +2438,19 @@ static void ext4_orphan_cleanup(struct super_block *sb,
return;
}
- if (s_flags & MS_RDONLY) {
+ if (s_flags & SB_RDONLY) {
ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
- sb->s_flags |= MS_ACTIVE;
+ sb->s_flags |= SB_ACTIVE;
/*
* Turn on quotas which were not enabled for read-only mounts if
* filesystem has quota feature, so that they are updated correctly.
*/
- if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) {
+ if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
int ret = ext4_enable_quotas(sb);
if (!ret)
@@ -2539,7 +2539,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
}
}
#endif
- sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+ sb->s_flags = s_flags; /* Restore SB_RDONLY status */
}
/*
@@ -2741,7 +2741,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
if (ext4_has_feature_readonly(sb)) {
ext4_msg(sb, KERN_INFO, "filesystem is read-only");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
return 1;
}
@@ -3623,8 +3623,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_iflags |= SB_I_CGROUPWB;
}
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
(ext4_has_compat_features(sb) ||
@@ -4199,7 +4199,7 @@ no_journal:
}
if (ext4_setup_super(sb, es, sb_rdonly(sb)))
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
/* determine the minimum size of new large inodes, if present */
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
@@ -4693,7 +4693,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
* the clock is set in the future, and this will cause e2fsck
* to complain and force a full file system check.
*/
- if (!(sb->s_flags & MS_RDONLY))
+ if (!(sb->s_flags & SB_RDONLY))
es->s_wtime = cpu_to_le32(get_seconds());
if (sb->s_bdev->bd_part)
es->s_kbytes_written =
@@ -5047,8 +5047,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ext4_abort(sb, "Abort forced by user");
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
es = sbi->s_es;
@@ -5057,16 +5057,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}
- if (*flags & MS_LAZYTIME)
- sb->s_flags |= MS_LAZYTIME;
+ if (*flags & SB_LAZYTIME)
+ sb->s_flags |= SB_LAZYTIME;
- if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) {
+ if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS;
goto restore_opts;
}
- if (*flags & MS_RDONLY) {
+ if (*flags & SB_RDONLY) {
err = sync_filesystem(sb);
if (err < 0)
goto restore_opts;
@@ -5078,7 +5078,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* First of all, the unconditional stuff we have to do
* to disable replay of the journal when we next remount
*/
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
/*
* OK, test if we are remounting a valid rw partition
@@ -5140,7 +5140,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
ext4_clear_journal_err(sb, es);
sbi->s_mount_state = le16_to_cpu(es->s_state);
if (!ext4_setup_super(sb, es, 0))
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
if (ext4_has_feature_mmp(sb))
if (ext4_multi_mount_protect(sb,
le64_to_cpu(es->s_mmp_block))) {
@@ -5164,7 +5164,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
ext4_setup_system_zone(sb);
- if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
+ if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY))
ext4_commit_super(sb, 1);
#ifdef CONFIG_QUOTA
@@ -5182,7 +5182,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
#endif
- *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
+ *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);
return 0;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index dd2e73e10857..4aa69bc1c70a 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -617,17 +617,17 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
- if (s_flags & MS_RDONLY) {
+ if (s_flags & SB_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
- sbi->sb->s_flags &= ~MS_RDONLY;
+ sbi->sb->s_flags &= ~SB_RDONLY;
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
- sbi->sb->s_flags |= MS_ACTIVE;
+ sbi->sb->s_flags |= SB_ACTIVE;
/* Turn on quotas so that they are updated correctly */
- quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
+ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
@@ -658,7 +658,7 @@ out:
if (quota_enabled)
f2fs_quota_off_umount(sbi->sb);
#endif
- sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+ sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
return err;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f4e094e816c6..6abf26c31d01 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2378,7 +2378,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
static inline int f2fs_readonly(struct super_block *sb)
{
- return sb->s_flags & MS_RDONLY;
+ return sb->s_flags & SB_RDONLY;
}
static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 5d5bba462f26..d844dcb80570 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1005,7 +1005,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
cpc.reason = __get_cp_reason(sbi);
gc_more:
- if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
+ if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
ret = -EINVAL;
goto stop;
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 92c57ace1939..b3a14b0429f2 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -598,16 +598,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
int quota_enabled;
#endif
- if (s_flags & MS_RDONLY) {
+ if (s_flags & SB_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
- sbi->sb->s_flags &= ~MS_RDONLY;
+ sbi->sb->s_flags &= ~SB_RDONLY;
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
- sbi->sb->s_flags |= MS_ACTIVE;
+ sbi->sb->s_flags |= SB_ACTIVE;
/* Turn on quotas so that they are updated correctly */
- quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
+ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
@@ -671,7 +671,7 @@ out:
if (quota_enabled)
f2fs_quota_off_umount(sbi->sb);
#endif
- sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+ sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
return ret ? ret: err;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a6c5dd450002..708155d9c2e4 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -534,10 +534,10 @@ static int parse_options(struct super_block *sb, char *options)
#endif
break;
case Opt_lazytime:
- sb->s_flags |= MS_LAZYTIME;
+ sb->s_flags |= SB_LAZYTIME;
break;
case Opt_nolazytime:
- sb->s_flags &= ~MS_LAZYTIME;
+ sb->s_flags &= ~SB_LAZYTIME;
break;
#ifdef CONFIG_QUOTA
case Opt_quota:
@@ -1168,7 +1168,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, INLINE_DENTRY);
set_opt(sbi, EXTENT_CACHE);
set_opt(sbi, NOHEAP);
- sbi->sb->s_flags |= MS_LAZYTIME;
+ sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
set_opt_mode(sbi, F2FS_MOUNT_LFS);
@@ -1236,7 +1236,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
#endif
/* recover superblocks we couldn't write due to previous RO mount */
- if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
+ if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
err = f2fs_commit_super(sbi, false);
f2fs_msg(sb, KERN_INFO,
"Try to recover all the superblocks, ret: %d", err);
@@ -1255,17 +1255,17 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* Previous and new state of filesystem is RO,
* so skip checking GC and FLUSH_MERGE conditions.
*/
- if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
+ if (f2fs_readonly(sb) && (*flags & SB_RDONLY))
goto skip;
#ifdef CONFIG_QUOTA
- if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) {
+ if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) {
err = dquot_suspend(sb, -1);
if (err < 0)
goto restore_opts;
} else {
/* dquot_resume needs RW */
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
if (sb_any_quota_suspended(sb)) {
dquot_resume(sb, -1);
} else if (f2fs_sb_has_quota_ino(sb)) {
@@ -1288,7 +1288,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* or if background_gc = off is passed in mount
* option. Also sync the filesystem.
*/
- if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
+ if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
if (sbi->gc_thread) {
stop_gc_thread(sbi);
need_restart_gc = true;
@@ -1300,7 +1300,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
need_stop_gc = true;
}
- if (*flags & MS_RDONLY) {
+ if (*flags & SB_RDONLY) {
writeback_inodes_sb(sb, WB_REASON_SYNC);
sync_inodes_sb(sb);
@@ -1314,7 +1314,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* We stop issue flush thread if FS is mounted as RO
* or if flush_merge is not passed in mount option.
*/
- if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
+ if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
clear_opt(sbi, FLUSH_MERGE);
destroy_flush_cmd_control(sbi, false);
} else {
@@ -1329,8 +1329,8 @@ skip:
kfree(s_qf_names[i]);
#endif
/* Update the POSIXACL Flag */
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
return 0;
restore_gc:
@@ -2472,8 +2472,8 @@ try_onemore:
sb->s_export_op = &f2fs_export_ops;
sb->s_magic = F2FS_SUPER_MAGIC;
sb->s_time_gran = 1;
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
/* init f2fs-specific super block info */
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 81cecbe6d7cf..b833ffeee1e1 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -291,7 +291,6 @@ static int fat_parse_long(struct inode *dir, loff_t *pos,
}
}
parse_long:
- slots = 0;
ds = (struct msdos_dir_slot *)*de;
id = ds->id;
if (!(id & 0x40))
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 48b2336692f9..bac10de678cc 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -392,7 +392,7 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
set_buffer_uptodate(c_bh);
mark_buffer_dirty_inode(c_bh, sbi->fat_inode);
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
err = sync_dirty_buffer(c_bh);
brelse(c_bh);
if (err)
@@ -597,7 +597,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
}
if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) {
- if (sb->s_flags & MS_SYNCHRONOUS) {
+ if (sb->s_flags & SB_SYNCHRONOUS) {
err = fat_sync_bhs(bhs, nr_bhs);
if (err)
goto error;
@@ -612,7 +612,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
fat_collect_bhs(bhs, &nr_bhs, &fatent);
} while (cluster != FAT_ENT_EOF);
- if (sb->s_flags & MS_SYNCHRONOUS) {
+ if (sb->s_flags & SB_SYNCHRONOUS) {
err = fat_sync_bhs(bhs, nr_bhs);
if (err)
goto error;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 30c52394a7ad..20a0a89eaca5 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -779,14 +779,14 @@ static void __exit fat_destroy_inodecache(void)
static int fat_remount(struct super_block *sb, int *flags, char *data)
{
- int new_rdonly;
+ bool new_rdonly;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME);
+ *flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);
sync_filesystem(sb);
/* make sure we update state on remount. */
- new_rdonly = *flags & MS_RDONLY;
+ new_rdonly = *flags & SB_RDONLY;
if (new_rdonly != sb_rdonly(sb)) {
if (new_rdonly)
fat_set_state(sb, 0, 0);
@@ -1352,7 +1352,7 @@ out:
if (opts->unicode_xlate)
opts->utf8 = 0;
if (opts->nfs == FAT_NFS_NOSTALE_RO) {
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
sb->s_export_op = &fat_export_ops_nostale;
}
@@ -1608,7 +1608,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
return -ENOMEM;
sb->s_fs_info = sbi;
- sb->s_flags |= MS_NODIRATIME;
+ sb->s_flags |= SB_NODIRATIME;
sb->s_magic = MSDOS_SUPER_MAGIC;
sb->s_op = &fat_sops;
sb->s_export_op = &fat_export_ops;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index acc3aa30ee54..f9bdc1e01c98 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -33,7 +33,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
if (opts->errors == FAT_ERRORS_PANIC)
panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id);
else if (opts->errors == FAT_ERRORS_RO && !sb_rdonly(sb)) {
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
fat_msg(sb, KERN_ERR, "Filesystem has been set read-only");
}
}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 7d6a105d601b..d24d2758a363 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -646,7 +646,7 @@ static void setup(struct super_block *sb)
{
MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations;
sb->s_d_op = &msdos_dentry_operations;
- sb->s_flags |= MS_NOATIME;
+ sb->s_flags |= SB_NOATIME;
}
static int msdos_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 30f47d0f74a0..0522e283a4f4 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -563,6 +563,9 @@ static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __u
{
struct compat_flock64 fl;
+ BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
+ BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));
+
memset(&fl, 0, sizeof(struct compat_flock64));
copy_flock_fields(&fl, kfl);
if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
@@ -632,9 +635,8 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
if (err)
break;
err = fixup_compat_flock(&flock);
- if (err)
- return err;
- err = put_compat_flock(&flock, compat_ptr(arg));
+ if (!err)
+ err = put_compat_flock(&flock, compat_ptr(arg));
break;
case F_GETLK64:
case F_OFD_GETLK:
@@ -642,12 +644,8 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
if (err)
break;
err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
- if (err)
- break;
- err = fixup_compat_flock(&flock);
- if (err)
- return err;
- err = put_compat_flock64(&flock, compat_ptr(arg));
+ if (!err)
+ err = put_compat_flock64(&flock, compat_ptr(arg));
break;
case F_SETLK:
case F_SETLKW:
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 474adc8d2a3a..0ace128f5d23 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -213,8 +213,8 @@ out_err:
return retval;
}
-long do_handle_open(int mountdirfd,
- struct file_handle __user *ufh, int open_flag)
+static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
+ int open_flag)
{
long retval = 0;
struct path path;
diff --git a/fs/file.c b/fs/file.c
index 4eecbf4244a5..3b080834b870 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -593,13 +593,16 @@ void __fd_install(struct files_struct *files, unsigned int fd,
{
struct fdtable *fdt;
- might_sleep();
rcu_read_lock_sched();
- while (unlikely(files->resize_in_progress)) {
+ if (unlikely(files->resize_in_progress)) {
rcu_read_unlock_sched();
- wait_event(files->resize_wait, !files->resize_in_progress);
- rcu_read_lock_sched();
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ BUG_ON(fdt->fd[fd] != NULL);
+ rcu_assign_pointer(fdt->fd[fd], file);
+ spin_unlock(&files->file_lock);
+ return;
}
/* coupled with smp_wmb() in expand_fdtable() */
smp_rmb();
@@ -632,7 +635,6 @@ int __close_fd(struct files_struct *files, unsigned fd)
if (!file)
goto out_unlock;
rcu_assign_pointer(fdt->fd[fd], NULL);
- __clear_close_on_exec(fd, fdt);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
return filp_close(file, files);
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 455ce5b77e9b..f989efa051a0 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -116,7 +116,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp)
static int vxfs_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
return 0;
}
@@ -220,7 +220,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
int ret = -EINVAL;
u32 j;
- sbp->s_flags |= MS_RDONLY;
+ sbp->s_flags |= SB_RDONLY;
infp = kzalloc(sizeof(*infp), GFP_KERNEL);
if (!infp) {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 08f5debd07d1..cea4836385b7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -490,7 +490,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
/* while holding I_WB_SWITCH, no one else can update the association */
spin_lock(&inode->i_lock);
- if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
+ if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
inode->i_state & (I_WB_SWITCH | I_FREEING) ||
inode_to_wb(inode) == isw->new_wb) {
spin_unlock(&inode->i_lock);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2f504d615d92..624f18bbfd2b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -130,7 +130,7 @@ static void fuse_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- if (inode->i_sb->s_flags & MS_ACTIVE) {
+ if (inode->i_sb->s_flags & SB_ACTIVE) {
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
@@ -141,7 +141,7 @@ static void fuse_evict_inode(struct inode *inode)
static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- if (*flags & MS_MANDLOCK)
+ if (*flags & SB_MANDLOCK)
return -EINVAL;
return 0;
@@ -1056,10 +1056,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
int is_bdev = sb->s_bdev != NULL;
err = -EINVAL;
- if (sb->s_flags & MS_MANDLOCK)
+ if (sb->s_flags & SB_MANDLOCK)
goto err;
- sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION);
+ sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
if (!parse_fuse_opt(data, &d, is_bdev))
goto err;
@@ -1109,9 +1109,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
goto err_dev_free;
/* Handle umasking inside the fuse code */
- if (sb->s_flags & MS_POSIXACL)
+ if (sb->s_flags & SB_POSIXACL)
fc->dont_mask = 1;
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
fc->default_permissions = d.default_permissions;
fc->allow_other = d.allow_other;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a3711f543405..ad55eb86a250 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1065,15 +1065,15 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
sdp->sd_args = *args;
if (sdp->sd_args.ar_spectator) {
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
set_bit(SDF_RORECOVERY, &sdp->sd_flags);
}
if (sdp->sd_args.ar_posix_acl)
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
if (sdp->sd_args.ar_nobarrier)
set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
- sb->s_flags |= MS_NOSEC;
+ sb->s_flags |= SB_NOSEC;
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
sb->s_d_op = &gfs2_dops;
@@ -1257,7 +1257,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
struct gfs2_args args;
struct gfs2_sbd *sdp;
- if (!(flags & MS_RDONLY))
+ if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
bdev = blkdev_get_by_path(dev_name, mode, fs_type);
@@ -1313,15 +1313,15 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
if (s->s_root) {
error = -EBUSY;
- if ((flags ^ s->s_flags) & MS_RDONLY)
+ if ((flags ^ s->s_flags) & SB_RDONLY)
goto error_super;
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
sb_set_blocksize(s, block_size(bdev));
- error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
+ error = fill_super(s, &args, flags & SB_SILENT ? 1 : 0);
if (error)
goto error_super;
- s->s_flags |= MS_ACTIVE;
+ s->s_flags |= SB_ACTIVE;
bdev->bd_super = s;
}
@@ -1365,7 +1365,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
pr_warn("gfs2 mount does not exist\n");
return ERR_CAST(s);
}
- if ((flags ^ s->s_flags) & MS_RDONLY) {
+ if ((flags ^ s->s_flags) & SB_RDONLY) {
deactivate_locked_super(s);
return ERR_PTR(-EBUSY);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 9cb5c9a97d69..d81d46e19726 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1256,10 +1256,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
return -EINVAL;
if (sdp->sd_args.ar_spectator)
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
- if ((sb->s_flags ^ *flags) & MS_RDONLY) {
- if (*flags & MS_RDONLY)
+ if ((sb->s_flags ^ *flags) & SB_RDONLY) {
+ if (*flags & SB_RDONLY)
error = gfs2_make_fs_ro(sdp);
else
error = gfs2_make_fs_rw(sdp);
@@ -1269,9 +1269,9 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
sdp->sd_args = args;
if (sdp->sd_args.ar_posix_acl)
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
else
- sb->s_flags &= ~MS_POSIXACL;
+ sb->s_flags &= ~SB_POSIXACL;
if (sdp->sd_args.ar_nobarrier)
set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
else
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index a85ca8b2c9ba..ca8b72d0a831 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -117,7 +117,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
kfree(tr);
up_read(&sdp->sd_log_flush_lock);
- if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
+ if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS)
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
if (alloced)
sb_end_intwrite(sdp->sd_vfs);
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 8aec5e732abf..b63a4df7327b 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -98,13 +98,11 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len)
void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
struct hfs_bnode *src_node, int src, int len)
{
- struct hfs_btree *tree;
struct page *src_page, *dst_page;
hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len);
if (!len)
return;
- tree = src_node->tree;
src += src_node->page_offset;
dst += dst_node->page_offset;
src_page = src_node->page[0];
@@ -237,7 +235,6 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid)
static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
{
- struct super_block *sb;
struct hfs_bnode *node, *node2;
struct address_space *mapping;
struct page *page;
@@ -249,7 +246,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
return NULL;
}
- sb = tree->inode->i_sb;
size = sizeof(struct hfs_bnode) + tree->pages_per_bnode *
sizeof(struct page *);
node = kzalloc(size, GFP_KERNEL);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 894994d2c885..460281b1299e 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -204,11 +204,11 @@ int hfs_mdb_get(struct super_block *sb)
attrib = mdb->drAtrb;
if (!(attrib & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) {
pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. mounting read-only.\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
if ((attrib & cpu_to_be16(HFS_SB_ATTRIB_SLOCK))) {
pr_warn("filesystem is marked locked, mounting read-only.\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
if (!sb_rdonly(sb)) {
/* Mark the volume uncleanly unmounted in case we crash */
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 7e0d65e9586c..173876782f73 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -114,18 +114,18 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf)
static int hfs_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_NODIRATIME;
- if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb))
+ *flags |= SB_NODIRATIME;
+ if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
return 0;
- if (!(*flags & MS_RDONLY)) {
+ if (!(*flags & SB_RDONLY)) {
if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) {
pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. leaving read-only.\n");
- sb->s_flags |= MS_RDONLY;
- *flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
+ *flags |= SB_RDONLY;
} else if (HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_SLOCK)) {
pr_warn("filesystem is marked locked, leaving read-only.\n");
- sb->s_flags |= MS_RDONLY;
- *flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
+ *flags |= SB_RDONLY;
}
}
return 0;
@@ -407,7 +407,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &hfs_super_operations;
sb->s_xattr = hfs_xattr_handlers;
- sb->s_flags |= MS_NODIRATIME;
+ sb->s_flags |= SB_NODIRATIME;
mutex_init(&sbi->bitmap_lock);
res = hfs_mdb_get(sb);
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index d77015c3f22c..177fae4e6581 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -127,14 +127,12 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len)
void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
struct hfs_bnode *src_node, int src, int len)
{
- struct hfs_btree *tree;
struct page **src_page, **dst_page;
int l;
hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len);
if (!len)
return;
- tree = src_node->tree;
src += src_node->page_offset;
dst += dst_node->page_offset;
src_page = src_node->page + (src >> PAGE_SHIFT);
@@ -401,7 +399,6 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid)
static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
{
- struct super_block *sb;
struct hfs_bnode *node, *node2;
struct address_space *mapping;
struct page *page;
@@ -414,7 +411,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
return NULL;
}
- sb = tree->inode->i_sb;
size = sizeof(struct hfs_bnode) + tree->pages_per_bnode *
sizeof(struct page *);
node = kzalloc(size, GFP_KERNEL);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index e5bb2de2262a..1d458b716957 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -329,9 +329,9 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb))
+ if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
return 0;
- if (!(*flags & MS_RDONLY)) {
+ if (!(*flags & SB_RDONLY)) {
struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr;
int force = 0;
@@ -340,20 +340,20 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
pr_warn("filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. leaving read-only.\n");
- sb->s_flags |= MS_RDONLY;
- *flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
+ *flags |= SB_RDONLY;
} else if (force) {
/* nothing */
} else if (vhdr->attributes &
cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
pr_warn("filesystem is marked locked, leaving read-only.\n");
- sb->s_flags |= MS_RDONLY;
- *flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
+ *flags |= SB_RDONLY;
} else if (vhdr->attributes &
cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
pr_warn("filesystem is marked journaled, leaving read-only.\n");
- sb->s_flags |= MS_RDONLY;
- *flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
+ *flags |= SB_RDONLY;
}
}
return 0;
@@ -455,16 +455,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
pr_warn("Filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. mounting read-only.\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
} else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) {
/* nothing */
} else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
pr_warn("Filesystem is marked locked, mounting read-only.\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
} else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) &&
!sb_rdonly(sb)) {
pr_warn("write access to a journaled filesystem is not supported, use the force option at your own risk, mounting read-only.\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
err = -EINVAL;
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index e0e60b148400..7c49f1ef0c85 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -288,7 +288,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
goto bail;
}
if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) {
- if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & MS_RDONLY) goto ok;
+ if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & SB_RDONLY) goto ok;
hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
goto bail;
}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 1516fb4e28f4..c45a3b9b9ac7 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -78,7 +78,7 @@ void hpfs_error(struct super_block *s, const char *fmt, ...)
else {
pr_cont("; remounting read-only\n");
mark_dirty(s, 0);
- s->s_flags |= MS_RDONLY;
+ s->s_flags |= SB_RDONLY;
}
} else if (sb_rdonly(s))
pr_cont("; going on - but anything won't be destroyed because it's read-only\n");
@@ -457,7 +457,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
sync_filesystem(s);
- *flags |= MS_NOATIME;
+ *flags |= SB_NOATIME;
hpfs_lock(s);
uid = sbi->sb_uid; gid = sbi->sb_gid;
@@ -488,7 +488,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk;
sbi->sb_err = errs; sbi->sb_timeshift = timeshift;
- if (!(*flags & MS_RDONLY)) mark_dirty(s, 1);
+ if (!(*flags & SB_RDONLY)) mark_dirty(s, 1);
hpfs_unlock(s);
return 0;
@@ -614,7 +614,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
goto bail4;
}
- s->s_flags |= MS_NOATIME;
+ s->s_flags |= SB_NOATIME;
/* Fill superblock stuff */
s->s_magic = HPFS_SUPER_MAGIC;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 1e76730aac0d..8a85f3f53446 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -639,11 +639,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
/*
- * page_put due to reference from alloc_huge_page()
* unlock_page because locked by add_to_page_cache()
+ * page_put due to reference from alloc_huge_page()
*/
- put_page(page);
unlock_page(page);
+ put_page(page);
}
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
diff --git a/fs/inode.c b/fs/inode.c
index fd401028a309..03102d6ef044 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -416,7 +416,7 @@ void inode_add_lru(struct inode *inode)
{
if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
I_FREEING | I_WILL_FREE)) &&
- !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
+ !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE)
inode_lru_list_add(inode);
}
@@ -595,7 +595,7 @@ static void dispose_list(struct list_head *head)
* @sb: superblock to operate on
*
* Make sure that no inodes with zero refcount are retained. This is
- * called by superblock shutdown after having MS_ACTIVE flag removed,
+ * called by superblock shutdown after having SB_ACTIVE flag removed,
* so any inode reaching zero refcount during or after that call will
* be immediately evicted.
*/
@@ -1492,7 +1492,7 @@ static void iput_final(struct inode *inode)
else
drop = generic_drop_inode(inode);
- if (!drop && (sb->s_flags & MS_ACTIVE)) {
+ if (!drop && (sb->s_flags & SB_ACTIVE)) {
inode_add_lru(inode);
spin_unlock(&inode->i_lock);
return;
@@ -1644,7 +1644,7 @@ int generic_update_time(struct inode *inode, struct timespec *time, int flags)
if (flags & S_MTIME)
inode->i_mtime = *time;
- if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION))
+ if (!(inode->i_sb->s_flags & SB_LAZYTIME) || (flags & S_VERSION))
iflags |= I_DIRTY_SYNC;
__mark_inode_dirty(inode, iflags);
return 0;
@@ -1691,7 +1691,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode,
if (IS_NOATIME(inode))
return false;
- if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
+ if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
return false;
if (mnt->mnt_flags & MNT_NOATIME)
diff --git a/fs/internal.h b/fs/internal.h
index 48cee21b4f14..df262f41a0ef 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -55,6 +55,7 @@ extern void __init chrdev_init(void);
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
+long do_unlinkat(int dfd, struct filename *name);
/*
* namespace.c
diff --git a/fs/iomap.c b/fs/iomap.c
index b9f74803e56c..47d29ccffaef 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -856,6 +856,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
struct bio *bio;
bool need_zeroout = false;
int nr_pages, ret;
+ size_t copied = 0;
if ((pos | length | align) & ((1 << blkbits) - 1))
return -EINVAL;
@@ -867,7 +868,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
/*FALLTHRU*/
case IOMAP_UNWRITTEN:
if (!(dio->flags & IOMAP_DIO_WRITE)) {
- iov_iter_zero(length, dio->submit.iter);
+ length = iov_iter_zero(length, dio->submit.iter);
dio->size += length;
return length;
}
@@ -904,8 +905,11 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
}
do {
- if (dio->error)
+ size_t n;
+ if (dio->error) {
+ iov_iter_revert(dio->submit.iter, copied);
return 0;
+ }
bio = bio_alloc(GFP_KERNEL, nr_pages);
bio_set_dev(bio, iomap->bdev);
@@ -918,20 +922,24 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
ret = bio_iov_iter_get_pages(bio, &iter);
if (unlikely(ret)) {
bio_put(bio);
- return ret;
+ return copied ? copied : ret;
}
+ n = bio->bi_iter.bi_size;
if (dio->flags & IOMAP_DIO_WRITE) {
bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
- task_io_account_write(bio->bi_iter.bi_size);
+ task_io_account_write(n);
} else {
bio_set_op_attrs(bio, REQ_OP_READ, 0);
if (dio->flags & IOMAP_DIO_DIRTY)
bio_set_pages_dirty(bio);
}
- dio->size += bio->bi_iter.bi_size;
- pos += bio->bi_iter.bi_size;
+ iov_iter_advance(dio->submit.iter, n);
+
+ dio->size += n;
+ pos += n;
+ copied += n;
nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
@@ -947,9 +955,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
if (pad)
iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
}
-
- iov_iter_advance(dio->submit.iter, length);
- return length;
+ return copied;
}
ssize_t
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 447a24d77b89..bc258a4402f6 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -114,7 +114,7 @@ static void destroy_inodecache(void)
static int isofs_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- if (!(*flags & MS_RDONLY))
+ if (!(*flags & SB_RDONLY))
return -EROFS;
return 0;
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index d2a85c9720e9..67546c7ad473 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -737,6 +737,23 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
return err;
}
+/* Return 1 when transaction with given tid has already committed. */
+int jbd2_transaction_committed(journal_t *journal, tid_t tid)
+{
+ int ret = 1;
+
+ read_lock(&journal->j_state_lock);
+ if (journal->j_running_transaction &&
+ journal->j_running_transaction->t_tid == tid)
+ ret = 0;
+ if (journal->j_committing_transaction &&
+ journal->j_committing_transaction->t_tid == tid)
+ ret = 0;
+ read_unlock(&journal->j_state_lock);
+ return ret;
+}
+EXPORT_SYMBOL(jbd2_transaction_committed);
+
/*
* When this function returns the transaction corresponding to tid
* will be completed. If the transaction has currently running, start
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index e96c6b05e43e..d8c274d39ddb 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -409,10 +409,10 @@ int jffs2_do_remount_fs(struct super_block *sb, int *flags, char *data)
mutex_unlock(&c->alloc_sem);
}
- if (!(*flags & MS_RDONLY))
+ if (!(*flags & SB_RDONLY))
jffs2_start_garbage_collect_thread(c);
- *flags |= MS_NOATIME;
+ *flags |= SB_NOATIME;
return 0;
}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 824e61ede465..c2fbec19c616 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -59,7 +59,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
}
-#define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & MS_RDONLY)
+#define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & SB_RDONLY)
#define SECTOR_ADDR(x) ( (((unsigned long)(x) / c->sector_size) * c->sector_size) )
#ifndef CONFIG_JFFS2_FS_WRITEBUFFER
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 153f1c6eb169..f60dee7faf03 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -301,10 +301,10 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &jffs2_super_operations;
sb->s_export_op = &jffs2_export_ops;
- sb->s_flags = sb->s_flags | MS_NOATIME;
+ sb->s_flags = sb->s_flags | SB_NOATIME;
sb->s_xattr = jffs2_xattr_handlers;
#ifdef CONFIG_JFFS2_FS_POSIX_ACL
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
#endif
ret = jffs2_do_fill_super(sb, data, silent);
return ret;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2f7b3af5b8b7..90373aebfdca 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -87,7 +87,7 @@ static void jfs_handle_error(struct super_block *sb)
else if (sbi->flag & JFS_ERR_REMOUNT_RO) {
jfs_err("ERROR: (device %s): remounting filesystem as read-only",
sb->s_id);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
/* nothing is done for continue beyond marking the superblock dirty */
@@ -477,7 +477,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
return rc;
}
- if (sb_rdonly(sb) && !(*flags & MS_RDONLY)) {
+ if (sb_rdonly(sb) && !(*flags & SB_RDONLY)) {
/*
* Invalidate any previously read metadata. fsck may have
* changed the on-disk data since we mounted r/o
@@ -488,12 +488,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
ret = jfs_mount_rw(sb, 1);
/* mark the fs r/w for quota activity */
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
dquot_resume(sb, -1);
return ret;
}
- if (!sb_rdonly(sb) && (*flags & MS_RDONLY)) {
+ if (!sb_rdonly(sb) && (*flags & SB_RDONLY)) {
rc = dquot_suspend(sb, -1);
if (rc < 0)
return rc;
@@ -545,7 +545,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
sbi->flag = flag;
#ifdef CONFIG_JFS_POSIX_ACL
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
#endif
if (newLVSize) {
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 95a7c88baed9..26dd9a50f383 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -335,7 +335,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
deactivate_locked_super(sb);
return ERR_PTR(error);
}
- sb->s_flags |= MS_ACTIVE;
+ sb->s_flags |= SB_ACTIVE;
mutex_lock(&kernfs_mutex);
list_add(&info->node, &root->supers);
diff --git a/fs/libfs.c b/fs/libfs.c
index 3aabe553fc45..7ff3cb904acd 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -246,7 +246,7 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
struct inode *root;
struct qstr d_name = QSTR_INIT(name, strlen(name));
- s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER,
+ s = sget_userns(fs_type, NULL, set_anon_super, SB_KERNMOUNT|SB_NOUSER,
&init_user_ns, NULL);
if (IS_ERR(s))
return ERR_CAST(s);
@@ -277,7 +277,7 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
d_instantiate(dentry, root);
s->s_root = dentry;
s->s_d_op = dops;
- s->s_flags |= MS_ACTIVE;
+ s->s_flags |= SB_ACTIVE;
return dget(s->s_root);
Enomem:
@@ -578,7 +578,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c
spin_lock(&pin_fs_lock);
if (unlikely(!*mount)) {
spin_unlock(&pin_fs_lock);
- mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL);
+ mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
spin_lock(&pin_fs_lock);
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 0d4e590e0549..826a89184f90 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -578,8 +578,10 @@ static void nlm_complain_hosts(struct net *net)
if (ln->nrhosts == 0)
return;
- printk(KERN_WARNING "lockd: couldn't shutdown host module for net %p!\n", net);
- dprintk("lockd: %lu hosts left in net %p:\n", ln->nrhosts, net);
+ pr_warn("lockd: couldn't shutdown host module for net %x!\n",
+ net->ns.inum);
+ dprintk("lockd: %lu hosts left in net %x:\n", ln->nrhosts,
+ net->ns.inum);
} else {
if (nrhosts == 0)
return;
@@ -590,9 +592,9 @@ static void nlm_complain_hosts(struct net *net)
for_each_host(host, chain, nlm_server_hosts) {
if (net && host->net != net)
continue;
- dprintk(" %s (cnt %d use %d exp %ld net %p)\n",
+ dprintk(" %s (cnt %d use %d exp %ld net %x)\n",
host->h_name, atomic_read(&host->h_count),
- host->h_inuse, host->h_expires, host->net);
+ host->h_inuse, host->h_expires, host->net->ns.inum);
}
}
@@ -605,7 +607,8 @@ nlm_shutdown_hosts_net(struct net *net)
mutex_lock(&nlm_host_mutex);
/* First, make all hosts eligible for gc */
- dprintk("lockd: nuking all hosts in net %p...\n", net);
+ dprintk("lockd: nuking all hosts in net %x...\n",
+ net ? net->ns.inum : 0);
for_each_host(host, chain, nlm_server_hosts) {
if (net && host->net != net)
continue;
@@ -618,9 +621,8 @@ nlm_shutdown_hosts_net(struct net *net)
/* Then, perform a garbage collection pass */
nlm_gc_hosts(net);
- mutex_unlock(&nlm_host_mutex);
-
nlm_complain_hosts(net);
+ mutex_unlock(&nlm_host_mutex);
}
/*
@@ -646,7 +648,8 @@ nlm_gc_hosts(struct net *net)
struct hlist_node *next;
struct nlm_host *host;
- dprintk("lockd: host garbage collection for net %p\n", net);
+ dprintk("lockd: host garbage collection for net %x\n",
+ net ? net->ns.inum : 0);
for_each_host(host, chain, nlm_server_hosts) {
if (net && host->net != net)
continue;
@@ -662,9 +665,10 @@ nlm_gc_hosts(struct net *net)
if (atomic_read(&host->h_count) || host->h_inuse
|| time_before(jiffies, host->h_expires)) {
dprintk("nlm_gc_hosts skipping %s "
- "(cnt %d use %d exp %ld net %p)\n",
+ "(cnt %d use %d exp %ld net %x)\n",
host->h_name, atomic_read(&host->h_count),
- host->h_inuse, host->h_expires, host->net);
+ host->h_inuse, host->h_expires,
+ host->net->ns.inum);
continue;
}
nlm_destroy_host_locked(host);
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 9fbbd11f9ecb..96cfb2967ac7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -110,7 +110,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
clnt = nsm_create(host->net, host->nodename);
if (IS_ERR(clnt)) {
dprintk("lockd: failed to create NSM upcall transport, "
- "status=%ld, net=%p\n", PTR_ERR(clnt), host->net);
+ "status=%ld, net=%x\n", PTR_ERR(clnt),
+ host->net->ns.inum);
return PTR_ERR(clnt);
}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b837fb7e290a..9c36d614bf89 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -57,6 +57,9 @@ static struct task_struct *nlmsvc_task;
static struct svc_rqst *nlmsvc_rqst;
unsigned long nlmsvc_timeout;
+atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0);
+DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq);
+
unsigned int lockd_net_id;
/*
@@ -259,7 +262,7 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net)
if (error < 0)
goto err_bind;
set_grace_period(net);
- dprintk("lockd_up_net: per-net data created; net=%p\n", net);
+ dprintk("%s: per-net data created; net=%x\n", __func__, net->ns.inum);
return 0;
err_bind:
@@ -274,12 +277,15 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
if (ln->nlmsvc_users) {
if (--ln->nlmsvc_users == 0) {
nlm_shutdown_hosts_net(net);
+ cancel_delayed_work_sync(&ln->grace_period_end);
+ locks_end_grace(&ln->lockd_manager);
svc_shutdown_net(serv, net);
- dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net);
+ dprintk("%s: per-net data destroyed; net=%x\n",
+ __func__, net->ns.inum);
}
} else {
- printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n",
- nlmsvc_task, net);
+ pr_err("%s: no users! task=%p, net=%x\n",
+ __func__, nlmsvc_task, net->ns.inum);
BUG();
}
}
@@ -290,7 +296,8 @@ static int lockd_inetaddr_event(struct notifier_block *this,
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct sockaddr_in sin;
- if (event != NETDEV_DOWN)
+ if ((event != NETDEV_DOWN) ||
+ !atomic_inc_not_zero(&nlm_ntf_refcnt))
goto out;
if (nlmsvc_rqst) {
@@ -301,6 +308,8 @@ static int lockd_inetaddr_event(struct notifier_block *this,
svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
(struct sockaddr *)&sin);
}
+ atomic_dec(&nlm_ntf_refcnt);
+ wake_up(&nlm_ntf_wq);
out:
return NOTIFY_DONE;
@@ -317,7 +326,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
struct sockaddr_in6 sin6;
- if (event != NETDEV_DOWN)
+ if ((event != NETDEV_DOWN) ||
+ !atomic_inc_not_zero(&nlm_ntf_refcnt))
goto out;
if (nlmsvc_rqst) {
@@ -329,6 +339,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,
svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
(struct sockaddr *)&sin6);
}
+ atomic_dec(&nlm_ntf_refcnt);
+ wake_up(&nlm_ntf_wq);
out:
return NOTIFY_DONE;
@@ -345,10 +357,12 @@ static void lockd_unregister_notifiers(void)
#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
#endif
+ wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0);
}
static void lockd_svc_exit_thread(void)
{
+ atomic_dec(&nlm_ntf_refcnt);
lockd_unregister_notifiers();
svc_exit_thread(nlmsvc_rqst);
}
@@ -369,9 +383,11 @@ static int lockd_start_svc(struct svc_serv *serv)
printk(KERN_WARNING
"lockd_up: svc_rqst allocation failed, error=%d\n",
error);
+ lockd_unregister_notifiers();
goto out_rqst;
}
+ atomic_inc(&nlm_ntf_refcnt);
svc_sock_update_bufs(serv);
serv->sv_maxconn = nlm_max_connections;
@@ -459,13 +475,16 @@ int lockd_up(struct net *net)
}
error = lockd_up_net(serv, net);
- if (error < 0)
- goto err_net;
+ if (error < 0) {
+ lockd_unregister_notifiers();
+ goto err_put;
+ }
error = lockd_start_svc(serv);
- if (error < 0)
- goto err_start;
-
+ if (error < 0) {
+ lockd_down_net(serv, net);
+ goto err_put;
+ }
nlmsvc_users++;
/*
* Note: svc_serv structures have an initial use count of 1,
@@ -476,12 +495,6 @@ err_put:
err_create:
mutex_unlock(&nlmsvc_mutex);
return error;
-
-err_start:
- lockd_down_net(serv, net);
-err_net:
- lockd_unregister_notifiers();
- goto err_put;
}
EXPORT_SYMBOL_GPL(lockd_up);
@@ -678,6 +691,17 @@ static int lockd_init_net(struct net *net)
static void lockd_exit_net(struct net *net)
{
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
+
+ WARN_ONCE(!list_empty(&ln->lockd_manager.list),
+ "net %x %s: lockd_manager.list is not empty\n",
+ net->ns.inum, __func__);
+ WARN_ONCE(!list_empty(&ln->nsm_handles),
+ "net %x %s: nsm_handles list is not empty\n",
+ net->ns.inum, __func__);
+ WARN_ONCE(delayed_work_pending(&ln->grace_period_end),
+ "net %x %s: grace_period_end was not cancelled\n",
+ net->ns.inum, __func__);
}
static struct pernet_operations lockd_net_ops = {
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index a563ddbc19e6..4ec3d6e03e76 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -370,7 +370,7 @@ nlmsvc_mark_resources(struct net *net)
{
struct nlm_host hint;
- dprintk("lockd: nlmsvc_mark_resources for net %p\n", net);
+ dprintk("lockd: %s for net %x\n", __func__, net ? net->ns.inum : 0);
hint.net = net;
nlm_traverse_files(&hint, nlmsvc_mark_host, NULL);
}
diff --git a/fs/locks.c b/fs/locks.c
index 1bd71c4d663a..21b4dfa289ee 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -141,7 +141,7 @@
static inline bool is_remote_lock(struct file *filp)
{
- return likely(!(filp->f_path.dentry->d_sb->s_flags & MS_NOREMOTELOCK));
+ return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK));
}
static bool lease_breaking(struct file_lock *fl)
diff --git a/fs/mbcache.c b/fs/mbcache.c
index d818fd236787..b8b8b9ced9f8 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -269,6 +269,9 @@ static unsigned long mb_cache_count(struct shrinker *shrink,
struct mb_cache *cache = container_of(shrink, struct mb_cache,
c_shrink);
+ /* Unlikely, but not impossible */
+ if (unlikely(cache->c_entry_count < 0))
+ return 0;
return cache->c_entry_count;
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index b6829d679643..72e308c3e66b 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -125,9 +125,9 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
sync_filesystem(sb);
ms = sbi->s_ms;
- if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb))
+ if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
return 0;
- if (*flags & MS_RDONLY) {
+ if (*flags & SB_RDONLY) {
if (ms->s_state & MINIX_VALID_FS ||
!(sbi->s_mount_state & MINIX_VALID_FS))
return 0;
diff --git a/fs/namei.c b/fs/namei.c
index 5424b10cfdc4..9cc91fb7f156 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1129,18 +1129,9 @@ static int follow_automount(struct path *path, struct nameidata *nd,
* of the daemon to instantiate them before they can be used.
*/
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
- LOOKUP_OPEN | LOOKUP_CREATE |
- LOOKUP_AUTOMOUNT))) {
- /* Positive dentry that isn't meant to trigger an
- * automount, EISDIR will allow it to be used,
- * otherwise there's no mount here "now" so return
- * ENOENT.
- */
- if (path->dentry->d_inode)
- return -EISDIR;
- else
- return -ENOENT;
- }
+ LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
+ path->dentry->d_inode)
+ return -EISDIR;
if (path->dentry->d_sb->s_user_ns != &init_user_ns)
return -EACCES;
@@ -3459,7 +3450,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
goto out;
child = vfs_tmpfile(path.dentry, op->mode, op->open_flag);
error = PTR_ERR(child);
- if (unlikely(IS_ERR(child)))
+ if (IS_ERR(child))
goto out2;
dput(path.dentry);
path.dentry = child;
@@ -4010,10 +4001,9 @@ EXPORT_SYMBOL(vfs_unlink);
* writeout happening, and we don't want to prevent access to the directory
* while waiting on the I/O.
*/
-static long do_unlinkat(int dfd, const char __user *pathname)
+long do_unlinkat(int dfd, struct filename *name)
{
int error;
- struct filename *name;
struct dentry *dentry;
struct path path;
struct qstr last;
@@ -4022,8 +4012,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
- name = filename_parentat(dfd, getname(pathname), lookup_flags,
- &path, &last, &type);
+ name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
if (IS_ERR(name))
return PTR_ERR(name);
@@ -4065,12 +4054,12 @@ exit2:
mnt_drop_write(path.mnt);
exit1:
path_put(&path);
- putname(name);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
inode = NULL;
goto retry;
}
+ putname(name);
return error;
slashes:
@@ -4091,12 +4080,12 @@ SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
if (flag & AT_REMOVEDIR)
return do_rmdir(dfd, pathname);
- return do_unlinkat(dfd, pathname);
+ return do_unlinkat(dfd, getname(pathname));
}
SYSCALL_DEFINE1(unlink, const char __user *, pathname)
{
- return do_unlinkat(AT_FDCWD, pathname);
+ return do_unlinkat(AT_FDCWD, getname(pathname));
}
int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 129f1937fa2c..41de88cdc053 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -103,7 +103,7 @@ static void destroy_inodecache(void)
static int ncp_remount(struct super_block *sb, int *flags, char* data)
{
sync_filesystem(sb);
- *flags |= MS_NODIRATIME;
+ *flags |= SB_NODIRATIME;
return 0;
}
@@ -547,7 +547,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
else
default_bufsize = 1024;
- sb->s_flags |= MS_NODIRATIME; /* probably even noatime */
+ sb->s_flags |= SB_NODIRATIME; /* probably even noatime */
sb->s_maxbytes = 0xFFFFFFFFU;
sb->s_blocksize = 1024; /* Eh... Is this correct? */
sb->s_blocksize_bits = 10;
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index b60627bcfc62..ef6729568432 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -67,7 +67,7 @@ out:
*/
void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq)
{
- if (atomic_dec_and_test(&dreq->count))
+ if (refcount_dec_and_test(&dreq->count))
kfree(dreq);
}
@@ -87,7 +87,7 @@ static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req)
dreq = container_of(req, struct nfs_cache_defer_req, req);
dreq->deferred_req.revisit = nfs_dns_cache_revisit;
- atomic_inc(&dreq->count);
+ refcount_inc(&dreq->count);
return &dreq->deferred_req;
}
@@ -99,7 +99,7 @@ struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void)
dreq = kzalloc(sizeof(*dreq), GFP_KERNEL);
if (dreq) {
init_completion(&dreq->completion);
- atomic_set(&dreq->count, 1);
+ refcount_set(&dreq->count, 1);
dreq->req.defer = nfs_dns_cache_defer;
}
return dreq;
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
index 4e6236a86cf7..220ee409abc4 100644
--- a/fs/nfs/cache_lib.h
+++ b/fs/nfs/cache_lib.h
@@ -16,7 +16,7 @@ struct nfs_cache_defer_req {
struct cache_req req;
struct cache_deferred_req deferred_req;
struct completion completion;
- atomic_t count;
+ refcount_t count;
};
extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index cd9d992feb2e..509dc5adeb8f 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -49,15 +49,15 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
if (ret <= 0)
goto out_err;
nn->nfs_callback_tcpport = ret;
- dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
- nn->nfs_callback_tcpport, PF_INET, net);
+ dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
+ nn->nfs_callback_tcpport, PF_INET, net->ns.inum);
ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
if (ret > 0) {
nn->nfs_callback_tcpport6 = ret;
- dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
- nn->nfs_callback_tcpport6, PF_INET6, net);
+ dprintk("NFS: Callback listener port = %u (af %u, net %x\n",
+ nn->nfs_callback_tcpport6, PF_INET6, net->ns.inum);
} else if (ret != -EAFNOSUPPORT)
goto out_err;
return 0;
@@ -185,7 +185,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
if (--nn->cb_users[minorversion])
return;
- dprintk("NFS: destroy per-net callback data; net=%p\n", net);
+ dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum);
svc_shutdown_net(serv, net);
}
@@ -198,7 +198,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
if (nn->cb_users[minorversion]++)
return 0;
- dprintk("NFS: create per-net callback data; net=%p\n", net);
+ dprintk("NFS: create per-net callback data; net=%x\n", net->ns.inum);
ret = svc_bind(serv, net);
if (ret < 0) {
@@ -223,7 +223,7 @@ err_socks:
err_bind:
nn->cb_users[minorversion]--;
dprintk("NFS: Couldn't create callback socket: err = %d; "
- "net = %p\n", ret, net);
+ "net = %x\n", ret, net->ns.inum);
return ret;
}
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 19151f6c0e97..2435af56b87e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -440,7 +440,7 @@ static bool referring_call_exists(struct nfs_client *clp,
uint32_t nrclists,
struct referring_call_list *rclists)
{
- bool status = 0;
+ bool status = false;
int i, j;
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 22880ef6d8dd..0ac2fb1c6b63 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -163,7 +163,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
- atomic_set(&clp->cl_count, 1);
+ refcount_set(&clp->cl_count, 1);
clp->cl_cons_state = NFS_CS_INITING;
memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen);
@@ -269,7 +269,7 @@ void nfs_put_client(struct nfs_client *clp)
nn = net_generic(clp->cl_net, nfs_net_id);
- if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
+ if (refcount_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
list_del(&clp->cl_share_link);
nfs_cb_idr_remove_locked(clp);
spin_unlock(&nn->nfs_client_lock);
@@ -314,7 +314,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
sap))
continue;
- atomic_inc(&clp->cl_count);
+ refcount_inc(&clp->cl_count);
return clp;
}
return NULL;
@@ -1006,7 +1006,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
/* Copy data from the source */
server->nfs_client = source->nfs_client;
server->destroy = source->destroy;
- atomic_inc(&server->nfs_client->cl_count);
+ refcount_inc(&server->nfs_client->cl_count);
nfs_server_copy_userdata(server, source);
server->fsid = fattr->fsid;
@@ -1166,7 +1166,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
clp->rpc_ops->version,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
- atomic_read(&clp->cl_count),
+ refcount_read(&clp->cl_count),
clp->cl_hostname);
rcu_read_unlock();
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 606dd3871f66..ade44ca0c66c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -1041,6 +1041,33 @@ int nfs_delegations_present(struct nfs_client *clp)
}
/**
+ * nfs4_refresh_delegation_stateid - Update delegation stateid seqid
+ * @dst: stateid to refresh
+ * @inode: inode to check
+ *
+ * Returns "true" and updates "dst->seqid" * if inode had a delegation
+ * that matches our delegation stateid. Otherwise "false" is returned.
+ */
+bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ bool ret = false;
+ if (!inode)
+ goto out;
+
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation != NULL &&
+ nfs4_stateid_match_other(dst, &delegation->stateid)) {
+ dst->seqid = delegation->stateid.seqid;
+ return ret;
+ }
+ rcu_read_unlock();
+out:
+ return ret;
+}
+
+/**
* nfs4_copy_delegation_stateid - Copy inode's state ID information
* @inode: inode to check
* @flags: delegation type requirement
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index ddaf2644cf13..185a09f37a89 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -62,6 +62,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred);
+bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs4_have_delegation(struct inode *inode, fmode_t flags);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f439f1c45008..2f3f86726f5b 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -118,13 +118,6 @@ nfs_opendir(struct inode *inode, struct file *filp)
goto out;
}
filp->private_data = ctx;
- if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
- /* This is a mountpoint, so d_revalidate will never
- * have been called, so we need to refresh the
- * inode (for close-open consistency) ourselves.
- */
- __nfs_revalidate_inode(NFS_SERVER(inode), inode);
- }
out:
put_rpccred(cred);
return res;
@@ -253,7 +246,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
desc->cache_entry_index = index;
return 0;
out_eof:
- desc->eof = 1;
+ desc->eof = true;
return -EBADCOOKIE;
}
@@ -307,7 +300,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
if (array->eof_index >= 0) {
status = -EBADCOOKIE;
if (*desc->dir_cookie == array->last_cookie)
- desc->eof = 1;
+ desc->eof = true;
}
out:
return status;
@@ -761,7 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
ent = &array->array[i];
if (!dir_emit(desc->ctx, ent->string.name, ent->string.len,
nfs_compat_user_ino64(ent->ino), ent->d_type)) {
- desc->eof = 1;
+ desc->eof = true;
break;
}
desc->ctx->pos++;
@@ -773,7 +766,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
ctx->duped = 1;
}
if (array->eof_index >= 0)
- desc->eof = 1;
+ desc->eof = true;
kunmap(desc->page);
cache_page_release(desc);
@@ -873,7 +866,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
if (res == -EBADCOOKIE) {
res = 0;
/* This means either end of directory */
- if (*desc->dir_cookie && desc->eof == 0) {
+ if (*desc->dir_cookie && !desc->eof) {
/* Or that the server has 'lost' a cookie */
res = uncached_readdir(desc);
if (res == 0)
@@ -1241,8 +1234,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
- if (nfs_mapping_need_revalidate_inode(inode))
- error = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ error = nfs_lookup_verify_inode(inode, flags);
dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
__func__, inode->i_ino, error ? "invalid" : "valid");
return !error;
@@ -1264,7 +1256,7 @@ static int nfs_dentry_delete(const struct dentry *dentry)
/* Unhash it, so that ->d_iput() would be called */
return 1;
}
- if (!(dentry->d_sb->s_flags & MS_ACTIVE)) {
+ if (!(dentry->d_sb->s_flags & SB_ACTIVE)) {
/* Unhash it, so that ancestors of killed async unlink
* files will be cleaned up during umount */
return 1;
@@ -1393,6 +1385,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs4_lookup_revalidate,
+ .d_weak_revalidate = nfs_weak_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
.d_automount = nfs_d_automount,
@@ -2064,7 +2057,7 @@ out:
* should mark the directories for revalidation.
*/
d_move(old_dentry, new_dentry);
- nfs_set_verifier(new_dentry,
+ nfs_set_verifier(old_dentry,
nfs_save_change_attribute(new_dir));
} else if (error == -ENOENT)
nfs_dentry_handle_enoent(old_dentry);
@@ -2369,15 +2362,15 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
}
EXPORT_SYMBOL_GPL(nfs_access_add_cache);
-#define NFS_MAY_READ (NFS4_ACCESS_READ)
-#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \
- NFS4_ACCESS_EXTEND | \
- NFS4_ACCESS_DELETE)
-#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \
- NFS4_ACCESS_EXTEND)
+#define NFS_MAY_READ (NFS_ACCESS_READ)
+#define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \
+ NFS_ACCESS_EXTEND | \
+ NFS_ACCESS_DELETE)
+#define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \
+ NFS_ACCESS_EXTEND)
#define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
-#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP)
-#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE)
+#define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP)
+#define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE)
static int
nfs_access_calc_mask(u32 access_result, umode_t umode)
{
@@ -2425,9 +2418,14 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
if (!may_block)
goto out;
- /* Be clever: ask server to check for all possible rights */
- cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE
- | NFS_MAY_WRITE | NFS_MAY_READ;
+ /*
+ * Determine which access bits we want to ask for...
+ */
+ cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
+ if (S_ISDIR(inode->i_mode))
+ cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
+ else
+ cache.mask |= NFS_ACCESS_EXECUTE;
cache.cred = cred;
status = NFS_PROTO(inode)->access(inode, &cache);
if (status != 0) {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0214dd1e1060..81cca49a8375 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -829,23 +829,9 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
is_local = 1;
- /*
- * VFS doesn't require the open mode to match a flock() lock's type.
- * NFS, however, may simulate flock() locking with posix locking which
- * requires the open mode to match the lock type.
- */
- switch (fl->fl_type) {
- case F_UNLCK:
+ /* We're simulating flock() locks using posix locks on the server */
+ if (fl->fl_type == F_UNLCK)
return do_unlk(filp, cmd, fl, is_local);
- case F_RDLCK:
- if (!(filp->f_mode & FMODE_READ))
- return -EBADF;
- break;
- case F_WRLCK:
- if (!(filp->f_mode & FMODE_WRITE))
- return -EBADF;
- }
-
return do_setlk(filp, cmd, fl, is_local);
}
EXPORT_SYMBOL_GPL(nfs_flock);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 508126eb49f9..4e54d8b5413a 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -471,10 +471,10 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
return PNFS_NOT_ATTEMPTED;
dprintk("%s USE DS: %s cl_count %d\n", __func__,
- ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
+ ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));
/* No multipath support. Use first DS */
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
hdr->ds_commit_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -515,10 +515,10 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
- offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
+ offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));
hdr->pgio_done_cb = filelayout_write_done_cb;
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
hdr->ds_commit_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -1064,9 +1064,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
goto out_err;
dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
- data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
+ data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count));
data->commit_done_cb = filelayout_commit_done_cb;
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index b0fa83a60754..c75ad982bcfc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -187,7 +187,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
continue;
if (!ff_mirror_match_fh(mirror, pos))
continue;
- if (atomic_inc_not_zero(&pos->ref)) {
+ if (refcount_inc_not_zero(&pos->ref)) {
spin_unlock(&inode->i_lock);
return pos;
}
@@ -218,7 +218,7 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
mirror = kzalloc(sizeof(*mirror), gfp_flags);
if (mirror != NULL) {
spin_lock_init(&mirror->lock);
- atomic_set(&mirror->ref, 1);
+ refcount_set(&mirror->ref, 1);
INIT_LIST_HEAD(&mirror->mirrors);
}
return mirror;
@@ -242,7 +242,7 @@ static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
static void ff_layout_put_mirror(struct nfs4_ff_layout_mirror *mirror)
{
- if (mirror != NULL && atomic_dec_and_test(&mirror->ref))
+ if (mirror != NULL && refcount_dec_and_test(&mirror->ref))
ff_layout_free_mirror(mirror);
}
@@ -1726,10 +1726,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
vers = nfs4_ff_layout_ds_version(lseg, idx);
dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
- ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers);
+ ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);
hdr->pgio_done_cb = ff_layout_read_done_cb;
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
if (fh)
@@ -1785,11 +1785,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
- offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count),
+ offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count),
vers);
hdr->pgio_done_cb = ff_layout_write_done_cb;
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
hdr->ds_commit_idx = idx;
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
@@ -1863,11 +1863,11 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
vers = nfs4_ff_layout_ds_version(lseg, idx);
dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
- data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count),
+ data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
vers);
data->commit_done_cb = ff_layout_commit_done_cb;
data->cred = ds_cred;
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
@@ -2286,7 +2286,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags))
continue;
/* mirror refcount put in cleanup_layoutstats */
- if (!atomic_inc_not_zero(&mirror->ref))
+ if (!refcount_inc_not_zero(&mirror->ref))
continue;
dev = &mirror->mirror_ds->id_node;
memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 679cb087ef3f..411798346e48 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -14,6 +14,7 @@
#define FF_FLAGS_NO_IO_THRU_MDS 2
#define FF_FLAGS_NO_READ_IO 4
+#include <linux/refcount.h>
#include "../pnfs.h"
/* XXX: Let's filter out insanely large mirror count for now to avoid oom
@@ -82,7 +83,7 @@ struct nfs4_ff_layout_mirror {
nfs4_stateid stateid;
struct rpc_cred __rcu *ro_cred;
struct rpc_cred __rcu *rw_cred;
- atomic_t ref;
+ refcount_t ref;
spinlock_t lock;
unsigned long flags;
struct nfs4_ff_layoutstat read_stat;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1629056aa2c9..b992d2382ffa 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -752,7 +752,7 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
* Note that we only have to check the vfsmount flags here:
* - NFS always sets S_NOATIME by so checking it would give a
* bogus result
- * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is
+ * - NFS never sets SB_NOATIME or SB_NODIRATIME so there is
* no point in checking those.
*/
if ((path->mnt->mnt_flags & MNT_NOATIME) ||
@@ -783,7 +783,7 @@ EXPORT_SYMBOL_GPL(nfs_getattr);
static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
{
- atomic_set(&l_ctx->count, 1);
+ refcount_set(&l_ctx->count, 1);
l_ctx->lockowner = current->files;
INIT_LIST_HEAD(&l_ctx->list);
atomic_set(&l_ctx->io_count, 0);
@@ -797,7 +797,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context
do {
if (pos->lockowner != current->files)
continue;
- atomic_inc(&pos->count);
+ refcount_inc(&pos->count);
return pos;
} while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
return NULL;
@@ -836,7 +836,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
struct nfs_open_context *ctx = l_ctx->open_context;
struct inode *inode = d_inode(ctx->dentry);
- if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
+ if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock))
return;
list_del(&l_ctx->list);
spin_unlock(&inode->i_lock);
@@ -913,7 +913,7 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context);
struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
{
if (ctx != NULL)
- atomic_inc(&ctx->lock_context.count);
+ refcount_inc(&ctx->lock_context.count);
return ctx;
}
EXPORT_SYMBOL_GPL(get_nfs_open_context);
@@ -924,11 +924,11 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
struct super_block *sb = ctx->dentry->d_sb;
if (!list_empty(&ctx->list)) {
- if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+ if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
return;
list_del(&ctx->list);
spin_unlock(&inode->i_lock);
- } else if (!atomic_dec_and_test(&ctx->lock_context.count))
+ } else if (!refcount_dec_and_test(&ctx->lock_context.count))
return;
if (inode != NULL)
NFS_PROTO(inode)->close_context(ctx, is_sync);
@@ -2084,8 +2084,12 @@ static int nfs_net_init(struct net *net)
static void nfs_net_exit(struct net *net)
{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+
nfs_fs_proc_net_exit(net);
nfs_cleanup_cb_ident_idr(net);
+ WARN_ON_ONCE(!list_empty(&nn->nfs_client_list));
+ WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list));
}
static struct pernet_operations nfs_net_ops = {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5ab17fd4700a..8357ff69962f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -10,7 +10,7 @@
#include <linux/nfs_page.h>
#include <linux/wait_bit.h>
-#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
+#define NFS_MS_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
extern const struct export_operations nfs_export_ops;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index bc673fb47fb3..49f848fd1f04 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -188,6 +188,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs3_accessargs arg = {
.fh = NFS_FH(inode),
+ .access = entry->mask,
};
struct nfs3_accessres res;
struct rpc_message msg = {
@@ -196,25 +197,9 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
.rpc_resp = &res,
.rpc_cred = entry->cred,
};
- int mode = entry->mask;
int status = -ENOMEM;
dprintk("NFS call access\n");
-
- if (mode & MAY_READ)
- arg.access |= NFS3_ACCESS_READ;
- if (S_ISDIR(inode->i_mode)) {
- if (mode & MAY_WRITE)
- arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE;
- if (mode & MAY_EXEC)
- arg.access |= NFS3_ACCESS_LOOKUP;
- } else {
- if (mode & MAY_WRITE)
- arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND;
- if (mode & MAY_EXEC)
- arg.access |= NFS3_ACCESS_EXECUTE;
- }
-
res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL)
goto out;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index dcfcf7fd7438..b374f680830c 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -145,7 +145,7 @@ struct nfs4_lock_state {
unsigned long ls_flags;
struct nfs_seqid_counter ls_seqid;
nfs4_stateid ls_stateid;
- atomic_t ls_count;
+ refcount_t ls_count;
fl_owner_t ls_owner;
};
@@ -162,6 +162,7 @@ enum {
NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */
NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */
NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */
+ NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */
};
struct nfs4_state {
@@ -185,6 +186,8 @@ struct nfs4_state {
unsigned int n_rdwr; /* Number of read/write references */
fmode_t state; /* State on the server (R,W, or RW) */
atomic_t count;
+
+ wait_queue_head_t waitq;
};
@@ -458,6 +461,10 @@ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t,
const struct nfs_lock_context *, nfs4_stateid *,
struct rpc_cred **);
+extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst,
+ struct nfs4_state *state);
+extern bool nfs4_copy_open_stateid(nfs4_stateid *dst,
+ struct nfs4_state *state);
extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -465,7 +472,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_release_seqid(struct nfs_seqid *seqid);
extern void nfs_free_seqid(struct nfs_seqid *seqid);
-extern int nfs4_setup_sequence(const struct nfs_client *client,
+extern int nfs4_setup_sequence(struct nfs_client *client,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
struct rpc_task *task);
@@ -475,6 +482,7 @@ extern int nfs4_sequence_done(struct rpc_task *task,
extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp);
extern const nfs4_stateid zero_stateid;
+extern const nfs4_stateid invalid_stateid;
/* nfs4super.c */
struct nfs_mount_info;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index e9bea90dc017..12bbab0becb4 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -483,7 +483,7 @@ static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new,
* ID and serverowner fields. Wait for CREATE_SESSION
* to finish. */
if (pos->cl_cons_state > NFS_CS_READY) {
- atomic_inc(&pos->cl_count);
+ refcount_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
nfs_put_client(*prev);
@@ -559,7 +559,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
* way that a SETCLIENTID_CONFIRM to pos can succeed is
* if new and pos point to the same server:
*/
- atomic_inc(&pos->cl_count);
+ refcount_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
nfs_put_client(prev);
@@ -715,7 +715,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
continue;
found:
- atomic_inc(&pos->cl_count);
+ refcount_inc(&pos->cl_count);
*result = pos;
status = 0;
break;
@@ -749,7 +749,7 @@ nfs4_find_client_ident(struct net *net, int cb_ident)
spin_lock(&nn->nfs_client_lock);
clp = idr_find(&nn->cb_ident_idr, cb_ident);
if (clp)
- atomic_inc(&clp->cl_count);
+ refcount_inc(&clp->cl_count);
spin_unlock(&nn->nfs_client_lock);
return clp;
}
@@ -793,7 +793,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
- if (nfs4_cb_match_client(addr, clp, minorversion) == false)
+ if (!nfs4_cb_match_client(addr, clp, minorversion))
continue;
if (!nfs4_has_session(clp))
@@ -804,7 +804,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
sid->data, NFS4_MAX_SESSIONID_LEN) != 0)
continue;
- atomic_inc(&clp->cl_count);
+ refcount_inc(&clp->cl_count);
spin_unlock(&nn->nfs_client_lock);
return clp;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f90090e8c959..56fa5a16e097 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -96,6 +96,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_open_context *ctx, struct nfs4_label *ilabel,
struct nfs4_label *olabel);
#ifdef CONFIG_NFS_V4_1
+static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
+ struct rpc_cred *cred,
+ struct nfs4_slot *slot,
+ bool is_privileged);
static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
struct rpc_cred *);
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
@@ -254,15 +258,12 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
};
const u32 nfs4_fs_locations_bitmap[3] = {
- FATTR4_WORD0_TYPE
- | FATTR4_WORD0_CHANGE
+ FATTR4_WORD0_CHANGE
| FATTR4_WORD0_SIZE
| FATTR4_WORD0_FSID
| FATTR4_WORD0_FILEID
| FATTR4_WORD0_FS_LOCATIONS,
- FATTR4_WORD1_MODE
- | FATTR4_WORD1_NUMLINKS
- | FATTR4_WORD1_OWNER
+ FATTR4_WORD1_OWNER
| FATTR4_WORD1_OWNER_GROUP
| FATTR4_WORD1_RAWDEV
| FATTR4_WORD1_SPACE_USED
@@ -644,13 +645,14 @@ static int nfs40_sequence_done(struct rpc_task *task,
#if defined(CONFIG_NFS_V4_1)
-static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+static void nfs41_release_slot(struct nfs4_slot *slot)
{
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
- struct nfs4_slot *slot = res->sr_slot;
bool send_new_highest_used_slotid = false;
+ if (!slot)
+ return;
tbl = slot->table;
session = tbl->session;
@@ -676,13 +678,18 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
send_new_highest_used_slotid = false;
out_unlock:
spin_unlock(&tbl->slot_tbl_lock);
- res->sr_slot = NULL;
if (send_new_highest_used_slotid)
nfs41_notify_server(session->clp);
if (waitqueue_active(&tbl->slot_waitq))
wake_up_all(&tbl->slot_waitq);
}
+static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+ nfs41_release_slot(res->sr_slot);
+ res->sr_slot = NULL;
+}
+
static int nfs41_sequence_process(struct rpc_task *task,
struct nfs4_sequence_res *res)
{
@@ -710,13 +717,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
/* Check the SEQUENCE operation status */
switch (res->sr_status) {
case 0:
- /* If previous op on slot was interrupted and we reused
- * the seq# and got a reply from the cache, then retry
- */
- if (task->tk_status == -EREMOTEIO && interrupted) {
- ++slot->seq_nr;
- goto retry_nowait;
- }
/* Update the slot's sequence and clientid lease timer */
slot->seq_done = 1;
clp = session->clp;
@@ -750,16 +750,16 @@ static int nfs41_sequence_process(struct rpc_task *task,
* The slot id we used was probably retired. Try again
* using a different slot id.
*/
+ if (slot->seq_nr < slot->table->target_highest_slotid)
+ goto session_recover;
goto retry_nowait;
case -NFS4ERR_SEQ_MISORDERED:
/*
* Was the last operation on this sequence interrupted?
* If so, retry after bumping the sequence number.
*/
- if (interrupted) {
- ++slot->seq_nr;
- goto retry_nowait;
- }
+ if (interrupted)
+ goto retry_new_seq;
/*
* Could this slot have been previously retired?
* If so, then the server may be expecting seq_nr = 1!
@@ -768,10 +768,11 @@ static int nfs41_sequence_process(struct rpc_task *task,
slot->seq_nr = 1;
goto retry_nowait;
}
- break;
+ goto session_recover;
case -NFS4ERR_SEQ_FALSE_RETRY:
- ++slot->seq_nr;
- goto retry_nowait;
+ if (interrupted)
+ goto retry_new_seq;
+ goto session_recover;
default:
/* Just update the slot sequence no. */
slot->seq_done = 1;
@@ -781,6 +782,11 @@ out:
dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
out_noaction:
return ret;
+session_recover:
+ nfs4_schedule_session_recovery(session, res->sr_status);
+ goto retry_nowait;
+retry_new_seq:
+ ++slot->seq_nr;
retry_nowait:
if (rpc_restart_call_prepare(task)) {
nfs41_sequence_free_slot(res);
@@ -857,6 +863,17 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
.rpc_call_done = nfs41_call_sync_done,
};
+static void
+nfs4_sequence_process_interrupted(struct nfs_client *client,
+ struct nfs4_slot *slot, struct rpc_cred *cred)
+{
+ struct rpc_task *task;
+
+ task = _nfs41_proc_sequence(client, cred, slot, true);
+ if (!IS_ERR(task))
+ rpc_put_task_async(task);
+}
+
#else /* !CONFIG_NFS_V4_1 */
static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
@@ -877,9 +894,34 @@ int nfs4_sequence_done(struct rpc_task *task,
}
EXPORT_SYMBOL_GPL(nfs4_sequence_done);
+static void
+nfs4_sequence_process_interrupted(struct nfs_client *client,
+ struct nfs4_slot *slot, struct rpc_cred *cred)
+{
+ WARN_ON_ONCE(1);
+ slot->interrupted = 0;
+}
+
#endif /* !CONFIG_NFS_V4_1 */
-int nfs4_setup_sequence(const struct nfs_client *client,
+static
+void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res,
+ struct nfs4_slot *slot)
+{
+ if (!slot)
+ return;
+ slot->privileged = args->sa_privileged ? 1 : 0;
+ args->sa_slot = slot;
+
+ res->sr_slot = slot;
+ res->sr_timestamp = jiffies;
+ res->sr_status_flags = 0;
+ res->sr_status = 1;
+
+}
+
+int nfs4_setup_sequence(struct nfs_client *client,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
struct rpc_task *task)
@@ -897,29 +939,28 @@ int nfs4_setup_sequence(const struct nfs_client *client,
task->tk_timeout = 0;
}
- spin_lock(&tbl->slot_tbl_lock);
- /* The state manager will wait until the slot table is empty */
- if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
- goto out_sleep;
+ for (;;) {
+ spin_lock(&tbl->slot_tbl_lock);
+ /* The state manager will wait until the slot table is empty */
+ if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
+ goto out_sleep;
+
+ slot = nfs4_alloc_slot(tbl);
+ if (IS_ERR(slot)) {
+ /* Try again in 1/4 second */
+ if (slot == ERR_PTR(-ENOMEM))
+ task->tk_timeout = HZ >> 2;
+ goto out_sleep;
+ }
+ spin_unlock(&tbl->slot_tbl_lock);
- slot = nfs4_alloc_slot(tbl);
- if (IS_ERR(slot)) {
- /* Try again in 1/4 second */
- if (slot == ERR_PTR(-ENOMEM))
- task->tk_timeout = HZ >> 2;
- goto out_sleep;
+ if (likely(!slot->interrupted))
+ break;
+ nfs4_sequence_process_interrupted(client,
+ slot, task->tk_msg.rpc_cred);
}
- spin_unlock(&tbl->slot_tbl_lock);
-
- slot->privileged = args->sa_privileged ? 1 : 0;
- args->sa_slot = slot;
- res->sr_slot = slot;
- if (session) {
- res->sr_timestamp = jiffies;
- res->sr_status_flags = 0;
- res->sr_status = 1;
- }
+ nfs4_sequence_attach_slot(args, res, slot);
trace_nfs4_setup_sequence(session, args);
out_start:
@@ -1044,6 +1085,12 @@ struct nfs4_opendata {
int rpc_status;
};
+struct nfs4_open_createattrs {
+ struct nfs4_label *label;
+ struct iattr *sattr;
+ const __u32 verf[2];
+};
+
static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
int err, struct nfs4_exception *exception)
{
@@ -1113,8 +1160,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
struct nfs4_state_owner *sp, fmode_t fmode, int flags,
- const struct iattr *attrs,
- struct nfs4_label *label,
+ const struct nfs4_open_createattrs *c,
enum open_claim_type4 claim,
gfp_t gfp_mask)
{
@@ -1122,6 +1168,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
struct inode *dir = d_inode(parent);
struct nfs_server *server = NFS_SERVER(dir);
struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
+ struct nfs4_label *label = (c != NULL) ? c->label : NULL;
struct nfs4_opendata *p;
p = kzalloc(sizeof(*p), gfp_mask);
@@ -1187,15 +1234,11 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
p->o_arg.fh = NFS_FH(d_inode(dentry));
}
- if (attrs != NULL && attrs->ia_valid != 0) {
- __u32 verf[2];
-
+ if (c != NULL && c->sattr != NULL && c->sattr->ia_valid != 0) {
p->o_arg.u.attrs = &p->attrs;
- memcpy(&p->attrs, attrs, sizeof(p->attrs));
+ memcpy(&p->attrs, c->sattr, sizeof(p->attrs));
- verf[0] = jiffies;
- verf[1] = current->pid;
- memcpy(p->o_arg.u.verifier.data, verf,
+ memcpy(p->o_arg.u.verifier.data, c->verf,
sizeof(p->o_arg.u.verifier.data));
}
p->c_arg.fh = &p->o_res.fh;
@@ -1334,6 +1377,25 @@ static bool nfs_open_stateid_recover_openmode(struct nfs4_state *state)
}
#endif /* CONFIG_NFS_V4_1 */
+static void nfs_state_log_update_open_stateid(struct nfs4_state *state)
+{
+ if (test_and_clear_bit(NFS_STATE_CHANGE_WAIT, &state->flags))
+ wake_up_all(&state->waitq);
+}
+
+static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state,
+ const nfs4_stateid *stateid)
+{
+ u32 state_seqid = be32_to_cpu(state->open_stateid.seqid);
+ u32 stateid_seqid = be32_to_cpu(stateid->seqid);
+
+ if (stateid_seqid == state_seqid + 1U ||
+ (stateid_seqid == 1U && state_seqid == 0xffffffffU))
+ nfs_state_log_update_open_stateid(state);
+ else
+ set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
+}
+
static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
{
struct nfs_client *clp = state->owner->so_server->nfs_client;
@@ -1349,18 +1411,32 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
nfs4_state_mark_reclaim_nograce(clp, state);
}
+/*
+ * Check for whether or not the caller may update the open stateid
+ * to the value passed in by stateid.
+ *
+ * Note: This function relies heavily on the server implementing
+ * RFC7530 Section 9.1.4.2, and RFC5661 Section 8.2.2
+ * correctly.
+ * i.e. The stateid seqids have to be initialised to 1, and
+ * are then incremented on every state transition.
+ */
static bool nfs_need_update_open_stateid(struct nfs4_state *state,
- const nfs4_stateid *stateid, nfs4_stateid *freeme)
+ const nfs4_stateid *stateid)
{
- if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0)
- return true;
- if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) {
- nfs4_stateid_copy(freeme, &state->open_stateid);
- nfs_test_and_clear_all_open_stateid(state);
+ if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 ||
+ !nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+ if (stateid->seqid == cpu_to_be32(1))
+ nfs_state_log_update_open_stateid(state);
+ else
+ set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
return true;
}
- if (nfs4_stateid_is_newer(stateid, &state->open_stateid))
+
+ if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
+ nfs_state_log_out_of_order_open_stateid(state, stateid);
return true;
+ }
return false;
}
@@ -1399,11 +1475,14 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
!nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
nfs_resync_open_stateid_locked(state);
- return;
+ goto out;
}
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
nfs4_stateid_copy(&state->stateid, stateid);
nfs4_stateid_copy(&state->open_stateid, stateid);
+ trace_nfs4_open_stateid_update(state->inode, stateid, 0);
+out:
+ nfs_state_log_update_open_stateid(state);
}
static void nfs_clear_open_stateid(struct nfs4_state *state,
@@ -1420,29 +1499,60 @@ static void nfs_clear_open_stateid(struct nfs4_state *state,
}
static void nfs_set_open_stateid_locked(struct nfs4_state *state,
- const nfs4_stateid *stateid, fmode_t fmode,
- nfs4_stateid *freeme)
+ const nfs4_stateid *stateid, nfs4_stateid *freeme)
{
- switch (fmode) {
- case FMODE_READ:
- set_bit(NFS_O_RDONLY_STATE, &state->flags);
+ DEFINE_WAIT(wait);
+ int status = 0;
+ for (;;) {
+
+ if (!nfs_need_update_open_stateid(state, stateid))
+ return;
+ if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags))
break;
- case FMODE_WRITE:
- set_bit(NFS_O_WRONLY_STATE, &state->flags);
+ if (status)
break;
- case FMODE_READ|FMODE_WRITE:
- set_bit(NFS_O_RDWR_STATE, &state->flags);
+ /* Rely on seqids for serialisation with NFSv4.0 */
+ if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client))
+ break;
+
+ prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE);
+ /*
+ * Ensure we process the state changes in the same order
+ * in which the server processed them by delaying the
+ * update of the stateid until we are in sequence.
+ */
+ write_sequnlock(&state->seqlock);
+ spin_unlock(&state->owner->so_lock);
+ rcu_read_unlock();
+ trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0);
+ if (!signal_pending(current)) {
+ if (schedule_timeout(5*HZ) == 0)
+ status = -EAGAIN;
+ else
+ status = 0;
+ } else
+ status = -EINTR;
+ finish_wait(&state->waitq, &wait);
+ rcu_read_lock();
+ spin_lock(&state->owner->so_lock);
+ write_seqlock(&state->seqlock);
}
- if (!nfs_need_update_open_stateid(state, stateid, freeme))
- return;
+
+ if (test_bit(NFS_OPEN_STATE, &state->flags) &&
+ !nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+ nfs4_stateid_copy(freeme, &state->open_stateid);
+ nfs_test_and_clear_all_open_stateid(state);
+ }
+
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
nfs4_stateid_copy(&state->stateid, stateid);
nfs4_stateid_copy(&state->open_stateid, stateid);
+ trace_nfs4_open_stateid_update(state->inode, stateid, status);
+ nfs_state_log_update_open_stateid(state);
}
-static void __update_open_stateid(struct nfs4_state *state,
+static void nfs_state_set_open_stateid(struct nfs4_state *state,
const nfs4_stateid *open_stateid,
- const nfs4_stateid *deleg_stateid,
fmode_t fmode,
nfs4_stateid *freeme)
{
@@ -1450,17 +1560,34 @@ static void __update_open_stateid(struct nfs4_state *state,
* Protect the call to nfs4_state_set_mode_locked and
* serialise the stateid update
*/
- spin_lock(&state->owner->so_lock);
write_seqlock(&state->seqlock);
- if (deleg_stateid != NULL) {
- nfs4_stateid_copy(&state->stateid, deleg_stateid);
- set_bit(NFS_DELEGATED_STATE, &state->flags);
+ nfs_set_open_stateid_locked(state, open_stateid, freeme);
+ switch (fmode) {
+ case FMODE_READ:
+ set_bit(NFS_O_RDONLY_STATE, &state->flags);
+ break;
+ case FMODE_WRITE:
+ set_bit(NFS_O_WRONLY_STATE, &state->flags);
+ break;
+ case FMODE_READ|FMODE_WRITE:
+ set_bit(NFS_O_RDWR_STATE, &state->flags);
}
- if (open_stateid != NULL)
- nfs_set_open_stateid_locked(state, open_stateid, fmode, freeme);
+ set_bit(NFS_OPEN_STATE, &state->flags);
+ write_sequnlock(&state->seqlock);
+}
+
+static void nfs_state_set_delegation(struct nfs4_state *state,
+ const nfs4_stateid *deleg_stateid,
+ fmode_t fmode)
+{
+ /*
+ * Protect the call to nfs4_state_set_mode_locked and
+ * serialise the stateid update
+ */
+ write_seqlock(&state->seqlock);
+ nfs4_stateid_copy(&state->stateid, deleg_stateid);
+ set_bit(NFS_DELEGATED_STATE, &state->flags);
write_sequnlock(&state->seqlock);
- update_open_stateflags(state, fmode);
- spin_unlock(&state->owner->so_lock);
}
static int update_open_stateid(struct nfs4_state *state,
@@ -1478,6 +1605,12 @@ static int update_open_stateid(struct nfs4_state *state,
fmode &= (FMODE_READ|FMODE_WRITE);
rcu_read_lock();
+ spin_lock(&state->owner->so_lock);
+ if (open_stateid != NULL) {
+ nfs_state_set_open_stateid(state, open_stateid, fmode, &freeme);
+ ret = 1;
+ }
+
deleg_cur = rcu_dereference(nfsi->delegation);
if (deleg_cur == NULL)
goto no_delegation;
@@ -1494,18 +1627,16 @@ static int update_open_stateid(struct nfs4_state *state,
goto no_delegation_unlock;
nfs_mark_delegation_referenced(deleg_cur);
- __update_open_stateid(state, open_stateid, &deleg_cur->stateid,
- fmode, &freeme);
+ nfs_state_set_delegation(state, &deleg_cur->stateid, fmode);
ret = 1;
no_delegation_unlock:
spin_unlock(&deleg_cur->lock);
no_delegation:
+ if (ret)
+ update_open_stateflags(state, fmode);
+ spin_unlock(&state->owner->so_lock);
rcu_read_unlock();
- if (!ret && open_stateid != NULL) {
- __update_open_stateid(state, open_stateid, NULL, fmode, &freeme);
- ret = 1;
- }
if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
nfs4_schedule_state_manager(clp);
if (freeme.type != 0)
@@ -1761,7 +1892,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
struct nfs4_opendata *opendata;
opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0,
- NULL, NULL, claim, GFP_NOFS);
+ NULL, claim, GFP_NOFS);
if (opendata == NULL)
return ERR_PTR(-ENOMEM);
opendata->state = state;
@@ -2518,7 +2649,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
- atomic_inc(&lsp->ls_count);
+ refcount_inc(&lsp->ls_count);
spin_unlock(&state->state_lock);
nfs4_put_lock_state(prev);
@@ -2692,8 +2823,7 @@ out:
static int _nfs4_do_open(struct inode *dir,
struct nfs_open_context *ctx,
int flags,
- struct iattr *sattr,
- struct nfs4_label *label,
+ const struct nfs4_open_createattrs *c,
int *opened)
{
struct nfs4_state_owner *sp;
@@ -2705,6 +2835,8 @@ static int _nfs4_do_open(struct inode *dir,
struct nfs4_threshold **ctx_th = &ctx->mdsthreshold;
fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC);
enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL;
+ struct iattr *sattr = c->sattr;
+ struct nfs4_label *label = c->label;
struct nfs4_label *olabel = NULL;
int status;
@@ -2723,8 +2855,8 @@ static int _nfs4_do_open(struct inode *dir,
status = -ENOMEM;
if (d_really_is_positive(dentry))
claim = NFS4_OPEN_CLAIM_FH;
- opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr,
- label, claim, GFP_KERNEL);
+ opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags,
+ c, claim, GFP_KERNEL);
if (opendata == NULL)
goto err_put_state_owner;
@@ -2805,10 +2937,18 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_exception exception = { };
struct nfs4_state *res;
+ struct nfs4_open_createattrs c = {
+ .label = label,
+ .sattr = sattr,
+ .verf = {
+ [0] = (__u32)jiffies,
+ [1] = (__u32)current->pid,
+ },
+ };
int status;
do {
- status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened);
+ status = _nfs4_do_open(dir, ctx, flags, &c, opened);
res = ctx->state;
trace_nfs4_open_file(ctx, flags, status);
if (status == 0)
@@ -3024,18 +3164,20 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
calldata->arg.lr_args = NULL;
calldata->res.lr_res = NULL;
break;
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_layout_stateid(&calldata->arg.lr_args->stateid,
+ calldata->inode))
+ goto lr_restart;
+ /* Fallthrough */
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
case -NFS4ERR_WRONG_CRED:
calldata->arg.lr_args = NULL;
calldata->res.lr_res = NULL;
- calldata->res.lr_ret = 0;
- rpc_restart_call_prepare(task);
- return;
+ goto lr_restart;
}
}
@@ -3051,39 +3193,43 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->arg.bitmask != NULL) {
calldata->arg.bitmask = NULL;
calldata->res.fattr = NULL;
- task->tk_status = 0;
- rpc_restart_call_prepare(task);
- goto out_release;
+ goto out_restart;
}
break;
+ case -NFS4ERR_OLD_STATEID:
+ /* Did we race with OPEN? */
+ if (nfs4_refresh_open_stateid(&calldata->arg.stateid,
+ state))
+ goto out_restart;
+ goto out_release;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
nfs4_free_revoked_stateid(server,
&calldata->arg.stateid,
task->tk_msg.rpc_cred);
- case -NFS4ERR_OLD_STATEID:
+ /* Fallthrough */
case -NFS4ERR_BAD_STATEID:
- if (!nfs4_stateid_match(&calldata->arg.stateid,
- &state->open_stateid)) {
- rpc_restart_call_prepare(task);
- goto out_release;
- }
- if (calldata->arg.fmode == 0)
- break;
+ break;
default:
- if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) {
- rpc_restart_call_prepare(task);
- goto out_release;
- }
+ if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+ goto out_restart;
}
nfs_clear_open_stateid(state, &calldata->arg.stateid,
res_stateid, calldata->arg.fmode);
out_release:
+ task->tk_status = 0;
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, &calldata->fattr);
dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
+ return;
+lr_restart:
+ calldata->res.lr_ret = 0;
+out_restart:
+ task->tk_status = 0;
+ rpc_restart_call_prepare(task);
+ goto out_release;
}
static void nfs4_close_prepare(struct rpc_task *task, void *data)
@@ -3103,7 +3249,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
- nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid);
/* Calculate the change in open mode */
calldata->arg.fmode = 0;
if (state->n_rdwr == 0) {
@@ -3121,7 +3266,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
if (!nfs4_valid_open_stateid(state) ||
- test_bit(NFS_OPEN_STATE, &state->flags) == 0)
+ !nfs4_refresh_open_stateid(&calldata->arg.stateid, state))
call_close = 0;
spin_unlock(&state->owner->so_lock);
@@ -3215,6 +3360,8 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
calldata->inode = state->inode;
calldata->state = state;
calldata->arg.fh = NFS_FH(state->inode);
+ if (!nfs4_copy_open_stateid(&calldata->arg.stateid, state))
+ goto out_free_calldata;
/* Serialization for the sequence id */
alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid;
calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask);
@@ -3889,6 +4036,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
struct nfs4_accessargs args = {
.fh = NFS_FH(inode),
.bitmask = server->cache_consistency_bitmask,
+ .access = entry->mask,
};
struct nfs4_accessres res = {
.server = server,
@@ -3899,26 +4047,8 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
.rpc_resp = &res,
.rpc_cred = entry->cred,
};
- int mode = entry->mask;
int status = 0;
- /*
- * Determine which access bits we want to ask for...
- */
- if (mode & MAY_READ)
- args.access |= NFS4_ACCESS_READ;
- if (S_ISDIR(inode->i_mode)) {
- if (mode & MAY_WRITE)
- args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE;
- if (mode & MAY_EXEC)
- args.access |= NFS4_ACCESS_LOOKUP;
- } else {
- if (mode & MAY_WRITE)
- args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND;
- if (mode & MAY_EXEC)
- args.access |= NFS4_ACCESS_EXECUTE;
- }
-
res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL)
return -ENOMEM;
@@ -4843,7 +4973,7 @@ static void nfs4_renew_release(void *calldata)
struct nfs4_renewdata *data = calldata;
struct nfs_client *clp = data->client;
- if (atomic_read(&clp->cl_count) > 1)
+ if (refcount_read(&clp->cl_count) > 1)
nfs4_schedule_state_renewal(clp);
nfs_put_client(clp);
kfree(data);
@@ -4891,7 +5021,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred,
if (renew_flags == 0)
return 0;
- if (!atomic_inc_not_zero(&clp->cl_count))
+ if (!refcount_inc_not_zero(&clp->cl_count))
return -EIO;
data = kmalloc(sizeof(*data), GFP_NOFS);
if (data == NULL) {
@@ -5643,18 +5773,20 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
data->args.lr_args = NULL;
data->res.lr_res = NULL;
break;
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_layout_stateid(&data->args.lr_args->stateid,
+ data->inode))
+ goto lr_restart;
+ /* Fallthrough */
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
case -NFS4ERR_WRONG_CRED:
data->args.lr_args = NULL;
data->res.lr_res = NULL;
- data->res.lr_ret = 0;
- rpc_restart_call_prepare(task);
- return;
+ goto lr_restart;
}
}
@@ -5668,27 +5800,36 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
nfs4_free_revoked_stateid(data->res.server,
data->args.stateid,
task->tk_msg.rpc_cred);
+ /* Fallthrough */
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
task->tk_status = 0;
break;
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode))
+ goto out_restart;
+ task->tk_status = 0;
+ break;
case -NFS4ERR_ACCESS:
if (data->args.bitmask) {
data->args.bitmask = NULL;
data->res.fattr = NULL;
- task->tk_status = 0;
- rpc_restart_call_prepare(task);
- return;
+ goto out_restart;
}
+ /* Fallthrough */
default:
if (nfs4_async_handle_error(task, data->res.server,
NULL, NULL) == -EAGAIN) {
- rpc_restart_call_prepare(task);
- return;
+ goto out_restart;
}
}
data->rpc_status = task->tk_status;
+ return;
+lr_restart:
+ data->res.lr_ret = 0;
+out_restart:
+ task->tk_status = 0;
+ rpc_restart_call_prepare(task);
}
static void nfs4_delegreturn_release(void *calldata)
@@ -5896,7 +6037,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
p->arg.seqid = seqid;
p->res.seqid = seqid;
p->lsp = lsp;
- atomic_inc(&lsp->ls_count);
+ refcount_inc(&lsp->ls_count);
/* Ensure we don't close file until we're done freeing locks! */
p->ctx = get_nfs_open_context(ctx);
p->l_ctx = nfs_get_lock_context(ctx);
@@ -6112,7 +6253,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->res.lock_seqid = p->arg.lock_seqid;
p->lsp = lsp;
p->server = server;
- atomic_inc(&lsp->ls_count);
+ refcount_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
memcpy(&p->fl, fl, sizeof(p->fl));
return p;
@@ -6568,6 +6709,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
!test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
return -ENOLCK;
+ /*
+ * Don't rely on the VFS having checked the file open mode,
+ * since it won't do this for flock() locks.
+ */
+ switch (request->fl_type) {
+ case F_RDLCK:
+ if (!(filp->f_mode & FMODE_READ))
+ return -EBADF;
+ break;
+ case F_WRLCK:
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+ }
+
status = nfs4_set_lock_state(state, request);
if (status != 0)
return status;
@@ -6763,9 +6918,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
struct page *page)
{
struct nfs_server *server = NFS_SERVER(dir);
- u32 bitmask[3] = {
- [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
- };
+ u32 bitmask[3];
struct nfs4_fs_locations_arg args = {
.dir_fh = NFS_FH(dir),
.name = name,
@@ -6784,12 +6937,15 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
dprintk("%s: start\n", __func__);
+ bitmask[0] = nfs4_fattr_bitmap[0] | FATTR4_WORD0_FS_LOCATIONS;
+ bitmask[1] = nfs4_fattr_bitmap[1];
+
/* Ask for the fileid of the absent filesystem if mounted_on_fileid
* is not supported */
if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
- bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
+ bitmask[0] &= ~FATTR4_WORD0_FILEID;
else
- bitmask[0] |= FATTR4_WORD0_FILEID;
+ bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
nfs_fattr_init(&fs_locations->fattr);
fs_locations->server = server;
@@ -7472,7 +7628,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
struct nfs41_exchange_id_data *calldata;
int status;
- if (!atomic_inc_not_zero(&clp->cl_count))
+ if (!refcount_inc_not_zero(&clp->cl_count))
return ERR_PTR(-EIO);
status = -ENOMEM;
@@ -8072,7 +8228,7 @@ static void nfs41_sequence_release(void *data)
struct nfs4_sequence_data *calldata = data;
struct nfs_client *clp = calldata->clp;
- if (atomic_read(&clp->cl_count) > 1)
+ if (refcount_read(&clp->cl_count) > 1)
nfs4_schedule_state_renewal(clp);
nfs_put_client(clp);
kfree(calldata);
@@ -8101,7 +8257,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
trace_nfs4_sequence(clp, task->tk_status);
if (task->tk_status < 0) {
dprintk("%s ERROR %d\n", __func__, task->tk_status);
- if (atomic_read(&clp->cl_count) == 1)
+ if (refcount_read(&clp->cl_count) == 1)
goto out;
if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) {
@@ -8135,6 +8291,7 @@ static const struct rpc_call_ops nfs41_sequence_ops = {
static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
struct rpc_cred *cred,
+ struct nfs4_slot *slot,
bool is_privileged)
{
struct nfs4_sequence_data *calldata;
@@ -8148,15 +8305,18 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
.callback_ops = &nfs41_sequence_ops,
.flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
};
+ struct rpc_task *ret;
- if (!atomic_inc_not_zero(&clp->cl_count))
- return ERR_PTR(-EIO);
+ ret = ERR_PTR(-EIO);
+ if (!refcount_inc_not_zero(&clp->cl_count))
+ goto out_err;
+
+ ret = ERR_PTR(-ENOMEM);
calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
- if (calldata == NULL) {
- nfs_put_client(clp);
- return ERR_PTR(-ENOMEM);
- }
+ if (calldata == NULL)
+ goto out_put_clp;
nfs4_init_sequence(&calldata->args, &calldata->res, 0);
+ nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot);
if (is_privileged)
nfs4_set_sequence_privileged(&calldata->args);
msg.rpc_argp = &calldata->args;
@@ -8164,7 +8324,15 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
calldata->clp = clp;
task_setup_data.callback_data = calldata;
- return rpc_run_task(&task_setup_data);
+ ret = rpc_run_task(&task_setup_data);
+ if (IS_ERR(ret))
+ goto out_err;
+ return ret;
+out_put_clp:
+ nfs_put_client(clp);
+out_err:
+ nfs41_release_slot(slot);
+ return ret;
}
static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
@@ -8174,7 +8342,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
return -EAGAIN;
- task = _nfs41_proc_sequence(clp, cred, false);
+ task = _nfs41_proc_sequence(clp, cred, NULL, false);
if (IS_ERR(task))
ret = PTR_ERR(task);
else
@@ -8188,7 +8356,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
struct rpc_task *task;
int ret;
- task = _nfs41_proc_sequence(clp, cred, true);
+ task = _nfs41_proc_sequence(clp, cred, NULL, true);
if (IS_ERR(task)) {
ret = PTR_ERR(task);
goto out;
@@ -8588,18 +8756,27 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
server = NFS_SERVER(lrp->args.inode);
switch (task->tk_status) {
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_layout_stateid(&lrp->args.stateid,
+ lrp->args.inode))
+ goto out_restart;
+ /* Fallthrough */
default:
task->tk_status = 0;
+ /* Fallthrough */
case 0:
break;
case -NFS4ERR_DELAY:
if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
break;
- nfs4_sequence_free_slot(&lrp->res.seq_res);
- rpc_restart_call_prepare(task);
- return;
+ goto out_restart;
}
dprintk("<-- %s\n", __func__);
+ return;
+out_restart:
+ task->tk_status = 0;
+ nfs4_sequence_free_slot(&lrp->res.seq_res);
+ rpc_restart_call_prepare(task);
}
static void nfs4_layoutreturn_release(void *calldata)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0378e2257ca7..e4f4a09ed9f4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -69,6 +69,14 @@ const nfs4_stateid zero_stateid = {
{ .data = { 0 } },
.type = NFS4_SPECIAL_STATEID_TYPE,
};
+const nfs4_stateid invalid_stateid = {
+ {
+ /* Funky initialiser keeps older gcc versions happy */
+ .data = { 0xff, 0xff, 0xff, 0xff, 0 },
+ },
+ .type = NFS4_INVALID_STATEID_TYPE,
+};
+
static DEFINE_MUTEX(nfs_clid_init_mutex);
int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
@@ -645,6 +653,7 @@ nfs4_alloc_open_state(void)
INIT_LIST_HEAD(&state->lock_states);
spin_lock_init(&state->state_lock);
seqlock_init(&state->seqlock);
+ init_waitqueue_head(&state->waitq);
return state;
}
@@ -825,7 +834,7 @@ __nfs4_find_lock_state(struct nfs4_state *state,
ret = pos;
}
if (ret)
- atomic_inc(&ret->ls_count);
+ refcount_inc(&ret->ls_count);
return ret;
}
@@ -843,7 +852,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
if (lsp == NULL)
return NULL;
nfs4_init_seqid_counter(&lsp->ls_seqid);
- atomic_set(&lsp->ls_count, 1);
+ refcount_set(&lsp->ls_count, 1);
lsp->ls_state = state;
lsp->ls_owner = fl_owner;
lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
@@ -907,7 +916,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
if (lsp == NULL)
return;
state = lsp->ls_state;
- if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock))
+ if (!refcount_dec_and_lock(&lsp->ls_count, &state->state_lock))
return;
list_del(&lsp->ls_locks);
if (list_empty(&state->lock_states))
@@ -927,7 +936,7 @@ static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
dst->fl_u.nfs4_fl.owner = lsp;
- atomic_inc(&lsp->ls_count);
+ refcount_inc(&lsp->ls_count);
}
static void nfs4_fl_release_lock(struct file_lock *fl)
@@ -985,18 +994,39 @@ out:
return ret;
}
-static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
+bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
+{
+ bool ret;
+ int seq;
+
+ do {
+ ret = false;
+ seq = read_seqbegin(&state->seqlock);
+ if (nfs4_state_match_open_stateid_other(state, dst)) {
+ dst->seqid = state->open_stateid.seqid;
+ ret = true;
+ }
+ } while (read_seqretry(&state->seqlock, seq));
+ return ret;
+}
+
+bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
{
+ bool ret;
const nfs4_stateid *src;
int seq;
do {
+ ret = false;
src = &zero_stateid;
seq = read_seqbegin(&state->seqlock);
- if (test_bit(NFS_OPEN_STATE, &state->flags))
+ if (test_bit(NFS_OPEN_STATE, &state->flags)) {
src = &state->open_stateid;
+ ret = true;
+ }
nfs4_stateid_copy(dst, src);
} while (read_seqretry(&state->seqlock, seq));
+ return ret;
}
/*
@@ -1177,7 +1207,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
return;
__module_get(THIS_MODULE);
- atomic_inc(&clp->cl_count);
+ refcount_inc(&clp->cl_count);
/* The rcu_read_lock() is not strictly necessary, as the state
* manager is the only thread that ever changes the rpc_xprt
@@ -1269,7 +1299,7 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
might_sleep();
- atomic_inc(&clp->cl_count);
+ refcount_inc(&clp->cl_count);
res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (res)
@@ -1409,6 +1439,11 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
found = true;
continue;
}
+ if (nfs4_stateid_match_other(&state->open_stateid, stateid) &&
+ nfs4_state_mark_reclaim_nograce(clp, state)) {
+ found = true;
+ continue;
+ }
if (nfs_state_lock_state_matches_stateid(state, stateid) &&
nfs4_state_mark_reclaim_nograce(clp, state))
found = true;
@@ -2510,7 +2545,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
break;
if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
break;
- } while (atomic_read(&clp->cl_count) > 1);
+ } while (refcount_read(&clp->cl_count) > 1);
return;
out_error:
if (strlen(section))
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index e7c6275519b0..a275fba93170 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -202,17 +202,13 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event,
TP_ARGS(clp, error),
TP_STRUCT__entry(
- __string(dstaddr,
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR))
+ __string(dstaddr, clp->cl_hostname)
__field(int, error)
),
TP_fast_assign(
__entry->error = error;
- __assign_str(dstaddr,
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR));
+ __assign_str(dstaddr, clp->cl_hostname);
),
TP_printk(
@@ -1066,6 +1062,8 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait);
DECLARE_EVENT_CLASS(nfs4_getattr_event,
TP_PROTO(
@@ -1133,9 +1131,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
- __string(dstaddr, clp ?
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown")
+ __string(dstaddr, clp ? clp->cl_hostname : "unknown")
),
TP_fast_assign(
@@ -1148,9 +1144,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
__entry->fileid = 0;
__entry->dev = 0;
}
- __assign_str(dstaddr, clp ?
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown")
+ __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown")
),
TP_printk(
@@ -1192,9 +1186,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
- __string(dstaddr, clp ?
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown")
+ __string(dstaddr, clp ? clp->cl_hostname : "unknown")
__field(int, stateid_seq)
__field(u32, stateid_hash)
),
@@ -1209,9 +1201,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
__entry->fileid = 0;
__entry->dev = 0;
}
- __assign_str(dstaddr, clp ?
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown")
+ __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown")
__entry->stateid_seq =
be32_to_cpu(stateid->seqid);
__entry->stateid_hash =
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 14ed9791ec9c..77c6729e57f0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4385,6 +4385,14 @@ static int decode_delegation_stateid(struct xdr_stream *xdr, nfs4_stateid *state
return decode_stateid(xdr, stateid);
}
+static int decode_invalid_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+ nfs4_stateid dummy;
+
+ nfs4_stateid_copy(stateid, &invalid_stateid);
+ return decode_stateid(xdr, &dummy);
+}
+
static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
{
int status;
@@ -4393,7 +4401,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
if (!status)
- status = decode_open_stateid(xdr, &res->stateid);
+ status = decode_invalid_stateid(xdr, &res->stateid);
return status;
}
@@ -6108,6 +6116,8 @@ static int decode_layoutreturn(struct xdr_stream *xdr,
res->lrs_present = be32_to_cpup(p);
if (res->lrs_present)
status = decode_layout_stateid(xdr, &res->stateid);
+ else
+ nfs4_stateid_copy(&res->stateid, &invalid_stateid);
return status;
out_overflow:
print_overflow_msg(__func__, xdr);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3bcd669a3152..d602fe9e1ac8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -251,7 +251,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
void
pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
{
- atomic_inc(&lo->plh_refcount);
+ refcount_inc(&lo->plh_refcount);
}
static struct pnfs_layout_hdr *
@@ -296,7 +296,7 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
pnfs_layoutreturn_before_put_layout_hdr(lo);
- if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
+ if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
if (!list_empty(&lo->plh_segs))
WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
pnfs_detach_layout_hdr(lo);
@@ -355,6 +355,24 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
}
/*
+ * Update the seqid of a layout stateid
+ */
+bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+ struct pnfs_layout_hdr *lo;
+ bool ret = false;
+
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
+ dst->seqid = lo->plh_stateid.seqid;
+ ret = true;
+ }
+ spin_unlock(&inode->i_lock);
+ return ret;
+}
+
+/*
* Mark a pnfs_layout_hdr and all associated layout segments as invalid
*
* In order to continue using the pnfs_layout_hdr, a full recovery
@@ -395,14 +413,14 @@ pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
lo->plh_retry_timestamp = jiffies;
if (!test_and_set_bit(fail_bit, &lo->plh_flags))
- atomic_inc(&lo->plh_refcount);
+ refcount_inc(&lo->plh_refcount);
}
static void
pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
if (test_and_clear_bit(fail_bit, &lo->plh_flags))
- atomic_dec(&lo->plh_refcount);
+ refcount_dec(&lo->plh_refcount);
}
static void
@@ -450,7 +468,7 @@ pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
{
INIT_LIST_HEAD(&lseg->pls_list);
INIT_LIST_HEAD(&lseg->pls_lc_list);
- atomic_set(&lseg->pls_refcount, 1);
+ refcount_set(&lseg->pls_refcount, 1);
set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
lseg->pls_layout = lo;
lseg->pls_range = *range;
@@ -472,7 +490,7 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
list_del_init(&lseg->pls_list);
/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
- atomic_dec(&lo->plh_refcount);
+ refcount_dec(&lo->plh_refcount);
if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
return;
if (list_empty(&lo->plh_segs) &&
@@ -507,13 +525,13 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
return;
dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
- atomic_read(&lseg->pls_refcount),
+ refcount_read(&lseg->pls_refcount),
test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
lo = lseg->pls_layout;
inode = lo->plh_inode;
- if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
+ if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
spin_unlock(&inode->i_lock);
return;
@@ -551,7 +569,7 @@ pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list)
{
- if (!atomic_dec_and_test(&lseg->pls_refcount))
+ if (!refcount_dec_and_test(&lseg->pls_refcount))
return false;
pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
list_add(&lseg->pls_list, tmp_list);
@@ -570,7 +588,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
* outstanding io is finished.
*/
dprintk("%s: lseg %p ref %d\n", __func__, lseg,
- atomic_read(&lseg->pls_refcount));
+ refcount_read(&lseg->pls_refcount));
if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
rv = 1;
}
@@ -1451,7 +1469,7 @@ alloc_init_layout_hdr(struct inode *ino,
lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
if (!lo)
return NULL;
- atomic_set(&lo->plh_refcount, 1);
+ refcount_set(&lo->plh_refcount, 1);
INIT_LIST_HEAD(&lo->plh_layouts);
INIT_LIST_HEAD(&lo->plh_segs);
INIT_LIST_HEAD(&lo->plh_return_segs);
@@ -1513,7 +1531,7 @@ pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
if ((range->iomode == IOMODE_RW &&
ls_range->iomode != IOMODE_RW) ||
(range->iomode != ls_range->iomode &&
- strict_iomode == true) ||
+ strict_iomode) ||
!pnfs_lseg_range_intersecting(ls_range, range))
return 0;
@@ -1546,7 +1564,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
}
dprintk("%s:Return lseg %p ref %d\n",
- __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
+ __func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0);
return ret;
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 87f144f14d1e..8d507c361d98 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,6 +30,7 @@
#ifndef FS_NFS_PNFS_H
#define FS_NFS_PNFS_H
+#include <linux/refcount.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/workqueue.h>
@@ -54,7 +55,7 @@ struct nfs4_pnfs_ds {
char *ds_remotestr; /* comma sep list of addrs */
struct list_head ds_addrs;
struct nfs_client *ds_clp;
- atomic_t ds_count;
+ refcount_t ds_count;
unsigned long ds_state;
#define NFS4DS_CONNECTING 0 /* ds is establishing connection */
};
@@ -63,7 +64,7 @@ struct pnfs_layout_segment {
struct list_head pls_list;
struct list_head pls_lc_list;
struct pnfs_layout_range pls_range;
- atomic_t pls_refcount;
+ refcount_t pls_refcount;
u32 pls_seq;
unsigned long pls_flags;
struct pnfs_layout_hdr *pls_layout;
@@ -179,7 +180,7 @@ struct pnfs_layoutdriver_type {
};
struct pnfs_layout_hdr {
- atomic_t plh_refcount;
+ refcount_t plh_refcount;
atomic_t plh_outstanding; /* number of RPCs out */
struct list_head plh_layouts; /* other client layouts */
struct list_head plh_bulk_destroy;
@@ -251,6 +252,7 @@ int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
bool is_recall);
int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
bool is_recall);
+bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode);
void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new,
@@ -393,7 +395,7 @@ static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
if (lseg) {
- atomic_inc(&lseg->pls_refcount);
+ refcount_inc(&lseg->pls_refcount);
smp_mb__after_atomic();
}
return lseg;
@@ -764,6 +766,11 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
{
}
+static inline bool nfs4_refresh_layout_stateid(nfs4_stateid *dst,
+ struct inode *inode)
+{
+ return false;
+}
#endif /* CONFIG_NFS_V4_1 */
#if IS_ENABLED(CONFIG_NFS_V4_2)
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 60da59be83b6..03aaa60c7768 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -338,7 +338,7 @@ print_ds(struct nfs4_pnfs_ds *ds)
" client %p\n"
" cl_exchange_flags %x\n",
ds->ds_remotestr,
- atomic_read(&ds->ds_count), ds->ds_clp,
+ refcount_read(&ds->ds_count), ds->ds_clp,
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}
@@ -451,7 +451,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds)
void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
{
- if (atomic_dec_and_lock(&ds->ds_count,
+ if (refcount_dec_and_lock(&ds->ds_count,
&nfs4_ds_cache_lock)) {
list_del_init(&ds->ds_node);
spin_unlock(&nfs4_ds_cache_lock);
@@ -537,7 +537,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
INIT_LIST_HEAD(&ds->ds_addrs);
list_splice_init(dsaddrs, &ds->ds_addrs);
ds->ds_remotestr = remotestr;
- atomic_set(&ds->ds_count, 1);
+ refcount_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache);
@@ -546,10 +546,10 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
} else {
kfree(remotestr);
kfree(ds);
- atomic_inc(&tmp_ds->ds_count);
+ refcount_inc(&tmp_ds->ds_count);
dprintk("%s data server %s found, inc'ed ds_count to %d\n",
__func__, tmp_ds->ds_remotestr,
- atomic_read(&tmp_ds->ds_count));
+ refcount_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
spin_unlock(&nfs4_ds_cache_lock);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c9d24bae3025..29bacdc56f6a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -813,9 +813,9 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
*/
seq_printf(m, "\n\topts:\t");
seq_puts(m, sb_rdonly(root->d_sb) ? "ro" : "rw");
- seq_puts(m, root->d_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
- seq_puts(m, root->d_sb->s_flags & MS_NOATIME ? ",noatime" : "");
- seq_puts(m, root->d_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+ seq_puts(m, root->d_sb->s_flags & SB_SYNCHRONOUS ? ",sync" : "");
+ seq_puts(m, root->d_sb->s_flags & SB_NOATIME ? ",noatime" : "");
+ seq_puts(m, root->d_sb->s_flags & SB_NODIRATIME ? ",nodiratime" : "");
nfs_show_mount_options(m, nfss, 1);
seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
@@ -1332,7 +1332,7 @@ static int nfs_parse_mount_options(char *raw,
mnt->options |= NFS_OPTION_MIGRATION;
break;
case Opt_nomigration:
- mnt->options &= NFS_OPTION_MIGRATION;
+ mnt->options &= ~NFS_OPTION_MIGRATION;
break;
/*
@@ -1456,18 +1456,21 @@ static int nfs_parse_mount_options(char *raw,
switch (token) {
case Opt_xprt_udp6:
protofamily = AF_INET6;
+ /* fall through */
case Opt_xprt_udp:
mnt->flags &= ~NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
break;
case Opt_xprt_tcp6:
protofamily = AF_INET6;
+ /* fall through */
case Opt_xprt_tcp:
mnt->flags |= NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
break;
case Opt_xprt_rdma6:
protofamily = AF_INET6;
+ /* fall through */
case Opt_xprt_rdma:
/* vector side protocols to TCP */
mnt->flags |= NFS_MOUNT_TCP;
@@ -1494,11 +1497,13 @@ static int nfs_parse_mount_options(char *raw,
switch (token) {
case Opt_xprt_udp6:
mountfamily = AF_INET6;
+ /* fall through */
case Opt_xprt_udp:
mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
break;
case Opt_xprt_tcp6:
mountfamily = AF_INET6;
+ /* fall through */
case Opt_xprt_tcp:
mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
break;
@@ -1988,9 +1993,9 @@ static int nfs23_validate_mount_data(void *options,
args->version = NFS_DEFAULT_VERSION;
switch (data->version) {
case 1:
- data->namlen = 0;
+ data->namlen = 0; /* fall through */
case 2:
- data->bsize = 0;
+ data->bsize = 0; /* fall through */
case 3:
if (data->flags & NFS_MOUNT_VER3)
goto out_no_v3;
@@ -1998,11 +2003,14 @@ static int nfs23_validate_mount_data(void *options,
memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
/* Turn off security negotiation */
extra_flags |= NFS_MOUNT_SECFLAVOUR;
+ /* fall through */
case 4:
if (data->flags & NFS_MOUNT_SECFLAVOUR)
goto out_no_sec;
+ /* fall through */
case 5:
memset(data->context, 0, sizeof(data->context));
+ /* fall through */
case 6:
if (data->flags & NFS_MOUNT_VER3) {
if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
@@ -2288,11 +2296,11 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
/*
* noac is a special case. It implies -o sync, but that's not
* necessarily reflected in the mtab options. do_remount_sb
- * will clear MS_SYNCHRONOUS if -o sync wasn't specified in the
+ * will clear SB_SYNCHRONOUS if -o sync wasn't specified in the
* remount options, so we have to explicitly reset it.
*/
if (data->flags & NFS_MOUNT_NOAC)
- *flags |= MS_SYNCHRONOUS;
+ *flags |= SB_SYNCHRONOUS;
/* compare new mount options with old ones */
error = nfs_compare_remount_data(nfss, data);
@@ -2341,7 +2349,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
/* The VFS shouldn't apply the umask to mode bits. We will do
* so ourselves when necessary.
*/
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
sb->s_time_gran = 1;
sb->s_export_op = &nfs_export_ops;
}
@@ -2371,7 +2379,7 @@ static void nfs_clone_super(struct super_block *sb,
/* The VFS shouldn't apply the umask to mode bits. We will do
* so ourselves when necessary.
*/
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
}
nfs_initialise_sb(sb);
@@ -2592,11 +2600,11 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
/* -o noac implies -o sync */
if (server->flags & NFS_MOUNT_NOAC)
- sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+ sb_mntdata.mntflags |= SB_SYNCHRONOUS;
if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL)
- if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS)
- sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+ if (mount_info->cloned->sb->s_flags & SB_SYNCHRONOUS)
+ sb_mntdata.mntflags |= SB_SYNCHRONOUS;
/* Get a superblock - note that we may end up sharing one that already exists */
s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata);
@@ -2633,7 +2641,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
if (error)
goto error_splat_root;
- s->s_flags |= MS_ACTIVE;
+ s->s_flags |= SB_ACTIVE;
out:
return mntroot;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index babebbccae2a..5b5f464f6f2a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -487,10 +487,8 @@ try_again:
}
ret = nfs_page_group_lock(head);
- if (ret < 0) {
- nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
- }
+ if (ret < 0)
+ goto release_request;
/* lock each request in the page group */
total_bytes = head->wb_bytes;
@@ -515,8 +513,7 @@ try_again:
if (ret < 0) {
nfs_unroll_locks(inode, head, subreq);
nfs_release_request(subreq);
- nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
+ goto release_request;
}
}
/*
@@ -532,8 +529,8 @@ try_again:
nfs_page_group_unlock(head);
nfs_unroll_locks(inode, head, subreq);
nfs_unlock_and_release_request(subreq);
- nfs_unlock_and_release_request(head);
- return ERR_PTR(-EIO);
+ ret = -EIO;
+ goto release_request;
}
}
@@ -576,6 +573,10 @@ try_again:
/* still holds ref on head from nfs_page_find_head_request
* and still has lock on head from lock loop */
return head;
+
+release_request:
+ nfs_unlock_and_release_request(head);
+ return ERR_PTR(ret);
}
static void nfs_write_error_remove_page(struct nfs_page *req)
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 420d3a0ab258..5be08f02a76b 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -30,7 +30,11 @@ locks_start_grace(struct net *net, struct lock_manager *lm)
struct list_head *grace_list = net_generic(net, grace_net_id);
spin_lock(&grace_lock);
- list_add(&lm->list, grace_list);
+ if (list_empty(&lm->list))
+ list_add(&lm->list, grace_list);
+ else
+ WARN(1, "double list_add attempt detected in net %x %s\n",
+ net->ns.inum, (net == &init_net) ? "(init_net)" : "");
spin_unlock(&grace_lock);
}
EXPORT_SYMBOL_GPL(locks_start_grace);
@@ -55,14 +59,7 @@ locks_end_grace(struct lock_manager *lm)
}
EXPORT_SYMBOL_GPL(locks_end_grace);
-/**
- * locks_in_grace
- *
- * Lock managers call this function to determine when it is OK for them
- * to answer ordinary lock requests, and when they should accept only
- * lock reclaims.
- */
-int
+static bool
__state_in_grace(struct net *net, bool open)
{
struct list_head *grace_list = net_generic(net, grace_net_id);
@@ -78,15 +75,22 @@ __state_in_grace(struct net *net, bool open)
return false;
}
-int locks_in_grace(struct net *net)
+/**
+ * locks_in_grace
+ *
+ * Lock managers call this function to determine when it is OK for them
+ * to answer ordinary lock requests, and when they should accept only
+ * lock reclaims.
+ */
+bool locks_in_grace(struct net *net)
{
- return __state_in_grace(net, 0);
+ return __state_in_grace(net, false);
}
EXPORT_SYMBOL_GPL(locks_in_grace);
-int opens_in_grace(struct net *net)
+bool opens_in_grace(struct net *net)
{
- return __state_in_grace(net, 1);
+ return __state_in_grace(net, true);
}
EXPORT_SYMBOL_GPL(opens_in_grace);
@@ -104,7 +108,9 @@ grace_exit_net(struct net *net)
{
struct list_head *grace_list = net_generic(net, grace_net_id);
- BUG_ON(!list_empty(grace_list));
+ WARN_ONCE(!list_empty(grace_list),
+ "net %x %s: grace_list is not empty\n",
+ net->ns.inum, __func__);
}
static struct pernet_operations grace_net_ops = {
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 46b48dbbdd32..8ceb25a10ea0 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -232,7 +232,7 @@ static struct cache_head *expkey_alloc(void)
return NULL;
}
-static struct cache_detail svc_expkey_cache_template = {
+static const struct cache_detail svc_expkey_cache_template = {
.owner = THIS_MODULE,
.hash_size = EXPKEY_HASHMAX,
.name = "nfsd.fh",
@@ -748,7 +748,7 @@ static struct cache_head *svc_export_alloc(void)
return NULL;
}
-static struct cache_detail svc_export_cache_template = {
+static const struct cache_detail svc_export_cache_template = {
.owner = THIS_MODULE,
.hash_size = EXPORT_HASHMAX,
.name = "nfsd.export",
@@ -1230,7 +1230,7 @@ nfsd_export_init(struct net *net)
int rv;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- dprintk("nfsd: initializing export module (net: %p).\n", net);
+ dprintk("nfsd: initializing export module (net: %x).\n", net->ns.inum);
nn->svc_export_cache = cache_create_net(&svc_export_cache_template, net);
if (IS_ERR(nn->svc_export_cache))
@@ -1278,7 +1278,7 @@ nfsd_export_shutdown(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- dprintk("nfsd: shutting down export module (net: %p).\n", net);
+ dprintk("nfsd: shutting down export module (net: %x).\n", net->ns.inum);
cache_unregister_net(nn->svc_expkey_cache, net);
cache_unregister_net(nn->svc_export_cache, net);
@@ -1286,5 +1286,5 @@ nfsd_export_shutdown(struct net *net)
cache_destroy_net(nn->svc_export_cache, net);
svcauth_unix_purge(net);
- dprintk("nfsd: export shutdown complete (net: %p).\n", net);
+ dprintk("nfsd: export shutdown complete (net: %x).\n", net->ns.inum);
}
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 6dfede6d172a..84831253203d 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -12,6 +12,7 @@
#include <linux/nsproxy.h>
#include <linux/sunrpc/addr.h>
#include <linux/uaccess.h>
+#include <linux/kernel.h>
#include "state.h"
#include "netns.h"
@@ -126,8 +127,6 @@ static struct nfsd_fault_inject_op inject_ops[] = {
},
};
-#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op))
-
int nfsd_fault_inject_init(void)
{
unsigned int i;
@@ -138,7 +137,7 @@ int nfsd_fault_inject_init(void)
if (!debug_dir)
goto fail;
- for (i = 0; i < NUM_INJECT_OPS; i++) {
+ for (i = 0; i < ARRAY_SIZE(inject_ops); i++) {
op = &inject_ops[i];
if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
goto fail;
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 3714231a9d0f..36358d435cb0 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -107,7 +107,7 @@ struct nfsd_net {
bool lockd_up;
/* Time of server startup */
- struct timeval nfssvc_boot;
+ struct timespec64 nfssvc_boot;
/*
* Max number of connections this nfsd container will allow. Defaults
@@ -119,6 +119,9 @@ struct nfsd_net {
u32 clverifier_counter;
struct svc_serv *nfsd_serv;
+
+ wait_queue_head_t ntf_wq;
+ atomic_t ntf_refcnt;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f38acd905441..2758480555fa 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -748,8 +748,9 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
if (resp->status == 0) {
*p++ = htonl(resp->count);
*p++ = htonl(resp->committed);
- *p++ = htonl(nn->nfssvc_boot.tv_sec);
- *p++ = htonl(nn->nfssvc_boot.tv_usec);
+ /* unique identifier, y2038 overflow can be ignored */
+ *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
+ *p++ = htonl(nn->nfssvc_boot.tv_nsec);
}
return xdr_ressize_check(rqstp, p);
}
@@ -1119,8 +1120,9 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
p = encode_wcc_data(rqstp, p, &resp->fh);
/* Write verifier */
if (resp->status == 0) {
- *p++ = htonl(nn->nfssvc_boot.tv_sec);
- *p++ = htonl(nn->nfssvc_boot.tv_usec);
+ /* unique identifier, y2038 overflow can be ignored */
+ *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
+ *p++ = htonl(nn->nfssvc_boot.tv_nsec);
}
return xdr_ressize_check(rqstp, p);
}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 6b9b6cca469f..a5bb76593ce7 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -178,7 +178,7 @@ static struct ent *idtoname_lookup(struct cache_detail *, struct ent *);
static struct ent *idtoname_update(struct cache_detail *, struct ent *,
struct ent *);
-static struct cache_detail idtoname_cache_template = {
+static const struct cache_detail idtoname_cache_template = {
.owner = THIS_MODULE,
.hash_size = ENT_HASHMAX,
.name = "nfs4.idtoname",
@@ -341,7 +341,7 @@ static struct ent *nametoid_update(struct cache_detail *, struct ent *,
struct ent *);
static int nametoid_parse(struct cache_detail *, char *, int);
-static struct cache_detail nametoid_cache_template = {
+static const struct cache_detail nametoid_cache_template = {
.owner = THIS_MODULE,
.hash_size = ENT_HASHMAX,
.name = "nfs4.nametoid",
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ea45d954e8d7..7d888369f85a 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -336,7 +336,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
trace_layout_recall(&ls->ls_stid.sc_stateid);
- atomic_inc(&ls->ls_stid.sc_count);
+ refcount_inc(&ls->ls_stid.sc_count);
nfsd4_run_cb(&ls->ls_recall);
out_unlock:
@@ -441,7 +441,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
goto done;
}
- atomic_inc(&ls->ls_stid.sc_count);
+ refcount_inc(&ls->ls_stid.sc_count);
list_add_tail(&new->lo_perstate, &ls->ls_layouts);
new = NULL;
done:
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8487486ec496..008ea0b627d0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -485,9 +485,6 @@ static __be32
nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
- if (!cstate->current_fh.fh_dentry)
- return nfserr_nofilehandle;
-
u->getfh = &cstate->current_fh;
return nfs_ok;
}
@@ -535,9 +532,6 @@ static __be32
nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
- if (!cstate->current_fh.fh_dentry)
- return nfserr_nofilehandle;
-
fh_dup2(&cstate->save_fh, &cstate->current_fh);
if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
@@ -570,10 +564,11 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
/*
* This is opaque to client, so no need to byte-swap. Use
- * __force to keep sparse happy
+ * __force to keep sparse happy. y2038 time_t overflow is
+ * irrelevant in this usage.
*/
verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
- verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec;
+ verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
memcpy(verifier->data, verf, sizeof(verifier->data));
}
@@ -703,10 +698,8 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_link *link = &u->link;
- __be32 status = nfserr_nofilehandle;
+ __be32 status;
- if (!cstate->save_fh.fh_dentry)
- return status;
status = nfsd_link(rqstp, &cstate->current_fh,
link->li_name, link->li_namelen, &cstate->save_fh);
if (!status)
@@ -850,10 +843,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_rename *rename = &u->rename;
- __be32 status = nfserr_nofilehandle;
+ __be32 status;
- if (!cstate->save_fh.fh_dentry)
- return status;
if (opens_in_grace(SVC_NET(rqstp)) &&
!(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
return nfserr_grace;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0c04f81aa63b..b29b5a185a2c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -63,12 +63,16 @@ static const stateid_t zero_stateid = {
static const stateid_t currentstateid = {
.si_generation = 1,
};
+static const stateid_t close_stateid = {
+ .si_generation = 0xffffffffU,
+};
static u64 current_sessionid = 1;
#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
+#define CLOSE_STATEID(stateid) (!memcmp((stateid), &close_stateid, sizeof(stateid_t)))
/* forward declarations */
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
@@ -83,6 +87,11 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
*/
static DEFINE_SPINLOCK(state_lock);
+enum nfsd4_st_mutex_lock_subclass {
+ OPEN_STATEID_MUTEX = 0,
+ LOCK_STATEID_MUTEX = 1,
+};
+
/*
* A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for
* the refcount on the open stateid to drop.
@@ -359,7 +368,7 @@ put_nfs4_file(struct nfs4_file *fi)
{
might_lock(&state_lock);
- if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
+ if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
hlist_del_rcu(&fi->fi_hash);
spin_unlock(&state_lock);
WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
@@ -568,7 +577,7 @@ alloc_clnt_odstate(struct nfs4_client *clp)
co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
if (co) {
co->co_client = clp;
- atomic_set(&co->co_odcount, 1);
+ refcount_set(&co->co_odcount, 1);
}
return co;
}
@@ -586,7 +595,7 @@ static inline void
get_clnt_odstate(struct nfs4_clnt_odstate *co)
{
if (co)
- atomic_inc(&co->co_odcount);
+ refcount_inc(&co->co_odcount);
}
static void
@@ -598,7 +607,7 @@ put_clnt_odstate(struct nfs4_clnt_odstate *co)
return;
fp = co->co_file;
- if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
+ if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
list_del(&co->co_perfile);
spin_unlock(&fp->fi_lock);
@@ -656,7 +665,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
stid->sc_stateid.si_opaque.so_id = new_id;
stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
/* Will be incremented before return to client: */
- atomic_set(&stid->sc_count, 1);
+ refcount_set(&stid->sc_count, 1);
spin_lock_init(&stid->sc_lock);
/*
@@ -813,7 +822,7 @@ nfs4_put_stid(struct nfs4_stid *s)
might_lock(&clp->cl_lock);
- if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
+ if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
wake_up_all(&close_wq);
return;
}
@@ -913,7 +922,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
if (status)
return status;
++fp->fi_delegees;
- atomic_inc(&dp->dl_stid.sc_count);
+ refcount_inc(&dp->dl_stid.sc_count);
dp->dl_stid.sc_type = NFS4_DELEG_STID;
list_add(&dp->dl_perfile, &fp->fi_delegations);
list_add(&dp->dl_perclnt, &clp->cl_delegations);
@@ -1214,7 +1223,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
WARN_ON_ONCE(!list_empty(&stp->st_locks));
- if (!atomic_dec_and_test(&s->sc_count)) {
+ if (!refcount_dec_and_test(&s->sc_count)) {
wake_up_all(&close_wq);
return;
}
@@ -1439,8 +1448,10 @@ free_session_slots(struct nfsd4_session *ses)
{
int i;
- for (i = 0; i < ses->se_fchannel.maxreqs; i++)
+ for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+ free_svc_cred(&ses->se_slots[i]->sl_cred);
kfree(ses->se_slots[i]);
+ }
}
/*
@@ -1472,6 +1483,11 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
spin_lock(&nfsd_drc_lock);
avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
nfsd_drc_max_mem - nfsd_drc_mem_used);
+ /*
+ * Never use more than a third of the remaining memory,
+ * unless it's the only way to give this client a slot:
+ */
+ avail = clamp_t(int, avail, slotsize, avail/3);
num = min_t(int, num, avail / slotsize);
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
@@ -2072,7 +2088,7 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
s = find_stateid_locked(cl, t);
if (s != NULL) {
if (typemask & s->sc_type)
- atomic_inc(&s->sc_count);
+ refcount_inc(&s->sc_count);
else
s = NULL;
}
@@ -2287,14 +2303,18 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
dprintk("--> %s slot %p\n", __func__, slot);
+ slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
slot->sl_opcnt = resp->opcnt;
slot->sl_status = resp->cstate.status;
+ free_svc_cred(&slot->sl_cred);
+ copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred);
- slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
- if (nfsd4_not_cached(resp)) {
- slot->sl_datalen = 0;
+ if (!nfsd4_cache_this(resp)) {
+ slot->sl_flags &= ~NFSD4_SLOT_CACHED;
return;
}
+ slot->sl_flags |= NFSD4_SLOT_CACHED;
+
base = resp->cstate.data_offset;
slot->sl_datalen = buf->len - base;
if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
@@ -2321,8 +2341,16 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
op = &args->ops[resp->opcnt - 1];
nfsd4_encode_operation(resp, op);
- /* Return nfserr_retry_uncached_rep in next operation. */
- if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
+ if (slot->sl_flags & NFSD4_SLOT_CACHED)
+ return op->status;
+ if (args->opcnt == 1) {
+ /*
+ * The original operation wasn't a solo sequence--we
+ * always cache those--so this retry must not match the
+ * original:
+ */
+ op->status = nfserr_seq_false_retry;
+ } else {
op = &args->ops[resp->opcnt++];
op->status = nfserr_retry_uncached_rep;
nfsd4_encode_operation(resp, op);
@@ -2986,6 +3014,34 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
return xb->len > session->se_fchannel.maxreq_sz;
}
+static bool replay_matches_cache(struct svc_rqst *rqstp,
+ struct nfsd4_sequence *seq, struct nfsd4_slot *slot)
+{
+ struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+ if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) !=
+ (bool)seq->cachethis)
+ return false;
+ /*
+ * If there's an error than the reply can have fewer ops than
+ * the call. But if we cached a reply with *more* ops than the
+ * call you're sending us now, then this new call is clearly not
+ * really a replay of the old one:
+ */
+ if (slot->sl_opcnt < argp->opcnt)
+ return false;
+ /* This is the only check explicitly called by spec: */
+ if (!same_creds(&rqstp->rq_cred, &slot->sl_cred))
+ return false;
+ /*
+ * There may be more comparisons we could actually do, but the
+ * spec doesn't require us to catch every case where the calls
+ * don't match (that would require caching the call as well as
+ * the reply), so we don't bother.
+ */
+ return true;
+}
+
__be32
nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -3045,6 +3101,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_seq_misordered;
if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
goto out_put_session;
+ status = nfserr_seq_false_retry;
+ if (!replay_matches_cache(rqstp, seq, slot))
+ goto out_put_session;
cstate->slot = slot;
cstate->session = session;
cstate->clp = clp;
@@ -3351,7 +3410,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
{
lockdep_assert_held(&state_lock);
- atomic_set(&fp->fi_ref, 1);
+ refcount_set(&fp->fi_ref, 1);
spin_lock_init(&fp->fi_lock);
INIT_LIST_HEAD(&fp->fi_stateids);
INIT_LIST_HEAD(&fp->fi_delegations);
@@ -3512,15 +3571,63 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
/* ignore lock owners */
if (local->st_stateowner->so_is_open_owner == 0)
continue;
- if (local->st_stateowner == &oo->oo_owner) {
+ if (local->st_stateowner != &oo->oo_owner)
+ continue;
+ if (local->st_stid.sc_type == NFS4_OPEN_STID) {
ret = local;
- atomic_inc(&ret->st_stid.sc_count);
+ refcount_inc(&ret->st_stid.sc_count);
break;
}
}
return ret;
}
+static __be32
+nfsd4_verify_open_stid(struct nfs4_stid *s)
+{
+ __be32 ret = nfs_ok;
+
+ switch (s->sc_type) {
+ default:
+ break;
+ case NFS4_CLOSED_STID:
+ case NFS4_CLOSED_DELEG_STID:
+ ret = nfserr_bad_stateid;
+ break;
+ case NFS4_REVOKED_DELEG_STID:
+ ret = nfserr_deleg_revoked;
+ }
+ return ret;
+}
+
+/* Lock the stateid st_mutex, and deal with races with CLOSE */
+static __be32
+nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
+{
+ __be32 ret;
+
+ mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);
+ ret = nfsd4_verify_open_stid(&stp->st_stid);
+ if (ret != nfs_ok)
+ mutex_unlock(&stp->st_mutex);
+ return ret;
+}
+
+static struct nfs4_ol_stateid *
+nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
+{
+ struct nfs4_ol_stateid *stp;
+ for (;;) {
+ spin_lock(&fp->fi_lock);
+ stp = nfsd4_find_existing_open(fp, open);
+ spin_unlock(&fp->fi_lock);
+ if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok)
+ break;
+ nfs4_put_stid(&stp->st_stid);
+ }
+ return stp;
+}
+
static struct nfs4_openowner *
alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
struct nfsd4_compound_state *cstate)
@@ -3563,8 +3670,9 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
stp = open->op_stp;
/* We are moving these outside of the spinlocks to avoid the warnings */
mutex_init(&stp->st_mutex);
- mutex_lock(&stp->st_mutex);
+ mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX);
+retry:
spin_lock(&oo->oo_owner.so_client->cl_lock);
spin_lock(&fp->fi_lock);
@@ -3573,7 +3681,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
goto out_unlock;
open->op_stp = NULL;
- atomic_inc(&stp->st_stid.sc_count);
+ refcount_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_OPEN_STID;
INIT_LIST_HEAD(&stp->st_locks);
stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
@@ -3589,7 +3697,11 @@ out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&oo->oo_owner.so_client->cl_lock);
if (retstp) {
- mutex_lock(&retstp->st_mutex);
+ /* Handle races with CLOSE */
+ if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
+ nfs4_put_stid(&retstp->st_stid);
+ goto retry;
+ }
/* To keep mutex tracking happy */
mutex_unlock(&stp->st_mutex);
stp = retstp;
@@ -3621,7 +3733,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
* there should be no danger of the refcount going back up again at
* this point.
*/
- wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2);
+ wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
release_all_access(s);
if (s->st_stid.sc_file) {
@@ -3647,7 +3759,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
if (fh_match(&fp->fi_fhandle, fh)) {
- if (atomic_inc_not_zero(&fp->fi_ref))
+ if (refcount_inc_not_zero(&fp->fi_ref))
return fp;
}
}
@@ -3783,7 +3895,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
* lock) we know the server hasn't removed the lease yet, we know
* it's safe to take a reference.
*/
- atomic_inc(&dp->dl_stid.sc_count);
+ refcount_inc(&dp->dl_stid.sc_count);
nfsd4_run_cb(&dp->dl_recall);
}
@@ -3966,7 +4078,8 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei
{
struct nfs4_stid *ret;
- ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID);
+ ret = find_stateid_by_type(cl, s,
+ NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
if (!ret)
return NULL;
return delegstateid(ret);
@@ -3989,6 +4102,12 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
if (deleg == NULL)
goto out;
+ if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+ nfs4_put_stid(&deleg->dl_stid);
+ if (cl->cl_minorversion)
+ status = nfserr_deleg_revoked;
+ goto out;
+ }
flags = share_access_to_flags(open->op_share_access);
status = nfs4_check_delegmode(deleg, flags);
if (status) {
@@ -4392,6 +4511,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
struct nfs4_ol_stateid *stp = NULL;
struct nfs4_delegation *dp = NULL;
__be32 status;
+ bool new_stp = false;
/*
* Lookup file; if found, lookup stateid and check open request,
@@ -4403,9 +4523,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
- spin_lock(&fp->fi_lock);
- stp = nfsd4_find_existing_open(fp, open);
- spin_unlock(&fp->fi_lock);
+ stp = nfsd4_find_and_lock_existing_open(fp, open);
} else {
open->op_file = NULL;
status = nfserr_bad_stateid;
@@ -4413,35 +4531,31 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
goto out;
}
+ if (!stp) {
+ stp = init_open_stateid(fp, open);
+ if (!open->op_stp)
+ new_stp = true;
+ }
+
/*
* OPEN the file, or upgrade an existing OPEN.
* If truncate fails, the OPEN fails.
+ *
+ * stp is already locked.
*/
- if (stp) {
+ if (!new_stp) {
/* Stateid was found, this is an OPEN upgrade */
- mutex_lock(&stp->st_mutex);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
if (status) {
mutex_unlock(&stp->st_mutex);
goto out;
}
} else {
- /* stp is returned locked. */
- stp = init_open_stateid(fp, open);
- /* See if we lost the race to some other thread */
- if (stp->st_access_bmap != 0) {
- status = nfs4_upgrade_open(rqstp, fp, current_fh,
- stp, open);
- if (status) {
- mutex_unlock(&stp->st_mutex);
- goto out;
- }
- goto upgrade_out;
- }
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
- mutex_unlock(&stp->st_mutex);
+ stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_open_stateid(stp);
+ mutex_unlock(&stp->st_mutex);
goto out;
}
@@ -4450,7 +4564,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (stp->st_clnt_odstate == open->op_odstate)
open->op_odstate = NULL;
}
-upgrade_out:
+
nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
mutex_unlock(&stp->st_mutex);
@@ -4677,7 +4791,7 @@ nfs4_laundromat(struct nfsd_net *nn)
spin_unlock(&nn->blocked_locks_lock);
while (!list_empty(&reaplist)) {
- nbl = list_first_entry(&nn->blocked_locks_lru,
+ nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
posix_unblock_lock(&nbl->nbl_lock);
@@ -4798,6 +4912,18 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s
return nfserr_old_stateid;
}
+static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_stid *s, bool has_session)
+{
+ __be32 ret;
+
+ spin_lock(&s->sc_lock);
+ ret = nfsd4_verify_open_stid(s);
+ if (ret == nfs_ok)
+ ret = check_stateid_generation(in, &s->sc_stateid, has_session);
+ spin_unlock(&s->sc_lock);
+ return ret;
+}
+
static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols)
{
if (ols->st_stateowner->so_is_open_owner &&
@@ -4811,7 +4937,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
struct nfs4_stid *s;
__be32 status = nfserr_bad_stateid;
- if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
+ CLOSE_STATEID(stateid))
return status;
/* Client debugging aid. */
if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
@@ -4826,7 +4953,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
s = find_stateid_locked(cl, stateid);
if (!s)
goto out_unlock;
- status = check_stateid_generation(stateid, &s->sc_stateid, 1);
+ status = nfsd4_stid_check_stateid_generation(stateid, s, 1);
if (status)
goto out_unlock;
switch (s->sc_type) {
@@ -4858,8 +4985,19 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
struct nfs4_stid **s, struct nfsd_net *nn)
{
__be32 status;
+ bool return_revoked = false;
- if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+ /*
+ * only return revoked delegations if explicitly asked.
+ * otherwise we report revoked or bad_stateid status.
+ */
+ if (typemask & NFS4_REVOKED_DELEG_STID)
+ return_revoked = true;
+ else if (typemask & NFS4_DELEG_STID)
+ typemask |= NFS4_REVOKED_DELEG_STID;
+
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
+ CLOSE_STATEID(stateid))
return nfserr_bad_stateid;
status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
if (status == nfserr_stale_clientid) {
@@ -4872,6 +5010,12 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
*s = find_stateid_by_type(cstate->clp, stateid, typemask);
if (!*s)
return nfserr_bad_stateid;
+ if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+ nfs4_put_stid(*s);
+ if (cstate->minorversion)
+ return nfserr_deleg_revoked;
+ return nfserr_bad_stateid;
+ }
return nfs_ok;
}
@@ -4971,7 +5115,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
&s, nn);
if (status)
return status;
- status = check_stateid_generation(stateid, &s->sc_stateid,
+ status = nfsd4_stid_check_stateid_generation(stateid, s,
nfsd4_has_session(cstate));
if (status)
goto out;
@@ -5025,7 +5169,9 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
struct nfs4_ol_stateid *stp = openlockstateid(s);
__be32 ret;
- mutex_lock(&stp->st_mutex);
+ ret = nfsd4_lock_ol_stateid(stp);
+ if (ret)
+ goto out_put_stid;
ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
if (ret)
@@ -5036,11 +5182,13 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
lockowner(stp->st_stateowner)))
goto out;
+ stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_lock_stateid(stp);
ret = nfs_ok;
out:
mutex_unlock(&stp->st_mutex);
+out_put_stid:
nfs4_put_stid(s);
return ret;
}
@@ -5060,6 +5208,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
s = find_stateid_locked(cl, stateid);
if (!s)
goto out_unlock;
+ spin_lock(&s->sc_lock);
switch (s->sc_type) {
case NFS4_DELEG_STID:
ret = nfserr_locks_held;
@@ -5071,11 +5220,13 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
ret = nfserr_locks_held;
break;
case NFS4_LOCK_STID:
- atomic_inc(&s->sc_count);
+ spin_unlock(&s->sc_lock);
+ refcount_inc(&s->sc_count);
spin_unlock(&cl->cl_lock);
ret = nfsd4_free_lock_stateid(stateid, s);
goto out;
case NFS4_REVOKED_DELEG_STID:
+ spin_unlock(&s->sc_lock);
dp = delegstateid(s);
list_del_init(&dp->dl_recall_lru);
spin_unlock(&cl->cl_lock);
@@ -5084,6 +5235,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
/* Default falls through and returns nfserr_bad_stateid */
}
+ spin_unlock(&s->sc_lock);
out_unlock:
spin_unlock(&cl->cl_lock);
out:
@@ -5106,15 +5258,9 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
status = nfsd4_check_seqid(cstate, sop, seqid);
if (status)
return status;
- if (stp->st_stid.sc_type == NFS4_CLOSED_STID
- || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID)
- /*
- * "Closed" stateid's exist *only* to return
- * nfserr_replay_me from the previous step, and
- * revoked delegations are kept only for free_stateid.
- */
- return nfserr_bad_stateid;
- mutex_lock(&stp->st_mutex);
+ status = nfsd4_lock_ol_stateid(stp);
+ if (status != nfs_ok)
+ return status;
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
if (status == nfs_ok)
status = nfs4_check_fh(current_fh, &stp->st_stid);
@@ -5294,7 +5440,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
bool unhashed;
LIST_HEAD(reaplist);
- s->st_stid.sc_type = NFS4_CLOSED_STID;
spin_lock(&clp->cl_lock);
unhashed = unhash_open_stateid(s, &reaplist);
@@ -5334,10 +5479,17 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfsd4_bump_seqid(cstate, status);
if (status)
goto out;
+
+ stp->st_stid.sc_type = NFS4_CLOSED_STID;
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- mutex_unlock(&stp->st_mutex);
nfsd4_close_open_stateid(stp);
+ mutex_unlock(&stp->st_mutex);
+
+ /* See RFC5661 sectionm 18.2.4 */
+ if (stp->st_stid.sc_client->cl_minorversion)
+ memcpy(&close->cl_stateid, &close_stateid,
+ sizeof(close->cl_stateid));
/* put reference from nfs4_preprocess_seqid_op */
nfs4_put_stid(&stp->st_stid);
@@ -5363,7 +5515,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
dp = delegstateid(s);
- status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate));
+ status = nfsd4_stid_check_stateid_generation(stateid, &dp->dl_stid, nfsd4_has_session(cstate));
if (status)
goto put_stateid;
@@ -5569,16 +5721,43 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
return ret;
}
-static void
+static struct nfs4_ol_stateid *
+find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
+{
+ struct nfs4_ol_stateid *lst;
+ struct nfs4_client *clp = lo->lo_owner.so_client;
+
+ lockdep_assert_held(&clp->cl_lock);
+
+ list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
+ if (lst->st_stid.sc_type != NFS4_LOCK_STID)
+ continue;
+ if (lst->st_stid.sc_file == fp) {
+ refcount_inc(&lst->st_stid.sc_count);
+ return lst;
+ }
+ }
+ return NULL;
+}
+
+static struct nfs4_ol_stateid *
init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
struct nfs4_file *fp, struct inode *inode,
struct nfs4_ol_stateid *open_stp)
{
struct nfs4_client *clp = lo->lo_owner.so_client;
+ struct nfs4_ol_stateid *retstp;
- lockdep_assert_held(&clp->cl_lock);
+ mutex_init(&stp->st_mutex);
+ mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX);
+retry:
+ spin_lock(&clp->cl_lock);
+ spin_lock(&fp->fi_lock);
+ retstp = find_lock_stateid(lo, fp);
+ if (retstp)
+ goto out_unlock;
- atomic_inc(&stp->st_stid.sc_count);
+ refcount_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_LOCK_STID;
stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
get_nfs4_file(fp);
@@ -5586,29 +5765,22 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
- mutex_init(&stp->st_mutex);
list_add(&stp->st_locks, &open_stp->st_locks);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
- spin_lock(&fp->fi_lock);
list_add(&stp->st_perfile, &fp->fi_stateids);
+out_unlock:
spin_unlock(&fp->fi_lock);
-}
-
-static struct nfs4_ol_stateid *
-find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
-{
- struct nfs4_ol_stateid *lst;
- struct nfs4_client *clp = lo->lo_owner.so_client;
-
- lockdep_assert_held(&clp->cl_lock);
-
- list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
- if (lst->st_stid.sc_file == fp) {
- atomic_inc(&lst->st_stid.sc_count);
- return lst;
+ spin_unlock(&clp->cl_lock);
+ if (retstp) {
+ if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
+ nfs4_put_stid(&retstp->st_stid);
+ goto retry;
}
+ /* To keep mutex tracking happy */
+ mutex_unlock(&stp->st_mutex);
+ stp = retstp;
}
- return NULL;
+ return stp;
}
static struct nfs4_ol_stateid *
@@ -5621,26 +5793,25 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
struct nfs4_openowner *oo = openowner(ost->st_stateowner);
struct nfs4_client *clp = oo->oo_owner.so_client;
+ *new = false;
spin_lock(&clp->cl_lock);
lst = find_lock_stateid(lo, fi);
- if (lst == NULL) {
- spin_unlock(&clp->cl_lock);
- ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid);
- if (ns == NULL)
- return NULL;
-
- spin_lock(&clp->cl_lock);
- lst = find_lock_stateid(lo, fi);
- if (likely(!lst)) {
- lst = openlockstateid(ns);
- init_lock_stateid(lst, lo, fi, inode, ost);
- ns = NULL;
- *new = true;
- }
- }
spin_unlock(&clp->cl_lock);
- if (ns)
+ if (lst != NULL) {
+ if (nfsd4_lock_ol_stateid(lst) == nfs_ok)
+ goto out;
+ nfs4_put_stid(&lst->st_stid);
+ }
+ ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid);
+ if (ns == NULL)
+ return NULL;
+
+ lst = init_lock_stateid(openlockstateid(ns), lo, fi, inode, ost);
+ if (lst == openlockstateid(ns))
+ *new = true;
+ else
nfs4_put_stid(ns);
+out:
return lst;
}
@@ -5677,7 +5848,6 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
struct nfs4_lockowner *lo;
struct nfs4_ol_stateid *lst;
unsigned int strhashval;
- bool hashed;
lo = find_lockowner_str(cl, &lock->lk_new_owner);
if (!lo) {
@@ -5693,25 +5863,12 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
goto out;
}
-retry:
lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
if (lst == NULL) {
status = nfserr_jukebox;
goto out;
}
- mutex_lock(&lst->st_mutex);
-
- /* See if it's still hashed to avoid race with FREE_STATEID */
- spin_lock(&cl->cl_lock);
- hashed = !list_empty(&lst->st_perfile);
- spin_unlock(&cl->cl_lock);
-
- if (!hashed) {
- mutex_unlock(&lst->st_mutex);
- nfs4_put_stid(&lst->st_stid);
- goto retry;
- }
status = nfs_ok;
*plst = lst;
out:
@@ -5917,14 +6074,16 @@ out:
seqid_mutating_err(ntohl(status)))
lock_sop->lo_owner.so_seqid++;
- mutex_unlock(&lock_stp->st_mutex);
-
/*
* If this is a new, never-before-used stateid, and we are
* returning an error, then just go ahead and release it.
*/
- if (status && new)
+ if (status && new) {
+ lock_stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_lock_stateid(lock_stp);
+ }
+
+ mutex_unlock(&lock_stp->st_mutex);
nfs4_put_stid(&lock_stp->st_stid);
}
@@ -6944,6 +7103,10 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
nn->conf_name_tree = RB_ROOT;
nn->unconf_name_tree = RB_ROOT;
+ nn->boot_time = get_seconds();
+ nn->grace_ended = false;
+ nn->nfsd4_manager.block_opens = true;
+ INIT_LIST_HEAD(&nn->nfsd4_manager.list);
INIT_LIST_HEAD(&nn->client_lru);
INIT_LIST_HEAD(&nn->close_lru);
INIT_LIST_HEAD(&nn->del_recall_lru);
@@ -7001,13 +7164,10 @@ nfs4_state_start_net(struct net *net)
ret = nfs4_state_create_net(net);
if (ret)
return ret;
- nn->boot_time = get_seconds();
- nn->grace_ended = false;
- nn->nfsd4_manager.block_opens = true;
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
- printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
- nn->nfsd4_grace, net);
+ printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
+ nn->nfsd4_grace, net->ns.inum);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
return 0;
}
@@ -7080,7 +7240,7 @@ nfs4_state_shutdown_net(struct net *net)
spin_unlock(&nn->blocked_locks_lock);
while (!list_empty(&reaplist)) {
- nbl = list_first_entry(&nn->blocked_locks_lru,
+ nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
posix_unblock_lock(&nbl->nbl_lock);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6493df6b1bd5..d107b4426f7e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1241,6 +1241,9 @@ static __net_init int nfsd_init_net(struct net *net)
nn->nfsd4_grace = 90;
nn->clverifier_counter = prandom_u32();
nn->clientid_counter = prandom_u32();
+
+ atomic_set(&nn->ntf_refcnt, 0);
+ init_waitqueue_head(&nn->ntf_wq);
return 0;
out_idmap_error:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e02bd2783124..89cb484f1cfb 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -335,7 +335,8 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in sin;
- if (event != NETDEV_DOWN)
+ if ((event != NETDEV_DOWN) ||
+ !atomic_inc_not_zero(&nn->ntf_refcnt))
goto out;
if (nn->nfsd_serv) {
@@ -344,6 +345,8 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
sin.sin_addr.s_addr = ifa->ifa_local;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);
}
+ atomic_dec(&nn->ntf_refcnt);
+ wake_up(&nn->ntf_wq);
out:
return NOTIFY_DONE;
@@ -363,7 +366,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in6 sin6;
- if (event != NETDEV_DOWN)
+ if ((event != NETDEV_DOWN) ||
+ !atomic_inc_not_zero(&nn->ntf_refcnt))
goto out;
if (nn->nfsd_serv) {
@@ -374,7 +378,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
}
-
+ atomic_dec(&nn->ntf_refcnt);
+ wake_up(&nn->ntf_wq);
out:
return NOTIFY_DONE;
}
@@ -391,6 +396,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ atomic_dec(&nn->ntf_refcnt);
/* check if the notifier still has clients */
if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
@@ -398,6 +404,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
+ wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0);
/*
* write_ports can create the server without actually starting
@@ -447,7 +454,7 @@ void nfsd_reset_versions(void)
*/
static void set_max_drc(void)
{
- #define NFSD_DRC_SIZE_SHIFT 10
+ #define NFSD_DRC_SIZE_SHIFT 7
nfsd_drc_max_mem = (nr_free_buffer_pages()
>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
nfsd_drc_mem_used = 0;
@@ -517,7 +524,8 @@ int nfsd_create_serv(struct net *net)
register_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
- do_gettimeofday(&nn->nfssvc_boot); /* record boot time */
+ atomic_inc(&nn->ntf_refcnt);
+ ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
return 0;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 005c911b34ac..f3772ea8ba0d 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -36,6 +36,7 @@
#define _NFSD4_STATE_H
#include <linux/idr.h>
+#include <linux/refcount.h>
#include <linux/sunrpc/svc_xprt.h>
#include "nfsfh.h"
@@ -83,7 +84,7 @@ struct nfsd4_callback_ops {
* fields that are of general use to any stateid.
*/
struct nfs4_stid {
- atomic_t sc_count;
+ refcount_t sc_count;
#define NFS4_OPEN_STID 1
#define NFS4_LOCK_STID 2
#define NFS4_DELEG_STID 4
@@ -169,11 +170,13 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
struct nfsd4_slot {
u32 sl_seqid;
__be32 sl_status;
+ struct svc_cred sl_cred;
u32 sl_datalen;
u16 sl_opcnt;
#define NFSD4_SLOT_INUSE (1 << 0)
#define NFSD4_SLOT_CACHETHIS (1 << 1)
#define NFSD4_SLOT_INITIALIZED (1 << 2)
+#define NFSD4_SLOT_CACHED (1 << 3)
u8 sl_flags;
char sl_data[];
};
@@ -465,7 +468,7 @@ struct nfs4_clnt_odstate {
struct nfs4_client *co_client;
struct nfs4_file *co_file;
struct list_head co_perfile;
- atomic_t co_odcount;
+ refcount_t co_odcount;
};
/*
@@ -481,7 +484,7 @@ struct nfs4_clnt_odstate {
* the global state_lock spinlock.
*/
struct nfs4_file {
- atomic_t fi_ref;
+ refcount_t fi_ref;
spinlock_t fi_lock;
struct hlist_node fi_hash; /* hash on fi_fhandle */
struct list_head fi_stateids;
@@ -634,7 +637,7 @@ struct nfs4_file *find_file(struct knfsd_fh *fh);
void put_nfs4_file(struct nfs4_file *fi);
static inline void get_nfs4_file(struct nfs4_file *fi)
{
- atomic_inc(&fi->fi_ref);
+ refcount_inc(&fi->fi_ref);
}
struct file *find_any_file(struct nfs4_file *f);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 1e4edbf70052..bc29511b6405 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -649,9 +649,18 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
}
-static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
+/*
+ * The session reply cache only needs to cache replies that the client
+ * actually asked us to. But it's almost free for us to cache compounds
+ * consisting of only a SEQUENCE op, so we may as well cache those too.
+ * Also, the protocol doesn't give us a convenient response in the case
+ * of a replay of a solo SEQUENCE op that wasn't cached
+ * (RETRY_UNCACHED_REP can only be returned in the second op of a
+ * compound).
+ */
+static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp)
{
- return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
+ return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
|| nfsd4_is_solo_sequence(resp);
}
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 515d13c196da..1a2894aa0194 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -150,7 +150,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
if (err)
return err;
- inode = nilfs_new_inode(dir, S_IFLNK | S_IRWXUGO);
+ inode = nilfs_new_inode(dir, S_IFLNK | 0777);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index f65392fecb5c..9f3ffba41533 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1954,8 +1954,6 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
err, ii->vfs_inode.i_ino);
return err;
}
- mark_buffer_dirty(ibh);
- nilfs_mdt_mark_dirty(ifile);
spin_lock(&nilfs->ns_inode_lock);
if (likely(!ii->i_bh))
ii->i_bh = ibh;
@@ -1964,6 +1962,10 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
goto retry;
}
+ // Always redirty the buffer to avoid race condition
+ mark_buffer_dirty(ii->i_bh);
+ nilfs_mdt_mark_dirty(ifile);
+
clear_bit(NILFS_I_QUEUED, &ii->i_state);
set_bit(NILFS_I_BUSY, &ii->i_state);
list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
@@ -1977,7 +1979,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs)
{
struct nilfs_inode_info *ii, *n;
- int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
+ int during_mount = !(sci->sc_super->s_flags & SB_ACTIVE);
int defer_iput = false;
spin_lock(&nilfs->ns_inode_lock);
@@ -2400,11 +2402,11 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
return err;
}
-static void nilfs_construction_timeout(unsigned long data)
+static void nilfs_construction_timeout(struct timer_list *t)
{
- struct task_struct *p = (struct task_struct *)data;
+ struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer);
- wake_up_process(p);
+ wake_up_process(sci->sc_timer_task);
}
static void
@@ -2542,8 +2544,7 @@ static int nilfs_segctor_thread(void *arg)
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
int timeout = 0;
- sci->sc_timer.data = (unsigned long)current;
- sci->sc_timer.function = nilfs_construction_timeout;
+ sci->sc_timer_task = current;
/* start sync. */
sci->sc_task = current;
@@ -2674,7 +2675,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
INIT_LIST_HEAD(&sci->sc_gc_inodes);
INIT_LIST_HEAD(&sci->sc_iput_queue);
INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
- init_timer(&sci->sc_timer);
+ timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 1060949d7dd2..84084a4d9b3e 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -180,6 +180,7 @@ struct nilfs_sc_info {
unsigned long sc_watermark;
struct timer_list sc_timer;
+ struct task_struct *sc_timer_task;
struct task_struct *sc_task;
};
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1541a1e9221a..1341a41e7b43 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -630,22 +630,22 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
}
/**
- * nilfs_sufile_truncate_range - truncate range of segment array
- * @sufile: inode of segment usage file
- * @start: start segment number (inclusive)
- * @end: end segment number (inclusive)
- *
- * Return Value: On success, 0 is returned. On error, one of the
- * following negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - Invalid number of segments specified
- *
- * %-EBUSY - Dirty or active segments are present in the range
- */
+ * nilfs_sufile_truncate_range - truncate range of segment array
+ * @sufile: inode of segment usage file
+ * @start: start segment number (inclusive)
+ * @end: end segment number (inclusive)
+ *
+ * Return Value: On success, 0 is returned. On error, one of the
+ * following negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EINVAL - Invalid number of segments specified
+ *
+ * %-EBUSY - Dirty or active segments are present in the range
+ */
static int nilfs_sufile_truncate_range(struct inode *sufile,
__u64 start, __u64 end)
{
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 4fc018dfcfae..3073b646e1ba 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -141,7 +141,7 @@ void __nilfs_error(struct super_block *sb, const char *function,
if (nilfs_test_opt(nilfs, ERRORS_RO)) {
printk(KERN_CRIT "Remounting filesystem read-only\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
}
@@ -160,7 +160,6 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
ii->i_bh = NULL;
ii->i_state = 0;
ii->i_cno = 0;
- ii->vfs_inode.i_version = 1;
nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode);
return &ii->vfs_inode;
}
@@ -870,7 +869,7 @@ int nilfs_store_magic_and_option(struct super_block *sb,
/* FS independent flags */
#ifdef NILFS_ATIME_DISABLE
- sb->s_flags |= MS_NOATIME;
+ sb->s_flags |= SB_NOATIME;
#endif
nilfs_set_default_options(sb, sbp);
@@ -1134,7 +1133,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
err = -EINVAL;
goto restore_opts;
}
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL);
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL);
err = -EINVAL;
@@ -1144,12 +1143,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb))
+ if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
goto out;
- if (*flags & MS_RDONLY) {
+ if (*flags & SB_RDONLY) {
/* Shutting down log writer */
nilfs_detach_log_writer(sb);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
/*
* Remounting a valid RW partition RDONLY, so set
@@ -1179,7 +1178,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
root = NILFS_I(d_inode(sb->s_root))->i_root;
err = nilfs_attach_log_writer(sb, root);
@@ -1213,7 +1212,7 @@ static int nilfs_parse_snapshot_option(const char *option,
const char *msg = NULL;
int err;
- if (!(sd->flags & MS_RDONLY)) {
+ if (!(sd->flags & SB_RDONLY)) {
msg = "read-only option is not specified";
goto parse_error;
}
@@ -1287,7 +1286,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
struct dentry *root_dentry;
int err, s_new = false;
- if (!(flags & MS_RDONLY))
+ if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
@@ -1328,14 +1327,14 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev);
sb_set_blocksize(s, block_size(sd.bdev));
- err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
+ err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0);
if (err)
goto failed_super;
- s->s_flags |= MS_ACTIVE;
+ s->s_flags |= SB_ACTIVE;
} else if (!sd.cno) {
if (nilfs_tree_is_busy(s->s_root)) {
- if ((flags ^ s->s_flags) & MS_RDONLY) {
+ if ((flags ^ s->s_flags) & SB_RDONLY) {
nilfs_msg(s, KERN_ERR,
"the device already has a %s mount.",
sb_rdonly(s) ? "read-only" : "read/write");
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 2dd75bf619ad..1a85317e83f0 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -220,7 +220,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
if (!valid_fs) {
nilfs_msg(sb, KERN_WARNING, "mounting unchecked fs");
- if (s_flags & MS_RDONLY) {
+ if (s_flags & SB_RDONLY) {
nilfs_msg(sb, KERN_INFO,
"recovery required for readonly filesystem");
nilfs_msg(sb, KERN_INFO,
@@ -286,7 +286,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
if (valid_fs)
goto skip_recovery;
- if (s_flags & MS_RDONLY) {
+ if (s_flags & SB_RDONLY) {
__u64 features;
if (nilfs_test_opt(nilfs, NORECOVERY)) {
@@ -309,7 +309,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
err = -EROFS;
goto failed_unload;
}
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
} else if (nilfs_test_opt(nilfs, NORECOVERY)) {
nilfs_msg(sb, KERN_ERR,
"recovery cancelled because norecovery option was specified for a read/write mount");
@@ -737,7 +737,7 @@ struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno)
} else if (cno > root->cno) {
n = n->rb_right;
} else {
- atomic_inc(&root->count);
+ refcount_inc(&root->count);
spin_unlock(&nilfs->ns_cptree_lock);
return root;
}
@@ -776,7 +776,7 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
} else if (cno > root->cno) {
p = &(*p)->rb_right;
} else {
- atomic_inc(&root->count);
+ refcount_inc(&root->count);
spin_unlock(&nilfs->ns_cptree_lock);
kfree(new);
return root;
@@ -786,7 +786,7 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
new->cno = cno;
new->ifile = NULL;
new->nilfs = nilfs;
- atomic_set(&new->count, 1);
+ refcount_set(&new->count, 1);
atomic64_set(&new->inodes_count, 0);
atomic64_set(&new->blocks_count, 0);
@@ -806,7 +806,7 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
void nilfs_put_root(struct nilfs_root *root)
{
- if (atomic_dec_and_test(&root->count)) {
+ if (refcount_dec_and_test(&root->count)) {
struct the_nilfs *nilfs = root->nilfs;
nilfs_sysfs_delete_snapshot_group(root);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index b305c6f033e7..883d732b0259 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -27,6 +27,7 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/slab.h>
+#include <linux/refcount.h>
struct nilfs_sc_info;
struct nilfs_sysfs_dev_subgroups;
@@ -246,7 +247,7 @@ struct nilfs_root {
__u64 cno;
struct rb_node rb_node;
- atomic_t count;
+ refcount_t count;
struct the_nilfs *nilfs;
struct inode *ifile;
@@ -299,7 +300,7 @@ void nilfs_swap_super_block(struct the_nilfs *);
static inline void nilfs_get_root(struct nilfs_root *root)
{
- atomic_inc(&root->count);
+ refcount_inc(&root->count);
}
static inline int nilfs_valid_fs(struct the_nilfs *nilfs)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 81d8959b6aef..219b269c737e 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -67,7 +67,7 @@ void fsnotify_unmount_inodes(struct super_block *sb)
/*
* If i_count is zero, the inode cannot have any watches and
- * doing an __iget/iput with MS_ACTIVE clear would actually
+ * doing an __iget/iput with SB_ACTIVE clear would actually
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
diff --git a/fs/nsfs.c b/fs/nsfs.c
index ef243e14b6eb..7c6f76d29f56 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -255,5 +255,5 @@ void __init nsfs_init(void)
nsfs_mnt = kern_mount(&nsfs);
if (IS_ERR(nsfs_mnt))
panic("can't set nsfs up\n");
- nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER;
+ nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER;
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 3f70f041dbe9..bb7159f697f2 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -473,7 +473,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
#ifndef NTFS_RW
/* For read-only compiled driver, enforce read-only flag. */
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
#else /* NTFS_RW */
/*
* For the read-write compiled driver, if we are remounting read-write,
@@ -487,7 +487,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
* When remounting read-only, mark the volume clean if no volume errors
* have occurred.
*/
- if (sb_rdonly(sb) && !(*flags & MS_RDONLY)) {
+ if (sb_rdonly(sb) && !(*flags & SB_RDONLY)) {
static const char *es = ". Cannot remount read-write.";
/* Remounting read-write. */
@@ -548,7 +548,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
NVolSetErrors(vol);
return -EROFS;
}
- } else if (!sb_rdonly(sb) && (*flags & MS_RDONLY)) {
+ } else if (!sb_rdonly(sb) && (*flags & SB_RDONLY)) {
/* Remounting read-only. */
if (!NVolErrors(vol)) {
if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY))
@@ -1799,7 +1799,7 @@ static bool load_system_files(ntfs_volume *vol)
es3);
goto iput_mirr_err_out;
}
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s",
!vol->mftmirr_ino ? es1 : es2, es3);
} else
@@ -1937,7 +1937,7 @@ get_ctx_vol_failed:
es1, es2);
goto iput_vol_err_out;
}
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
} else
ntfs_warning(sb, "%s. Will not be able to remount "
@@ -1974,7 +1974,7 @@ get_ctx_vol_failed:
}
goto iput_logfile_err_out;
}
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
} else
ntfs_warning(sb, "%s. Will not be able to remount "
@@ -2019,7 +2019,7 @@ get_ctx_vol_failed:
es1, es2);
goto iput_root_err_out;
}
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
} else
ntfs_warning(sb, "%s. Will not be able to remount "
@@ -2042,7 +2042,7 @@ get_ctx_vol_failed:
goto iput_root_err_out;
}
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
/*
* Do not set NVolErrors() because ntfs_remount() might manage
* to set the dirty flag in which case all would be well.
@@ -2055,7 +2055,7 @@ get_ctx_vol_failed:
* If (still) a read-write mount, set the NT4 compatibility flag on
* newer NTFS version volumes.
*/
- if (!(sb->s_flags & MS_RDONLY) && (vol->major_ver > 1) &&
+ if (!(sb->s_flags & SB_RDONLY) && (vol->major_ver > 1) &&
ntfs_set_volume_flags(vol, VOLUME_MOUNTED_ON_NT4)) {
static const char *es1 = "Failed to set NT4 compatibility flag";
static const char *es2 = ". Run chkdsk.";
@@ -2069,7 +2069,7 @@ get_ctx_vol_failed:
goto iput_root_err_out;
}
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
NVolSetErrors(vol);
}
#endif
@@ -2087,7 +2087,7 @@ get_ctx_vol_failed:
goto iput_root_err_out;
}
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
NVolSetErrors(vol);
}
#endif /* NTFS_RW */
@@ -2128,7 +2128,7 @@ get_ctx_vol_failed:
es1, es2);
goto iput_quota_err_out;
}
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
} else
ntfs_warning(sb, "%s. Will not be able to remount "
@@ -2150,7 +2150,7 @@ get_ctx_vol_failed:
goto iput_quota_err_out;
}
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
NVolSetErrors(vol);
}
/*
@@ -2171,7 +2171,7 @@ get_ctx_vol_failed:
es1, es2);
goto iput_usnjrnl_err_out;
}
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
} else
ntfs_warning(sb, "%s. Will not be able to remount "
@@ -2194,7 +2194,7 @@ get_ctx_vol_failed:
goto iput_usnjrnl_err_out;
}
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
NVolSetErrors(vol);
}
#endif /* NTFS_RW */
@@ -2728,7 +2728,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
lockdep_off();
ntfs_debug("Entering.");
#ifndef NTFS_RW
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
#endif /* ! NTFS_RW */
/* Allocate a new ntfs_volume and place it in sb->s_fs_info. */
sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 8d779227370a..bebe59feca58 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -140,7 +140,7 @@ static void o2net_rx_until_empty(struct work_struct *work);
static void o2net_shutdown_sc(struct work_struct *work);
static void o2net_listen_data_ready(struct sock *sk);
static void o2net_sc_send_keep_req(struct work_struct *work);
-static void o2net_idle_timer(unsigned long data);
+static void o2net_idle_timer(struct timer_list *t);
static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
@@ -450,8 +450,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
INIT_DELAYED_WORK(&sc->sc_keepalive_work, o2net_sc_send_keep_req);
- setup_timer(&sc->sc_idle_timeout, o2net_idle_timer,
- (unsigned long)sc);
+ timer_setup(&sc->sc_idle_timeout, o2net_idle_timer, 0);
sclog(sc, "alloced\n");
@@ -1517,9 +1516,9 @@ static void o2net_sc_send_keep_req(struct work_struct *work)
/* socket shutdown does a del_timer_sync against this as it tears down.
* we can't start this timer until we've got to the point in sc buildup
* where shutdown is going to be involved */
-static void o2net_idle_timer(unsigned long data)
+static void o2net_idle_timer(struct timer_list *t)
{
- struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
+ struct o2net_sock_container *sc = from_timer(sc, t, sc_idle_timeout);
struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
#ifdef CONFIG_DEBUG_FS
unsigned long msecs = ktime_to_ms(ktime_get()) -
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index dc455d45a66a..a1d051055472 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -227,7 +227,7 @@ int ocfs2_should_update_atime(struct inode *inode,
return 0;
if ((inode->i_flags & S_NOATIME) ||
- ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
+ ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)))
return 0;
/*
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 040bbb6a6e4b..80efa5699fb0 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -675,9 +675,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
}
/* We're going to/from readonly mode. */
- if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) {
+ if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
/* Disable quota accounting before remounting RO */
- if (*flags & MS_RDONLY) {
+ if (*flags & SB_RDONLY) {
ret = ocfs2_susp_quotas(osb, 0);
if (ret < 0)
goto out;
@@ -691,8 +691,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto unlock_osb;
}
- if (*flags & MS_RDONLY) {
- sb->s_flags |= MS_RDONLY;
+ if (*flags & SB_RDONLY) {
+ sb->s_flags |= SB_RDONLY;
osb->osb_flags |= OCFS2_OSB_SOFT_RO;
} else {
if (osb->osb_flags & OCFS2_OSB_ERROR_FS) {
@@ -709,14 +709,14 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
ret = -EINVAL;
goto unlock_osb;
}
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
osb->osb_flags &= ~OCFS2_OSB_SOFT_RO;
}
trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags);
unlock_osb:
spin_unlock(&osb->osb_lock);
/* Enable quota accounting after remounting RW */
- if (!ret && !(*flags & MS_RDONLY)) {
+ if (!ret && !(*flags & SB_RDONLY)) {
if (sb_any_quota_suspended(sb))
ret = ocfs2_susp_quotas(osb, 1);
else
@@ -724,7 +724,7 @@ unlock_osb:
if (ret < 0) {
/* Return back changes... */
spin_lock(&osb->osb_lock);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
osb->osb_flags |= OCFS2_OSB_SOFT_RO;
spin_unlock(&osb->osb_lock);
goto out;
@@ -744,9 +744,9 @@ unlock_osb:
if (!ocfs2_is_hard_readonly(osb))
ocfs2_set_journal_params(osb);
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ?
- MS_POSIXACL : 0);
+ SB_POSIXACL : 0);
}
out:
return ret;
@@ -1057,10 +1057,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = OCFS2_SUPER_MAGIC;
- sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) |
- ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+ sb->s_flags = (sb->s_flags & ~(SB_POSIXACL | SB_NOSEC)) |
+ ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0);
- /* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
+ /* Hard readonly mode only if: bdev_read_only, SB_RDONLY,
* heartbeat=none */
if (bdev_read_only(sb->s_bdev)) {
if (!sb_rdonly(sb)) {
@@ -2057,7 +2057,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
sb->s_xattr = ocfs2_xattr_handlers;
sb->s_time_gran = 1;
- sb->s_flags |= MS_NOATIME;
+ sb->s_flags |= SB_NOATIME;
/* this is needed to support O_LARGEFILE */
cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
@@ -2568,7 +2568,7 @@ static int ocfs2_handle_error(struct super_block *sb)
return rv;
pr_crit("OCFS2: File system is now read-only.\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
ocfs2_set_ro_flag(osb, 0);
}
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 5fdf269ba82e..c5898c59d411 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -901,7 +901,7 @@ static int ocfs2_xattr_list_entry(struct super_block *sb,
case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:
case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT:
- if (!(sb->s_flags & MS_POSIXACL))
+ if (!(sb->s_flags & SB_POSIXACL))
return 0;
break;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 13215f26e321..2200662a9bf1 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -369,7 +369,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
static int openprom_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_NOATIME;
+ *flags |= SB_NOATIME;
return 0;
}
@@ -386,7 +386,7 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent)
struct op_inode_info *oi;
int ret;
- s->s_flags |= MS_NOATIME;
+ s->s_flags |= SB_NOATIME;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = OPENPROM_SUPER_MAGIC;
diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c
index c2d8233b1e82..480ea059a680 100644
--- a/fs/orangefs/acl.c
+++ b/fs/orangefs/acl.c
@@ -155,13 +155,11 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
int orangefs_init_acl(struct inode *inode, struct inode *dir)
{
- struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
struct posix_acl *default_acl, *acl;
umode_t mode = inode->i_mode;
+ struct iattr iattr;
int error = 0;
- ClearModeFlag(orangefs_inode);
-
error = posix_acl_create(dir, &mode, &default_acl, &acl);
if (error)
return error;
@@ -180,9 +178,11 @@ int orangefs_init_acl(struct inode *inode, struct inode *dir)
/* If mode of the inode was changed, then do a forcible ->setattr */
if (mode != inode->i_mode) {
- SetModeFlag(orangefs_inode);
+ memset(&iattr, 0, sizeof iattr);
inode->i_mode = mode;
- orangefs_flush_inode(inode);
+ iattr.ia_mode = mode;
+ iattr.ia_valid |= ATTR_MODE;
+ orangefs_inode_setattr(inode, &iattr);
}
return error;
diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c
index a8cc588d6224..e2c2699d8016 100644
--- a/fs/orangefs/dir.c
+++ b/fs/orangefs/dir.c
@@ -386,7 +386,6 @@ static int orangefs_dir_release(struct inode *inode, struct file *file)
{
struct orangefs_dir *od = file->private_data;
struct orangefs_dir_part *part = od->part;
- orangefs_flush_inode(inode);
while (part) {
struct orangefs_dir_part *next = part->next;
vfree(part);
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index e4a8e6a7eb17..1668fd645c45 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -383,9 +383,15 @@ out:
if (type == ORANGEFS_IO_READ) {
file_accessed(file);
} else {
- SetMtimeFlag(orangefs_inode);
- inode->i_mtime = current_time(inode);
- mark_inode_dirty_sync(inode);
+ file_update_time(file);
+ /*
+ * Must invalidate to ensure write loop doesn't
+ * prevent kernel from reading updated
+ * attribute. Size probably changed because of
+ * the write, and other clients could update
+ * any other attribute.
+ */
+ orangefs_inode->getattr_time = jiffies - 1;
}
}
@@ -615,8 +621,6 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
"orangefs_file_release: called on %pD\n",
file);
- orangefs_flush_inode(inode);
-
/*
* remove all associated inode pages from the page cache and
* readahead cache (if any); this forces an expensive refresh of
@@ -666,8 +670,6 @@ static int orangefs_fsync(struct file *file,
ret);
op_release(new_op);
-
- orangefs_flush_inode(file_inode(file));
return ret;
}
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 28825a5b6d09..fe1d705ad91f 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -290,6 +290,22 @@ int orangefs_permission(struct inode *inode, int mask)
return generic_permission(inode, mask);
}
+int orangefs_update_time(struct inode *inode, struct timespec *time, int flags)
+{
+ struct iattr iattr;
+ gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n",
+ get_khandle_from_ino(inode));
+ generic_update_time(inode, time, flags);
+ memset(&iattr, 0, sizeof iattr);
+ if (flags & S_ATIME)
+ iattr.ia_valid |= ATTR_ATIME;
+ if (flags & S_CTIME)
+ iattr.ia_valid |= ATTR_CTIME;
+ if (flags & S_MTIME)
+ iattr.ia_valid |= ATTR_MTIME;
+ return orangefs_inode_setattr(inode, &iattr);
+}
+
/* ORANGEDS2 implementation of VFS inode operations for files */
const struct inode_operations orangefs_file_inode_operations = {
.get_acl = orangefs_get_acl,
@@ -298,6 +314,7 @@ const struct inode_operations orangefs_file_inode_operations = {
.getattr = orangefs_getattr,
.listxattr = orangefs_listxattr,
.permission = orangefs_permission,
+ .update_time = orangefs_update_time,
};
static int orangefs_init_iops(struct inode *inode)
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 7e9e5d0ea3bc..c98bba2dbc94 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -22,7 +22,9 @@ static int orangefs_create(struct inode *dir,
{
struct orangefs_inode_s *parent = ORANGEFS_I(dir);
struct orangefs_kernel_op_s *new_op;
+ struct orangefs_object_kref ref;
struct inode *inode;
+ struct iattr iattr;
int ret;
gossip_debug(GOSSIP_NAME_DEBUG, "%s: %pd\n",
@@ -55,8 +57,10 @@ static int orangefs_create(struct inode *dir,
if (ret < 0)
goto out;
- inode = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0,
- &new_op->downcall.resp.create.refn);
+ ref = new_op->downcall.resp.create.refn;
+ op_release(new_op);
+
+ inode = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0, &ref);
if (IS_ERR(inode)) {
gossip_err("%s: Failed to allocate inode for file :%pd:\n",
__func__,
@@ -82,12 +86,13 @@ static int orangefs_create(struct inode *dir,
__func__,
dentry);
- SetMtimeFlag(parent);
dir->i_mtime = dir->i_ctime = current_time(dir);
+ memset(&iattr, 0, sizeof iattr);
+ iattr.ia_valid |= ATTR_MTIME;
+ orangefs_inode_setattr(dir, &iattr);
mark_inode_dirty_sync(dir);
ret = 0;
out:
- op_release(new_op);
gossip_debug(GOSSIP_NAME_DEBUG,
"%s: %pd: returning %d\n",
__func__,
@@ -221,6 +226,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
struct orangefs_inode_s *parent = ORANGEFS_I(dir);
struct orangefs_kernel_op_s *new_op;
+ struct iattr iattr;
int ret;
gossip_debug(GOSSIP_NAME_DEBUG,
@@ -253,8 +259,10 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
if (!ret) {
drop_nlink(inode);
- SetMtimeFlag(parent);
dir->i_mtime = dir->i_ctime = current_time(dir);
+ memset(&iattr, 0, sizeof iattr);
+ iattr.ia_valid |= ATTR_MTIME;
+ orangefs_inode_setattr(dir, &iattr);
mark_inode_dirty_sync(dir);
}
return ret;
@@ -266,7 +274,9 @@ static int orangefs_symlink(struct inode *dir,
{
struct orangefs_inode_s *parent = ORANGEFS_I(dir);
struct orangefs_kernel_op_s *new_op;
+ struct orangefs_object_kref ref;
struct inode *inode;
+ struct iattr iattr;
int mode = 755;
int ret;
@@ -307,8 +317,10 @@ static int orangefs_symlink(struct inode *dir,
goto out;
}
- inode = orangefs_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0,
- &new_op->downcall.resp.sym.refn);
+ ref = new_op->downcall.resp.sym.refn;
+ op_release(new_op);
+
+ inode = orangefs_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0, &ref);
if (IS_ERR(inode)) {
gossip_err
("*** Failed to allocate orangefs symlink inode\n");
@@ -331,12 +343,13 @@ static int orangefs_symlink(struct inode *dir,
get_khandle_from_ino(inode),
dentry);
- SetMtimeFlag(parent);
dir->i_mtime = dir->i_ctime = current_time(dir);
+ memset(&iattr, 0, sizeof iattr);
+ iattr.ia_valid |= ATTR_MTIME;
+ orangefs_inode_setattr(dir, &iattr);
mark_inode_dirty_sync(dir);
ret = 0;
out:
- op_release(new_op);
return ret;
}
@@ -344,7 +357,9 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
{
struct orangefs_inode_s *parent = ORANGEFS_I(dir);
struct orangefs_kernel_op_s *new_op;
+ struct orangefs_object_kref ref;
struct inode *inode;
+ struct iattr iattr;
int ret;
new_op = op_alloc(ORANGEFS_VFS_OP_MKDIR);
@@ -373,8 +388,10 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
goto out;
}
- inode = orangefs_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0,
- &new_op->downcall.resp.mkdir.refn);
+ ref = new_op->downcall.resp.mkdir.refn;
+ op_release(new_op);
+
+ inode = orangefs_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0, &ref);
if (IS_ERR(inode)) {
gossip_err("*** Failed to allocate orangefs dir inode\n");
ret = PTR_ERR(inode);
@@ -400,11 +417,12 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
* NOTE: we have no good way to keep nlink consistent for directories
* across clients; keep constant at 1.
*/
- SetMtimeFlag(parent);
dir->i_mtime = dir->i_ctime = current_time(dir);
+ memset(&iattr, 0, sizeof iattr);
+ iattr.ia_valid |= ATTR_MTIME;
+ orangefs_inode_setattr(dir, &iattr);
mark_inode_dirty_sync(dir);
out:
- op_release(new_op);
return ret;
}
@@ -470,4 +488,5 @@ const struct inode_operations orangefs_dir_inode_operations = {
.getattr = orangefs_getattr,
.listxattr = orangefs_listxattr,
.permission = orangefs_permission,
+ .update_time = orangefs_update_time,
};
diff --git a/fs/orangefs/orangefs-debug.h b/fs/orangefs/orangefs-debug.h
index b6001bb28f5a..c7db56a31b92 100644
--- a/fs/orangefs/orangefs-debug.h
+++ b/fs/orangefs/orangefs-debug.h
@@ -15,8 +15,10 @@
#ifdef __KERNEL__
#include <linux/types.h>
+#include <linux/kernel.h>
#else
#include <stdint.h>
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
#define GOSSIP_NO_DEBUG (__u64)0
@@ -88,6 +90,6 @@ static struct __keyword_mask_s s_kmod_keyword_mask_map[] = {
};
static const int num_kmod_keyword_mask_map = (int)
- (sizeof(s_kmod_keyword_mask_map) / sizeof(struct __keyword_mask_s));
+ (ARRAY_SIZE(s_kmod_keyword_mask_map));
#endif /* __ORANGEFS_DEBUG_H */
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 004af348fb80..97adf7d100b5 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -209,37 +209,10 @@ struct orangefs_inode_s {
struct inode vfs_inode;
sector_t last_failed_block_index_read;
- /*
- * State of in-memory attributes not yet flushed to disk associated
- * with this object
- */
- unsigned long pinode_flags;
-
unsigned long getattr_time;
u32 getattr_mask;
};
-#define P_ATIME_FLAG 0
-#define P_MTIME_FLAG 1
-#define P_CTIME_FLAG 2
-#define P_MODE_FLAG 3
-
-#define ClearAtimeFlag(pinode) clear_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
-#define SetAtimeFlag(pinode) set_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
-#define AtimeFlag(pinode) test_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
-
-#define ClearMtimeFlag(pinode) clear_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
-#define SetMtimeFlag(pinode) set_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
-#define MtimeFlag(pinode) test_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
-
-#define ClearCtimeFlag(pinode) clear_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
-#define SetCtimeFlag(pinode) set_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
-#define CtimeFlag(pinode) test_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
-
-#define ClearModeFlag(pinode) clear_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
-#define SetModeFlag(pinode) set_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
-#define ModeFlag(pinode) test_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
-
/* per superblock private orangefs info */
struct orangefs_sb_info_s {
struct orangefs_khandle root_khandle;
@@ -275,12 +248,6 @@ struct orangefs_kiocb_s {
/* orangefs kernel operation type */
struct orangefs_kernel_op_s *op;
- /* The user space buffers from/to which I/O is being staged */
- struct iovec *iov;
-
- /* number of elements in the iovector */
- unsigned long nr_segs;
-
/* set to indicate the type of the operation */
int rw;
@@ -442,6 +409,8 @@ int orangefs_getattr(const struct path *path, struct kstat *stat,
int orangefs_permission(struct inode *inode, int mask);
+int orangefs_update_time(struct inode *, struct timespec *, int);
+
/*
* defined in xattr.c
*/
@@ -484,8 +453,6 @@ bool __is_daemon_in_service(void);
*/
__s32 fsid_of_op(struct orangefs_kernel_op_s *op);
-int orangefs_flush_inode(struct inode *inode);
-
ssize_t orangefs_inode_getxattr(struct inode *inode,
const char *name,
void *buffer,
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
index f82336496311..97fe93129f38 100644
--- a/fs/orangefs/orangefs-utils.c
+++ b/fs/orangefs/orangefs-utils.c
@@ -4,6 +4,7 @@
*
* See COPYING in top-level directory.
*/
+#include <linux/kernel.h>
#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-dev-proto.h"
@@ -437,89 +438,8 @@ int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
op_release(new_op);
- /*
- * successful setattr should clear the atime, mtime and
- * ctime flags.
- */
- if (ret == 0) {
- ClearAtimeFlag(orangefs_inode);
- ClearMtimeFlag(orangefs_inode);
- ClearCtimeFlag(orangefs_inode);
- ClearModeFlag(orangefs_inode);
+ if (ret == 0)
orangefs_inode->getattr_time = jiffies - 1;
- }
-
- return ret;
-}
-
-int orangefs_flush_inode(struct inode *inode)
-{
- /*
- * If it is a dirty inode, this function gets called.
- * Gather all the information that needs to be setattr'ed
- * Right now, this will only be used for mode, atime, mtime
- * and/or ctime.
- */
- struct iattr wbattr;
- int ret;
- int mtime_flag;
- int ctime_flag;
- int atime_flag;
- int mode_flag;
- struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
-
- memset(&wbattr, 0, sizeof(wbattr));
-
- /*
- * check inode flags up front, and clear them if they are set. This
- * will prevent multiple processes from all trying to flush the same
- * inode if they call close() simultaneously
- */
- mtime_flag = MtimeFlag(orangefs_inode);
- ClearMtimeFlag(orangefs_inode);
- ctime_flag = CtimeFlag(orangefs_inode);
- ClearCtimeFlag(orangefs_inode);
- atime_flag = AtimeFlag(orangefs_inode);
- ClearAtimeFlag(orangefs_inode);
- mode_flag = ModeFlag(orangefs_inode);
- ClearModeFlag(orangefs_inode);
-
- /* -- Lazy atime,mtime and ctime update --
- * Note: all times are dictated by server in the new scheme
- * and not by the clients
- *
- * Also mode updates are being handled now..
- */
-
- if (mtime_flag)
- wbattr.ia_valid |= ATTR_MTIME;
- if (ctime_flag)
- wbattr.ia_valid |= ATTR_CTIME;
- if (atime_flag)
- wbattr.ia_valid |= ATTR_ATIME;
-
- if (mode_flag) {
- wbattr.ia_mode = inode->i_mode;
- wbattr.ia_valid |= ATTR_MODE;
- }
-
- gossip_debug(GOSSIP_UTILS_DEBUG,
- "*********** orangefs_flush_inode: %pU "
- "(ia_valid %d)\n",
- get_khandle_from_ino(inode),
- wbattr.ia_valid);
- if (wbattr.ia_valid == 0) {
- gossip_debug(GOSSIP_UTILS_DEBUG,
- "orangefs_flush_inode skipping setattr()\n");
- return 0;
- }
-
- gossip_debug(GOSSIP_UTILS_DEBUG,
- "orangefs_flush_inode (%pU) writing mode %o\n",
- get_khandle_from_ino(inode),
- inode->i_mode);
-
- ret = orangefs_inode_setattr(inode, &wbattr);
return ret;
}
@@ -606,7 +526,7 @@ int orangefs_normalize_to_errno(__s32 error_code)
/* Convert ORANGEFS encoded errno values into regular errno values. */
} else if ((-error_code) & ORANGEFS_ERROR_BIT) {
i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
- if (i < sizeof(PINT_errno_mapping)/sizeof(*PINT_errno_mapping))
+ if (i < ARRAY_SIZE(PINT_errno_mapping))
error_code = -PINT_errno_mapping[i];
else
error_code = -EINVAL;
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 47ebd9bfd1a1..36f1390b5ed7 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -40,7 +40,7 @@ static int orangefs_show_options(struct seq_file *m, struct dentry *root)
{
struct orangefs_sb_info_s *orangefs_sb = ORANGEFS_SB(root->d_sb);
- if (root->d_sb->s_flags & MS_POSIXACL)
+ if (root->d_sb->s_flags & SB_POSIXACL)
seq_puts(m, ",acl");
if (orangefs_sb->flags & ORANGEFS_OPT_INTR)
seq_puts(m, ",intr");
@@ -60,7 +60,7 @@ static int parse_mount_options(struct super_block *sb, char *options,
* Force any potential flags that might be set from the mount
* to zero, ie, initialize to unset.
*/
- sb->s_flags &= ~MS_POSIXACL;
+ sb->s_flags &= ~SB_POSIXACL;
orangefs_sb->flags &= ~ORANGEFS_OPT_INTR;
orangefs_sb->flags &= ~ORANGEFS_OPT_LOCAL_LOCK;
@@ -73,7 +73,7 @@ static int parse_mount_options(struct super_block *sb, char *options,
token = match_token(p, tokens, args);
switch (token) {
case Opt_acl:
- sb->s_flags |= MS_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
break;
case Opt_intr:
orangefs_sb->flags |= ORANGEFS_OPT_INTR;
@@ -99,8 +99,6 @@ static void orangefs_inode_cache_ctor(void *req)
inode_init_once(&orangefs_inode->vfs_inode);
init_rwsem(&orangefs_inode->xattr_sem);
-
- orangefs_inode->vfs_inode.i_version = 1;
}
static struct inode *orangefs_alloc_inode(struct super_block *sb)
@@ -119,7 +117,6 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb)
orangefs_inode->refn.fs_id = ORANGEFS_FS_ID_NULL;
orangefs_inode->last_failed_block_index_read = 0;
memset(orangefs_inode->link_target, 0, sizeof(orangefs_inode->link_target));
- orangefs_inode->pinode_flags = 0;
gossip_debug(GOSSIP_SUPER_DEBUG,
"orangefs_alloc_inode: allocated %p\n",
@@ -299,21 +296,9 @@ void fsid_key_table_finalize(void)
{
}
-/* Called whenever the VFS dirties the inode in response to atime updates */
-static void orangefs_dirty_inode(struct inode *inode, int flags)
-{
- struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
-
- gossip_debug(GOSSIP_SUPER_DEBUG,
- "orangefs_dirty_inode: %pU\n",
- get_khandle_from_ino(inode));
- SetAtimeFlag(orangefs_inode);
-}
-
static const struct super_operations orangefs_s_ops = {
.alloc_inode = orangefs_alloc_inode,
.destroy_inode = orangefs_destroy_inode,
- .dirty_inode = orangefs_dirty_inode,
.drop_inode = generic_delete_inode,
.statfs = orangefs_statfs,
.remount_fs = orangefs_remount_fs,
@@ -522,7 +507,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
ret = orangefs_fill_sb(sb,
&new_op->downcall.resp.fs_mount, data,
- flags & MS_SILENT ? 1 : 0);
+ flags & SB_SILENT ? 1 : 0);
if (ret) {
d = ERR_PTR(ret);
diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c
index d856cdf91763..db107fe91ab3 100644
--- a/fs/orangefs/symlink.c
+++ b/fs/orangefs/symlink.c
@@ -15,4 +15,5 @@ const struct inode_operations orangefs_symlink_inode_operations = {
.getattr = orangefs_getattr,
.listxattr = orangefs_listxattr,
.permission = orangefs_permission,
+ .update_time = orangefs_update_time,
};
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index c441f9387a1b..eb3b8d39fb61 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -22,7 +22,6 @@
#include <linux/ratelimit.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
-#include "ovl_entry.h"
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
@@ -486,6 +485,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
{
struct inode *udir = c->destdir->d_inode;
+ struct inode *inode;
struct dentry *newdentry = NULL;
struct dentry *temp = NULL;
int err;
@@ -508,7 +508,11 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
if (err)
goto out_cleanup;
- ovl_inode_update(d_inode(c->dentry), newdentry);
+ inode = d_inode(c->dentry);
+ ovl_inode_update(inode, newdentry);
+ if (S_ISDIR(inode->i_mode))
+ ovl_set_flag(OVL_WHITEOUTS, inode);
+
out:
dput(temp);
return err;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index cc961a3bd3bd..e13921824c70 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -181,6 +181,11 @@ static bool ovl_type_origin(struct dentry *dentry)
return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
}
+static bool ovl_may_have_whiteouts(struct dentry *dentry)
+{
+ return ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
+}
+
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct cattr *attr, struct dentry *hardlink)
{
@@ -300,7 +305,6 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
{
int err;
struct dentry *ret = NULL;
- enum ovl_path_type type = ovl_path_type(dentry);
LIST_HEAD(list);
err = ovl_check_empty_dir(dentry, &list);
@@ -313,13 +317,13 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
* When removing an empty opaque directory, then it makes no sense to
* replace it with an exact replica of itself.
*
- * If no upperdentry then skip clearing whiteouts.
+ * If upperdentry has whiteouts, clear them.
*
* Can race with copy-up, since we don't hold the upperdir mutex.
* Doesn't matter, since copy-up can't create a non-empty directory
* from an empty one.
*/
- if (OVL_TYPE_UPPER(type) && OVL_TYPE_MERGE(type))
+ if (!list_empty(&list))
ret = ovl_clear_empty(dentry, &list);
out_free:
@@ -698,8 +702,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
struct dentry *opaquedir = NULL;
int err;
- /* Redirect dir can be !ovl_lower_positive && OVL_TYPE_MERGE */
- if (is_dir && ovl_dentry_get_redirect(dentry)) {
+ /* Redirect/origin dir can be !ovl_lower_positive && not clean */
+ if (is_dir && (ovl_dentry_get_redirect(dentry) ||
+ ovl_may_have_whiteouts(dentry))) {
opaquedir = ovl_check_empty_and_clear(dentry);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
@@ -946,7 +951,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
old_cred = ovl_override_creds(old->d_sb);
- if (overwrite && new_is_dir && ovl_type_merge_or_lower(new)) {
+ if (overwrite && new_is_dir && (ovl_type_merge_or_lower(new) ||
+ ovl_may_have_whiteouts(new))) {
opaquedir = ovl_check_empty_and_clear(new);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir)) {
@@ -1069,9 +1075,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
drop_nlink(d_inode(new));
}
- ovl_dentry_version_inc(old->d_parent,
- !overwrite && ovl_type_origin(new));
- ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old));
+ ovl_dentry_version_inc(old->d_parent, ovl_type_origin(old) ||
+ (!overwrite && ovl_type_origin(new)));
+ ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old) ||
+ (d_inode(new) && ovl_type_origin(new)));
out_dput:
dput(newdentry);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 321511ed8c42..00b6b294272a 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -15,6 +15,14 @@
#include <linux/ratelimit.h>
#include "overlayfs.h"
+
+static dev_t ovl_get_pseudo_dev(struct dentry *dentry)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ return oe->lowerstack[0].layer->pseudo_dev;
+}
+
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
int err;
@@ -66,6 +74,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
struct path realpath;
const struct cred *old_cred;
bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
+ bool samefs = ovl_same_sb(dentry->d_sb);
int err;
type = ovl_path_real(dentry, &realpath);
@@ -75,16 +84,13 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
goto out;
/*
- * When all layers are on the same fs, all real inode number are
- * unique, so we use the overlay st_dev, which is friendly to du -x.
- *
- * We also use st_ino of the copy up origin, if we know it.
- * This guaranties constant st_dev/st_ino across copy up.
+ * For non-dir or same fs, we use st_ino of the copy up origin, if we
+ * know it. This guaranties constant st_dev/st_ino across copy up.
*
* If filesystem supports NFS export ops, this also guaranties
* persistent st_ino across mount cycle.
*/
- if (ovl_same_sb(dentry->d_sb)) {
+ if (!is_dir || samefs) {
if (OVL_TYPE_ORIGIN(type)) {
struct kstat lowerstat;
u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
@@ -95,7 +101,6 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (err)
goto out;
- WARN_ON_ONCE(stat->dev != lowerstat.dev);
/*
* Lower hardlinks may be broken on copy up to different
* upper files, so we cannot use the lower origin st_ino
@@ -107,17 +112,36 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (is_dir || lowerstat.nlink == 1 ||
ovl_test_flag(OVL_INDEX, d_inode(dentry)))
stat->ino = lowerstat.ino;
+
+ if (samefs)
+ WARN_ON_ONCE(stat->dev != lowerstat.dev);
+ else
+ stat->dev = ovl_get_pseudo_dev(dentry);
}
- stat->dev = dentry->d_sb->s_dev;
- } else if (is_dir) {
+ if (samefs) {
+ /*
+ * When all layers are on the same fs, all real inode
+ * number are unique, so we use the overlay st_dev,
+ * which is friendly to du -x.
+ */
+ stat->dev = dentry->d_sb->s_dev;
+ } else if (!OVL_TYPE_UPPER(type)) {
+ /*
+ * For non-samefs setup, to make sure that st_dev/st_ino
+ * pair is unique across the system, we use a unique
+ * anonymous st_dev for lower layer inode.
+ */
+ stat->dev = ovl_get_pseudo_dev(dentry);
+ }
+ } else {
/*
- * If not all layers are on the same fs the pair {real st_ino;
- * overlay st_dev} is not unique, so use the non persistent
- * overlay st_ino.
- *
* Always use the overlay st_dev for directories, so 'find
* -xdev' will scan the entire overlay mount and won't cross the
* overlay mount boundaries.
+ *
+ * If not all layers are on the same fs the pair {real st_ino;
+ * overlay st_dev} is not unique, so use the non persistent
+ * overlay st_ino for directories.
*/
stat->dev = dentry->d_sb->s_dev;
stat->ino = dentry->d_inode->i_ino;
@@ -409,6 +433,7 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
#ifdef CONFIG_LOCKDEP
static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
+ static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING];
int depth = inode->i_sb->s_stack_depth - 1;
@@ -419,6 +444,8 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
else
lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
+
+ lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]);
#endif
}
@@ -657,6 +684,16 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
if (upperdentry && ovl_is_impuredir(upperdentry))
ovl_set_flag(OVL_IMPURE, inode);
+ /* Check for non-merge dir that may have whiteouts */
+ if (S_ISDIR(realinode->i_mode)) {
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ if (((upperdentry && lowerdentry) || oe->numlower > 1) ||
+ ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
+ ovl_set_flag(OVL_WHITEOUTS, inode);
+ }
+ }
+
if (inode->i_state & I_NEW)
unlock_new_inode(inode);
out:
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index a12dc10bf726..625ed8066570 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -15,7 +15,6 @@
#include <linux/mount.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
-#include "ovl_entry.h"
struct ovl_lookup_data {
struct qstr name;
@@ -286,16 +285,15 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
static int ovl_check_origin(struct dentry *upperdentry,
- struct path *lowerstack, unsigned int numlower,
- struct path **stackp, unsigned int *ctrp)
+ struct ovl_path *lower, unsigned int numlower,
+ struct ovl_path **stackp, unsigned int *ctrp)
{
struct vfsmount *mnt;
struct dentry *origin = NULL;
int i;
-
for (i = 0; i < numlower; i++) {
- mnt = lowerstack[i].mnt;
+ mnt = lower[i].layer->mnt;
origin = ovl_get_origin(upperdentry, mnt);
if (IS_ERR(origin))
return PTR_ERR(origin);
@@ -309,12 +307,12 @@ static int ovl_check_origin(struct dentry *upperdentry,
BUG_ON(*ctrp);
if (!*stackp)
- *stackp = kmalloc(sizeof(struct path), GFP_KERNEL);
+ *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
if (!*stackp) {
dput(origin);
return -ENOMEM;
}
- **stackp = (struct path) { .dentry = origin, .mnt = mnt };
+ **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer};
*ctrp = 1;
return 0;
@@ -350,8 +348,8 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
*
* Return 0 on match, -ESTALE on mismatch, < 0 on error.
*/
-int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
- struct dentry *origin, bool is_upper, bool set)
+int ovl_verify_origin(struct dentry *dentry, struct dentry *origin,
+ bool is_upper, bool set)
{
struct inode *inode;
struct ovl_fh *fh;
@@ -384,13 +382,13 @@ fail:
* OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
* Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
*/
-int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
unsigned int numlower)
{
struct ovl_fh *fh = NULL;
size_t len;
- struct path origin = { };
- struct path *stack = &origin;
+ struct ovl_path origin = { };
+ struct ovl_path *stack = &origin;
unsigned int ctr = 0;
int err;
@@ -429,7 +427,7 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack,
if (err)
goto fail;
- err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr);
+ err = ovl_check_origin(index, lower, numlower, &stack, &ctr);
if (!err && !ctr)
err = -ESTALE;
if (err)
@@ -568,11 +566,24 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
idx++;
}
BUG_ON(idx > oe->numlower);
- *path = oe->lowerstack[idx - 1];
+ path->dentry = oe->lowerstack[idx - 1].dentry;
+ path->mnt = oe->lowerstack[idx - 1].layer->mnt;
return (idx < oe->numlower) ? idx + 1 : -1;
}
+static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path)
+{
+ int i;
+
+ for (i = 0; i < ofs->numlower; i++) {
+ if (ofs->lower_layers[i].mnt == path->layer->mnt)
+ break;
+ }
+
+ return i;
+}
+
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
@@ -581,7 +592,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct ovl_entry *poe = dentry->d_parent->d_fsdata;
struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
- struct path *stack = NULL;
+ struct ovl_path *stack = NULL;
struct dentry *upperdir, *upperdentry = NULL;
struct dentry *index = NULL;
unsigned int ctr = 0;
@@ -630,7 +641,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
err = ovl_check_origin(upperdentry, roe->lowerstack,
roe->numlower, &stack, &ctr);
if (err)
- goto out;
+ goto out_put_upper;
}
if (d.redirect) {
@@ -646,17 +657,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (!d.stop && poe->numlower) {
err = -ENOMEM;
- stack = kcalloc(ofs->numlower, sizeof(struct path),
+ stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
GFP_KERNEL);
if (!stack)
goto out_put_upper;
}
for (i = 0; !d.stop && i < poe->numlower; i++) {
- struct path lowerpath = poe->lowerstack[i];
+ struct ovl_path lower = poe->lowerstack[i];
d.last = i == poe->numlower - 1;
- err = ovl_lookup_layer(lowerpath.dentry, &d, &this);
+ err = ovl_lookup_layer(lower.dentry, &d, &this);
if (err)
goto out_put;
@@ -664,7 +675,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
continue;
stack[ctr].dentry = this;
- stack[ctr].mnt = lowerpath.mnt;
+ stack[ctr].layer = lower.layer;
ctr++;
if (d.stop)
@@ -674,10 +685,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
poe = roe;
/* Find the current layer on the root dentry */
- for (i = 0; i < poe->numlower; i++)
- if (poe->lowerstack[i].mnt == lowerpath.mnt)
- break;
- if (WARN_ON(i == poe->numlower))
+ i = ovl_find_layer(ofs, &lower);
+ if (WARN_ON(i == ofs->numlower))
break;
}
}
@@ -700,7 +709,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
goto out_put;
oe->opaque = upperopaque;
- memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
+ memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
dentry->d_fsdata = oe;
if (upperdentry)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index d9a0edd4e57e..13eab09a6b6f 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/uuid.h>
+#include "ovl_entry.h"
enum ovl_path_type {
__OVL_PATH_UPPER = (1 << 0),
@@ -28,7 +29,10 @@ enum ovl_path_type {
#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
enum ovl_flag {
+ /* Pure upper dir that may contain non pure upper entries */
OVL_IMPURE,
+ /* Non-merge dir that may contain whiteout entries */
+ OVL_WHITEOUTS,
OVL_INDEX,
};
@@ -223,6 +227,7 @@ bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry);
void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_check_origin_xattr(struct dentry *dentry);
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
const char *name, const void *value, size_t size,
@@ -244,9 +249,9 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
/* namei.c */
-int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
- struct dentry *origin, bool is_upper, bool set);
-int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+int ovl_verify_origin(struct dentry *dentry, struct dentry *origin,
+ bool is_upper, bool set);
+int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
unsigned int numlower);
int ovl_get_index_name(struct dentry *origin, struct qstr *name);
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
@@ -263,7 +268,7 @@ int ovl_check_d_type_supported(struct path *realpath);
void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
struct dentry *dentry, int level);
int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
- struct path *lowerstack, unsigned int numlower);
+ struct ovl_path *lower, unsigned int numlower);
/* inode.c */
int ovl_set_nlink_upper(struct dentry *dentry);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 36b49bd09264..752bab645879 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -17,11 +17,21 @@ struct ovl_config {
bool index;
};
+struct ovl_layer {
+ struct vfsmount *mnt;
+ dev_t pseudo_dev;
+};
+
+struct ovl_path {
+ struct ovl_layer *layer;
+ struct dentry *dentry;
+};
+
/* private information held for overlayfs's superblock */
struct ovl_fs {
struct vfsmount *upper_mnt;
unsigned numlower;
- struct vfsmount **lower_mnt;
+ struct ovl_layer *lower_layers;
/* workbasedir is the path at workdir= mount option */
struct dentry *workbasedir;
/* workdir is the 'work' directory under workbasedir */
@@ -52,7 +62,7 @@ struct ovl_entry {
struct rcu_head rcu;
};
unsigned numlower;
- struct path lowerstack[];
+ struct ovl_path lowerstack[];
};
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index c310e3ff7f3f..0daa4354fec4 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -26,6 +26,7 @@ struct ovl_cache_entry {
struct list_head l_node;
struct rb_node node;
struct ovl_cache_entry *next_maybe_whiteout;
+ bool is_upper;
bool is_whiteout;
char name[];
};
@@ -158,6 +159,7 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
/* Defer setting d_ino for upper entry to ovl_iterate() */
if (ovl_calc_d_ino(rdd, p))
p->ino = 0;
+ p->is_upper = rdd->is_upper;
p->is_whiteout = false;
if (d_type == DT_CHR) {
@@ -316,21 +318,37 @@ static inline int ovl_dir_read(struct path *realpath,
return err;
}
+/*
+ * Can we iterate real dir directly?
+ *
+ * Non-merge dir may contain whiteouts from a time it was a merge upper, before
+ * lower dir was removed under it and possibly before it was rotated from upper
+ * to lower layer.
+ */
+static bool ovl_dir_is_real(struct dentry *dir)
+{
+ return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir));
+}
+
static void ovl_dir_reset(struct file *file)
{
struct ovl_dir_file *od = file->private_data;
struct ovl_dir_cache *cache = od->cache;
struct dentry *dentry = file->f_path.dentry;
- enum ovl_path_type type = ovl_path_type(dentry);
+ bool is_real;
if (cache && ovl_dentry_version_get(dentry) != cache->version) {
ovl_cache_put(od, dentry);
od->cache = NULL;
od->cursor = NULL;
}
- WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
- if (od->is_real && OVL_TYPE_MERGE(type))
+ is_real = ovl_dir_is_real(dentry);
+ if (od->is_real != is_real) {
+ /* is_real can only become false when dir is copied up */
+ if (WARN_ON(is_real))
+ return;
od->is_real = false;
+ }
}
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
@@ -816,7 +834,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file)
return PTR_ERR(realfile);
}
od->realfile = realfile;
- od->is_real = !OVL_TYPE_MERGE(type);
+ od->is_real = ovl_dir_is_real(file->f_path.dentry);
od->is_upper = OVL_TYPE_UPPER(type);
file->private_data = od;
@@ -835,7 +853,7 @@ const struct file_operations ovl_dir_operations = {
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
int err;
- struct ovl_cache_entry *p;
+ struct ovl_cache_entry *p, *n;
struct rb_root root = RB_ROOT;
err = ovl_dir_read_merged(dentry, list, &root);
@@ -844,18 +862,29 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
err = 0;
- list_for_each_entry(p, list, l_node) {
- if (p->is_whiteout)
- continue;
+ list_for_each_entry_safe(p, n, list, l_node) {
+ /*
+ * Select whiteouts in upperdir, they should
+ * be cleared when deleting this directory.
+ */
+ if (p->is_whiteout) {
+ if (p->is_upper)
+ continue;
+ goto del_entry;
+ }
if (p->name[0] == '.') {
if (p->len == 1)
- continue;
+ goto del_entry;
if (p->len == 2 && p->name[1] == '.')
- continue;
+ goto del_entry;
}
err = -ENOTEMPTY;
break;
+
+del_entry:
+ list_del(&p->l_node);
+ kfree(p);
}
return err;
@@ -869,7 +898,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
list_for_each_entry(p, list, l_node) {
struct dentry *dentry;
- if (!p->is_whiteout)
+ if (WARN_ON(!p->is_whiteout || !p->is_upper))
continue;
dentry = lookup_one_len(p->name, upper, p->len);
@@ -985,7 +1014,7 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
}
int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
- struct path *lowerstack, unsigned int numlower)
+ struct ovl_path *lower, unsigned int numlower)
{
int err;
struct dentry *index = NULL;
@@ -1020,7 +1049,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
index = NULL;
break;
}
- err = ovl_verify_index(index, lowerstack, numlower);
+ err = ovl_verify_index(index, lower, numlower);
/* Cleanup stale and orphan index entries */
if (err && (err == -ESTALE || err == -ENOENT))
err = ovl_cleanup(dir, index);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index f5738e96a052..288d20f9a55a 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -18,7 +18,6 @@
#include <linux/seq_file.h>
#include <linux/posix_acl_xattr.h>
#include "overlayfs.h"
-#include "ovl_entry.h"
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
@@ -39,15 +38,20 @@ module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(ovl_index_def,
"Default to on or off for the inodes index feature");
+static void ovl_entry_stack_free(struct ovl_entry *oe)
+{
+ unsigned int i;
+
+ for (i = 0; i < oe->numlower; i++)
+ dput(oe->lowerstack[i].dentry);
+}
+
static void ovl_dentry_release(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
if (oe) {
- unsigned int i;
-
- for (i = 0; i < oe->numlower; i++)
- dput(oe->lowerstack[i].dentry);
+ ovl_entry_stack_free(oe);
kfree_rcu(oe, rcu);
}
}
@@ -207,39 +211,48 @@ static void ovl_destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, ovl_i_callback);
}
-static void ovl_put_super(struct super_block *sb)
+static void ovl_free_fs(struct ovl_fs *ofs)
{
- struct ovl_fs *ufs = sb->s_fs_info;
unsigned i;
- dput(ufs->indexdir);
- dput(ufs->workdir);
- if (ufs->workdir_locked)
- ovl_inuse_unlock(ufs->workbasedir);
- dput(ufs->workbasedir);
- if (ufs->upper_mnt && ufs->upperdir_locked)
- ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
- mntput(ufs->upper_mnt);
- for (i = 0; i < ufs->numlower; i++)
- mntput(ufs->lower_mnt[i]);
- kfree(ufs->lower_mnt);
-
- kfree(ufs->config.lowerdir);
- kfree(ufs->config.upperdir);
- kfree(ufs->config.workdir);
- put_cred(ufs->creator_cred);
- kfree(ufs);
+ dput(ofs->indexdir);
+ dput(ofs->workdir);
+ if (ofs->workdir_locked)
+ ovl_inuse_unlock(ofs->workbasedir);
+ dput(ofs->workbasedir);
+ if (ofs->upperdir_locked)
+ ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
+ mntput(ofs->upper_mnt);
+ for (i = 0; i < ofs->numlower; i++) {
+ mntput(ofs->lower_layers[i].mnt);
+ free_anon_bdev(ofs->lower_layers[i].pseudo_dev);
+ }
+ kfree(ofs->lower_layers);
+
+ kfree(ofs->config.lowerdir);
+ kfree(ofs->config.upperdir);
+ kfree(ofs->config.workdir);
+ if (ofs->creator_cred)
+ put_cred(ofs->creator_cred);
+ kfree(ofs);
+}
+
+static void ovl_put_super(struct super_block *sb)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ ovl_free_fs(ofs);
}
static int ovl_sync_fs(struct super_block *sb, int wait)
{
- struct ovl_fs *ufs = sb->s_fs_info;
+ struct ovl_fs *ofs = sb->s_fs_info;
struct super_block *upper_sb;
int ret;
- if (!ufs->upper_mnt)
+ if (!ofs->upper_mnt)
return 0;
- upper_sb = ufs->upper_mnt->mnt_sb;
+ upper_sb = ofs->upper_mnt->mnt_sb;
if (!upper_sb->s_op->sync_fs)
return 0;
@@ -277,9 +290,9 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
}
/* Will this overlay be forced to mount/remount ro? */
-static bool ovl_force_readonly(struct ovl_fs *ufs)
+static bool ovl_force_readonly(struct ovl_fs *ofs)
{
- return (!ufs->upper_mnt || !ufs->workdir);
+ return (!ofs->upper_mnt || !ofs->workdir);
}
/**
@@ -291,29 +304,29 @@ static bool ovl_force_readonly(struct ovl_fs *ufs)
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
struct super_block *sb = dentry->d_sb;
- struct ovl_fs *ufs = sb->s_fs_info;
+ struct ovl_fs *ofs = sb->s_fs_info;
- seq_show_option(m, "lowerdir", ufs->config.lowerdir);
- if (ufs->config.upperdir) {
- seq_show_option(m, "upperdir", ufs->config.upperdir);
- seq_show_option(m, "workdir", ufs->config.workdir);
+ seq_show_option(m, "lowerdir", ofs->config.lowerdir);
+ if (ofs->config.upperdir) {
+ seq_show_option(m, "upperdir", ofs->config.upperdir);
+ seq_show_option(m, "workdir", ofs->config.workdir);
}
- if (ufs->config.default_permissions)
+ if (ofs->config.default_permissions)
seq_puts(m, ",default_permissions");
- if (ufs->config.redirect_dir != ovl_redirect_dir_def)
+ if (ofs->config.redirect_dir != ovl_redirect_dir_def)
seq_printf(m, ",redirect_dir=%s",
- ufs->config.redirect_dir ? "on" : "off");
- if (ufs->config.index != ovl_index_def)
+ ofs->config.redirect_dir ? "on" : "off");
+ if (ofs->config.index != ovl_index_def)
seq_printf(m, ",index=%s",
- ufs->config.index ? "on" : "off");
+ ofs->config.index ? "on" : "off");
return 0;
}
static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
- struct ovl_fs *ufs = sb->s_fs_info;
+ struct ovl_fs *ofs = sb->s_fs_info;
- if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs))
+ if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
return -EROFS;
return 0;
@@ -451,13 +464,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"
-static struct dentry *ovl_workdir_create(struct super_block *sb,
- struct ovl_fs *ufs,
- struct dentry *dentry,
+static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
const char *name, bool persist)
{
- struct inode *dir = dentry->d_inode;
- struct vfsmount *mnt = ufs->upper_mnt;
+ struct inode *dir = ofs->workbasedir->d_inode;
+ struct vfsmount *mnt = ofs->upper_mnt;
struct dentry *work;
int err;
bool retried = false;
@@ -471,7 +482,7 @@ static struct dentry *ovl_workdir_create(struct super_block *sb,
locked = true;
retry:
- work = lookup_one_len(name, dentry, strlen(name));
+ work = lookup_one_len(name, ofs->workbasedir, strlen(name));
if (!IS_ERR(work)) {
struct iattr attr = {
@@ -541,8 +552,7 @@ out_dput:
dput(work);
out_err:
pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
- ufs->config.workdir, name, -err);
- sb->s_flags |= MS_RDONLY;
+ ofs->config.workdir, name, -err);
work = NULL;
goto out_unlock;
}
@@ -585,7 +595,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path)
return 0;
out_put:
- path_put(path);
+ path_put_init(path);
out:
return err;
}
@@ -603,7 +613,7 @@ static int ovl_mount_dir(const char *name, struct path *path)
if (ovl_dentry_remote(path->dentry)) {
pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
tmp);
- path_put(path);
+ path_put_init(path);
err = -EINVAL;
}
kfree(tmp);
@@ -655,7 +665,7 @@ static int ovl_lower_dir(const char *name, struct path *path,
return 0;
out_put:
- path_put(path);
+ path_put_init(path);
out:
return err;
}
@@ -826,129 +836,269 @@ static const struct xattr_handler *ovl_xattr_handlers[] = {
NULL
};
-static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
{
- struct path upperpath = { };
- struct path workpath = { };
- struct dentry *root_dentry;
- struct ovl_entry *oe;
- struct ovl_fs *ufs;
- struct path *stack = NULL;
- char *lowertmp;
- char *lower;
- unsigned int numlower;
- unsigned int stacklen = 0;
- unsigned int i;
- bool remote = false;
- struct cred *cred;
+ struct vfsmount *upper_mnt;
int err;
- err = -ENOMEM;
- ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
- if (!ufs)
+ err = ovl_mount_dir(ofs->config.upperdir, upperpath);
+ if (err)
+ goto out;
+
+ /* Upper fs should not be r/o */
+ if (sb_rdonly(upperpath->mnt->mnt_sb)) {
+ pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
+ if (err)
goto out;
- ufs->config.redirect_dir = ovl_redirect_dir_def;
- ufs->config.index = ovl_index_def;
- err = ovl_parse_opt((char *) data, &ufs->config);
+ err = -EBUSY;
+ if (ovl_inuse_trylock(upperpath->dentry)) {
+ ofs->upperdir_locked = true;
+ } else if (ofs->config.index) {
+ pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
+ goto out;
+ } else {
+ pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
+ }
+
+ upper_mnt = clone_private_mount(upperpath);
+ err = PTR_ERR(upper_mnt);
+ if (IS_ERR(upper_mnt)) {
+ pr_err("overlayfs: failed to clone upperpath\n");
+ goto out;
+ }
+
+ /* Don't inherit atime flags */
+ upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
+ ofs->upper_mnt = upper_mnt;
+ err = 0;
+out:
+ return err;
+}
+
+static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
+{
+ struct dentry *temp;
+ int err;
+
+ ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
+ if (!ofs->workdir)
+ return 0;
+
+ /*
+ * Upper should support d_type, else whiteouts are visible. Given
+ * workdir and upper are on same fs, we can do iterate_dir() on
+ * workdir. This check requires successful creation of workdir in
+ * previous step.
+ */
+ err = ovl_check_d_type_supported(workpath);
+ if (err < 0)
+ return err;
+
+ /*
+ * We allowed this configuration and don't want to break users over
+ * kernel upgrade. So warn instead of erroring out.
+ */
+ if (!err)
+ pr_warn("overlayfs: upper fs needs to support d_type.\n");
+
+ /* Check if upper/work fs supports O_TMPFILE */
+ temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
+ ofs->tmpfile = !IS_ERR(temp);
+ if (ofs->tmpfile)
+ dput(temp);
+ else
+ pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+
+ /*
+ * Check if upper/work fs supports trusted.overlay.* xattr
+ */
+ err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
+ if (err) {
+ ofs->noxattr = true;
+ pr_warn("overlayfs: upper fs does not support xattr.\n");
+ } else {
+ vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
+ }
+
+ /* Check if upper/work fs supports file handles */
+ if (ofs->config.index &&
+ !ovl_can_decode_fh(ofs->workdir->d_sb)) {
+ ofs->config.index = false;
+ pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
+ }
+
+ return 0;
+}
+
+static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
+{
+ int err;
+ struct path workpath = { };
+
+ err = ovl_mount_dir(ofs->config.workdir, &workpath);
if (err)
- goto out_free_config;
+ goto out;
err = -EINVAL;
- if (!ufs->config.lowerdir) {
- if (!silent)
- pr_err("overlayfs: missing 'lowerdir'\n");
- goto out_free_config;
+ if (upperpath->mnt != workpath.mnt) {
+ pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
+ goto out;
+ }
+ if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
+ pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
+ goto out;
}
- sb->s_stack_depth = 0;
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- if (ufs->config.upperdir) {
- if (!ufs->config.workdir) {
- pr_err("overlayfs: missing 'workdir'\n");
- goto out_free_config;
- }
+ err = -EBUSY;
+ if (ovl_inuse_trylock(workpath.dentry)) {
+ ofs->workdir_locked = true;
+ } else if (ofs->config.index) {
+ pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
+ goto out;
+ } else {
+ pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
+ }
- err = ovl_mount_dir(ufs->config.upperdir, &upperpath);
- if (err)
- goto out_free_config;
+ ofs->workbasedir = dget(workpath.dentry);
+ err = ovl_make_workdir(ofs, &workpath);
+ if (err)
+ goto out;
- /* Upper fs should not be r/o */
- if (sb_rdonly(upperpath.mnt->mnt_sb)) {
- pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
- err = -EINVAL;
- goto out_put_upperpath;
- }
+ err = 0;
+out:
+ path_put(&workpath);
- err = ovl_check_namelen(&upperpath, ufs, ufs->config.upperdir);
- if (err)
- goto out_put_upperpath;
-
- err = -EBUSY;
- if (ovl_inuse_trylock(upperpath.dentry)) {
- ufs->upperdir_locked = true;
- } else if (ufs->config.index) {
- pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
- goto out_put_upperpath;
- } else {
- pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
- }
+ return err;
+}
+
+static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
+ struct path *upperpath)
+{
+ int err;
- err = ovl_mount_dir(ufs->config.workdir, &workpath);
+ /* Verify lower root is upper root origin */
+ err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
+ false, true);
+ if (err) {
+ pr_err("overlayfs: failed to verify upper root origin\n");
+ goto out;
+ }
+
+ ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
+ if (ofs->indexdir) {
+ /* Verify upper root is index dir origin */
+ err = ovl_verify_origin(ofs->indexdir, upperpath->dentry,
+ true, true);
if (err)
- goto out_unlock_upperdentry;
+ pr_err("overlayfs: failed to verify index dir origin\n");
- err = -EINVAL;
- if (upperpath.mnt != workpath.mnt) {
- pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
- goto out_put_workpath;
- }
- if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) {
- pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
- goto out_put_workpath;
+ /* Cleanup bad/stale/orphan index entries */
+ if (!err)
+ err = ovl_indexdir_cleanup(ofs->indexdir,
+ ofs->upper_mnt,
+ oe->lowerstack,
+ oe->numlower);
+ }
+ if (err || !ofs->indexdir)
+ pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+
+out:
+ return err;
+}
+
+static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
+ unsigned int numlower)
+{
+ int err;
+ unsigned int i;
+
+ err = -ENOMEM;
+ ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
+ GFP_KERNEL);
+ if (ofs->lower_layers == NULL)
+ goto out;
+ for (i = 0; i < numlower; i++) {
+ struct vfsmount *mnt;
+ dev_t dev;
+
+ err = get_anon_bdev(&dev);
+ if (err) {
+ pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
+ goto out;
}
- err = -EBUSY;
- if (ovl_inuse_trylock(workpath.dentry)) {
- ufs->workdir_locked = true;
- } else if (ufs->config.index) {
- pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
- goto out_put_workpath;
- } else {
- pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
+ mnt = clone_private_mount(&stack[i]);
+ err = PTR_ERR(mnt);
+ if (IS_ERR(mnt)) {
+ pr_err("overlayfs: failed to clone lowerpath\n");
+ free_anon_bdev(dev);
+ goto out;
}
+ /*
+ * Make lower layers R/O. That way fchmod/fchown on lower file
+ * will fail instead of modifying lower fs.
+ */
+ mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
+
+ ofs->lower_layers[ofs->numlower].mnt = mnt;
+ ofs->lower_layers[ofs->numlower].pseudo_dev = dev;
+ ofs->numlower++;
- ufs->workbasedir = workpath.dentry;
- sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth;
+ /* Check if all lower layers are on same sb */
+ if (i == 0)
+ ofs->same_sb = mnt->mnt_sb;
+ else if (ofs->same_sb != mnt->mnt_sb)
+ ofs->same_sb = NULL;
}
+ err = 0;
+out:
+ return err;
+}
+
+static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
+ struct ovl_fs *ofs)
+{
+ int err;
+ char *lowertmp, *lower;
+ struct path *stack = NULL;
+ unsigned int stacklen, numlower = 0, i;
+ bool remote = false;
+ struct ovl_entry *oe;
+
err = -ENOMEM;
- lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL);
+ lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
if (!lowertmp)
- goto out_unlock_workdentry;
+ goto out_err;
err = -EINVAL;
stacklen = ovl_split_lowerdirs(lowertmp);
if (stacklen > OVL_MAX_STACK) {
pr_err("overlayfs: too many lower directories, limit is %d\n",
OVL_MAX_STACK);
- goto out_free_lowertmp;
- } else if (!ufs->config.upperdir && stacklen == 1) {
+ goto out_err;
+ } else if (!ofs->config.upperdir && stacklen == 1) {
pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
- goto out_free_lowertmp;
+ goto out_err;
}
err = -ENOMEM;
stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
if (!stack)
- goto out_free_lowertmp;
+ goto out_err;
err = -EINVAL;
lower = lowertmp;
for (numlower = 0; numlower < stacklen; numlower++) {
- err = ovl_lower_dir(lower, &stack[numlower], ufs,
+ err = ovl_lower_dir(lower, &stack[numlower], ofs,
&sb->s_stack_depth, &remote);
if (err)
- goto out_put_lowerpath;
+ goto out_err;
lower = strchr(lower, '\0') + 1;
}
@@ -957,190 +1107,144 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_stack_depth++;
if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
pr_err("overlayfs: maximum fs stacking depth exceeded\n");
- goto out_put_lowerpath;
+ goto out_err;
}
- if (ufs->config.upperdir) {
- ufs->upper_mnt = clone_private_mount(&upperpath);
- err = PTR_ERR(ufs->upper_mnt);
- if (IS_ERR(ufs->upper_mnt)) {
- pr_err("overlayfs: failed to clone upperpath\n");
- goto out_put_lowerpath;
- }
+ err = ovl_get_lower_layers(ofs, stack, numlower);
+ if (err)
+ goto out_err;
- /* Don't inherit atime flags */
- ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
+ err = -ENOMEM;
+ oe = ovl_alloc_entry(numlower);
+ if (!oe)
+ goto out_err;
- sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran;
+ for (i = 0; i < numlower; i++) {
+ oe->lowerstack[i].dentry = dget(stack[i].dentry);
+ oe->lowerstack[i].layer = &ofs->lower_layers[i];
+ }
- ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry,
- OVL_WORKDIR_NAME, false);
- /*
- * Upper should support d_type, else whiteouts are visible.
- * Given workdir and upper are on same fs, we can do
- * iterate_dir() on workdir. This check requires successful
- * creation of workdir in previous step.
- */
- if (ufs->workdir) {
- struct dentry *temp;
-
- err = ovl_check_d_type_supported(&workpath);
- if (err < 0)
- goto out_put_workdir;
-
- /*
- * We allowed this configuration and don't want to
- * break users over kernel upgrade. So warn instead
- * of erroring out.
- */
- if (!err)
- pr_warn("overlayfs: upper fs needs to support d_type.\n");
-
- /* Check if upper/work fs supports O_TMPFILE */
- temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0);
- ufs->tmpfile = !IS_ERR(temp);
- if (ufs->tmpfile)
- dput(temp);
- else
- pr_warn("overlayfs: upper fs does not support tmpfile.\n");
-
- /*
- * Check if upper/work fs supports trusted.overlay.*
- * xattr
- */
- err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE,
- "0", 1, 0);
- if (err) {
- ufs->noxattr = true;
- pr_warn("overlayfs: upper fs does not support xattr.\n");
- } else {
- vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
- }
+ if (remote)
+ sb->s_d_op = &ovl_reval_dentry_operations;
+ else
+ sb->s_d_op = &ovl_dentry_operations;
- /* Check if upper/work fs supports file handles */
- if (ufs->config.index &&
- !ovl_can_decode_fh(ufs->workdir->d_sb)) {
- ufs->config.index = false;
- pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
- }
- }
- }
+out:
+ for (i = 0; i < numlower; i++)
+ path_put(&stack[i]);
+ kfree(stack);
+ kfree(lowertmp);
+
+ return oe;
+
+out_err:
+ oe = ERR_PTR(err);
+ goto out;
+}
+
+static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct path upperpath = { };
+ struct dentry *root_dentry;
+ struct ovl_entry *oe;
+ struct ovl_fs *ofs;
+ struct cred *cred;
+ int err;
err = -ENOMEM;
- ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL);
- if (ufs->lower_mnt == NULL)
- goto out_put_workdir;
- for (i = 0; i < numlower; i++) {
- struct vfsmount *mnt = clone_private_mount(&stack[i]);
+ ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
+ if (!ofs)
+ goto out;
- err = PTR_ERR(mnt);
- if (IS_ERR(mnt)) {
- pr_err("overlayfs: failed to clone lowerpath\n");
- goto out_put_lower_mnt;
- }
- /*
- * Make lower_mnt R/O. That way fchmod/fchown on lower file
- * will fail instead of modifying lower fs.
- */
- mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
+ ofs->creator_cred = cred = prepare_creds();
+ if (!cred)
+ goto out_err;
- ufs->lower_mnt[ufs->numlower] = mnt;
- ufs->numlower++;
+ ofs->config.redirect_dir = ovl_redirect_dir_def;
+ ofs->config.index = ovl_index_def;
+ err = ovl_parse_opt((char *) data, &ofs->config);
+ if (err)
+ goto out_err;
- /* Check if all lower layers are on same sb */
- if (i == 0)
- ufs->same_sb = mnt->mnt_sb;
- else if (ufs->same_sb != mnt->mnt_sb)
- ufs->same_sb = NULL;
+ err = -EINVAL;
+ if (!ofs->config.lowerdir) {
+ if (!silent)
+ pr_err("overlayfs: missing 'lowerdir'\n");
+ goto out_err;
}
- /* If the upper fs is nonexistent, we mark overlayfs r/o too */
- if (!ufs->upper_mnt)
- sb->s_flags |= MS_RDONLY;
- else if (ufs->upper_mnt->mnt_sb != ufs->same_sb)
- ufs->same_sb = NULL;
-
- if (!(ovl_force_readonly(ufs)) && ufs->config.index) {
- /* Verify lower root is upper root origin */
- err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0],
- stack[0].dentry, false, true);
- if (err) {
- pr_err("overlayfs: failed to verify upper root origin\n");
- goto out_put_lower_mnt;
+ sb->s_stack_depth = 0;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ if (ofs->config.upperdir) {
+ if (!ofs->config.workdir) {
+ pr_err("overlayfs: missing 'workdir'\n");
+ goto out_err;
}
- ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry,
- OVL_INDEXDIR_NAME, true);
- if (ufs->indexdir) {
- /* Verify upper root is index dir origin */
- err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt,
- upperpath.dentry, true, true);
- if (err)
- pr_err("overlayfs: failed to verify index dir origin\n");
+ err = ovl_get_upper(ofs, &upperpath);
+ if (err)
+ goto out_err;
- /* Cleanup bad/stale/orphan index entries */
- if (!err)
- err = ovl_indexdir_cleanup(ufs->indexdir,
- ufs->upper_mnt,
- stack, numlower);
- }
- if (err || !ufs->indexdir)
- pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+ err = ovl_get_workdir(ofs, &upperpath);
if (err)
- goto out_put_indexdir;
+ goto out_err;
+
+ if (!ofs->workdir)
+ sb->s_flags |= SB_RDONLY;
+
+ sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
+ sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
+
}
+ oe = ovl_get_lowerstack(sb, ofs);
+ err = PTR_ERR(oe);
+ if (IS_ERR(oe))
+ goto out_err;
- /* Show index=off/on in /proc/mounts for any of the reasons above */
- if (!ufs->indexdir)
- ufs->config.index = false;
+ /* If the upper fs is nonexistent, we mark overlayfs r/o too */
+ if (!ofs->upper_mnt)
+ sb->s_flags |= SB_RDONLY;
+ else if (ofs->upper_mnt->mnt_sb != ofs->same_sb)
+ ofs->same_sb = NULL;
- if (remote)
- sb->s_d_op = &ovl_reval_dentry_operations;
- else
- sb->s_d_op = &ovl_dentry_operations;
+ if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
+ err = ovl_get_indexdir(ofs, oe, &upperpath);
+ if (err)
+ goto out_free_oe;
- err = -ENOMEM;
- ufs->creator_cred = cred = prepare_creds();
- if (!cred)
- goto out_put_indexdir;
+ if (!ofs->indexdir)
+ sb->s_flags |= SB_RDONLY;
+ }
+
+ /* Show index=off/on in /proc/mounts for any of the reasons above */
+ if (!ofs->indexdir)
+ ofs->config.index = false;
/* Never override disk quota limits or use reserved space */
cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
- err = -ENOMEM;
- oe = ovl_alloc_entry(numlower);
- if (!oe)
- goto out_put_cred;
-
sb->s_magic = OVERLAYFS_SUPER_MAGIC;
sb->s_op = &ovl_super_operations;
sb->s_xattr = ovl_xattr_handlers;
- sb->s_fs_info = ufs;
- sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
+ sb->s_fs_info = ofs;
+ sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK;
+ err = -ENOMEM;
root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
if (!root_dentry)
goto out_free_oe;
mntput(upperpath.mnt);
- for (i = 0; i < numlower; i++)
- mntput(stack[i].mnt);
- mntput(workpath.mnt);
- kfree(lowertmp);
-
if (upperpath.dentry) {
oe->has_upper = true;
if (ovl_is_impuredir(upperpath.dentry))
ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
}
- for (i = 0; i < numlower; i++) {
- oe->lowerstack[i].dentry = stack[i].dentry;
- oe->lowerstack[i].mnt = ufs->lower_mnt[i];
- }
- kfree(stack);
root_dentry->d_fsdata = oe;
+ /* Root is always merge -> can have whiteouts */
+ ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
ovl_dentry_lower(root_dentry));
@@ -1149,39 +1253,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
return 0;
out_free_oe:
+ ovl_entry_stack_free(oe);
kfree(oe);
-out_put_cred:
- put_cred(ufs->creator_cred);
-out_put_indexdir:
- dput(ufs->indexdir);
-out_put_lower_mnt:
- for (i = 0; i < ufs->numlower; i++)
- mntput(ufs->lower_mnt[i]);
- kfree(ufs->lower_mnt);
-out_put_workdir:
- dput(ufs->workdir);
- mntput(ufs->upper_mnt);
-out_put_lowerpath:
- for (i = 0; i < numlower; i++)
- path_put(&stack[i]);
- kfree(stack);
-out_free_lowertmp:
- kfree(lowertmp);
-out_unlock_workdentry:
- if (ufs->workdir_locked)
- ovl_inuse_unlock(workpath.dentry);
-out_put_workpath:
- path_put(&workpath);
-out_unlock_upperdentry:
- if (ufs->upperdir_locked)
- ovl_inuse_unlock(upperpath.dentry);
-out_put_upperpath:
+out_err:
path_put(&upperpath);
-out_free_config:
- kfree(ufs->config.lowerdir);
- kfree(ufs->config.upperdir);
- kfree(ufs->config.workdir);
- kfree(ufs);
+ ovl_free_fs(ofs);
out:
return err;
}
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index b9b239fa5cfd..d6bb1c9f5e7a 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -17,7 +17,6 @@
#include <linux/namei.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
-#include "ovl_entry.h"
int ovl_want_write(struct dentry *dentry)
{
@@ -125,7 +124,12 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
{
struct ovl_entry *oe = dentry->d_fsdata;
- *path = oe->numlower ? oe->lowerstack[0] : (struct path) { };
+ if (oe->numlower) {
+ path->mnt = oe->lowerstack[0].layer->mnt;
+ path->dentry = oe->lowerstack[0].dentry;
+ } else {
+ *path = (struct path) { };
+ }
}
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
@@ -329,6 +333,19 @@ void ovl_copy_up_end(struct dentry *dentry)
mutex_unlock(&OVL_I(d_inode(dentry))->lock);
}
+bool ovl_check_origin_xattr(struct dentry *dentry)
+{
+ int res;
+
+ res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+
+ /* Zero size value means "copied up but origin unknown" */
+ if (res >= 0)
+ return true;
+
+ return false;
+}
+
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
{
int res;
diff --git a/fs/pipe.c b/fs/pipe.c
index 349c9d56d4b3..6d98566201ef 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1018,13 +1018,19 @@ const struct file_operations pipefifo_fops = {
/*
* Currently we rely on the pipe array holding a power-of-2 number
- * of pages.
+ * of pages. Returns 0 on error.
*/
-static inline unsigned int round_pipe_size(unsigned int size)
+unsigned int round_pipe_size(unsigned int size)
{
unsigned long nr_pages;
+ if (size < pipe_min_size)
+ size = pipe_min_size;
+
nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (nr_pages == 0)
+ return 0;
+
return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
}
@@ -1040,6 +1046,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
long ret = 0;
size = round_pipe_size(arg);
+ if (size == 0)
+ return -EINVAL;
nr_pages = size >> PAGE_SHIFT;
if (!nr_pages)
@@ -1117,20 +1125,13 @@ out_revert_acct:
}
/*
- * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
+ * This should work even if CONFIG_PROC_FS isn't set, as proc_dopipe_max_size
* will return an error.
*/
int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
size_t *lenp, loff_t *ppos)
{
- int ret;
-
- ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
- if (ret < 0 || !write)
- return ret;
-
- pipe_max_size = round_pipe_size(pipe_max_size);
- return ret;
+ return proc_dopipe_max_size(table, write, buf, lenp, ppos);
}
/*
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index f7456c4e7d0f..ead487e80510 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -21,6 +21,7 @@ proc-y += loadavg.o
proc-y += meminfo.o
proc-y += stat.o
proc-y += uptime.o
+proc-y += util.o
proc-y += version.o
proc-y += softirqs.o
proc-y += namespaces.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6f6fc1672ad1..79375fc115d2 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -366,6 +366,11 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
cpumask_pr_args(&task->cpus_allowed));
}
+static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
+{
+ seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state);
+}
+
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@@ -376,6 +381,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
if (mm) {
task_mem(m, mm);
+ task_core_dumping(m, mm);
mmput(mm);
}
task_sig(m, task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d357b2ea6cb..28fa85276eec 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -443,8 +443,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
save_stack_trace_tsk(task, &trace);
for (i = 0; i < trace.nr_entries; i++) {
- seq_printf(m, "[<%pK>] %pB\n",
- (void *)entries[i], (void *)entries[i]);
+ seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
}
unlock_trace(task);
}
@@ -1682,7 +1681,7 @@ const struct inode_operations proc_pid_link_inode_operations = {
/* building an inode */
-void task_dump_owner(struct task_struct *task, mode_t mode,
+void task_dump_owner(struct task_struct *task, umode_t mode,
kuid_t *ruid, kgid_t *rgid)
{
/* Depending on the state of dumpable compute who should own a
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
index e0f867cd8553..96f1087e372c 100644
--- a/fs/proc/cpuinfo.c
+++ b/fs/proc/cpuinfo.c
@@ -1,12 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpufreq.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+__weak void arch_freq_prepare_all(void)
+{
+}
+
extern const struct seq_operations cpuinfo_op;
static int cpuinfo_open(struct inode *inode, struct file *file)
{
+ arch_freq_prepare_all();
return seq_open(file, &cpuinfo_op);
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 225f541f7078..dd0f82622427 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -483,7 +483,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent)
/* User space would break if executables or devices appear on proc */
s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
- s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
+ s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = PROC_SUPER_MAGIC;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index a34195e92b20..4a67188c8d74 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -100,31 +100,10 @@ static inline struct task_struct *get_proc_task(struct inode *inode)
return get_pid_task(proc_pid(inode), PIDTYPE_PID);
}
-void task_dump_owner(struct task_struct *task, mode_t mode,
+void task_dump_owner(struct task_struct *task, umode_t mode,
kuid_t *ruid, kgid_t *rgid);
-static inline unsigned name_to_int(const struct qstr *qstr)
-{
- const char *name = qstr->name;
- int len = qstr->len;
- unsigned n = 0;
-
- if (len > 1 && *name == '0')
- goto out;
- while (len-- > 0) {
- unsigned c = *name++ - '0';
- if (c > 9)
- goto out;
- if (n >= (~0U-9)/10)
- goto out;
- n *= 10;
- n += c;
- }
- return n;
-out:
- return ~0U;
-}
-
+unsigned name_to_int(const struct qstr *qstr);
/*
* Offset of the first process in the /proc root directory..
*/
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 9bc5c58c00ee..a000d7547479 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -24,7 +24,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
nr_running(), nr_threads,
- task_active_pid_ns(current)->last_pid);
+ idr_get_cursor(&task_active_pid_ns(current)->idr));
return 0;
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 4e42aba97f2e..ede8e64974be 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -91,7 +91,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
{
struct pid_namespace *ns;
- if (flags & MS_KERNMOUNT) {
+ if (flags & SB_KERNMOUNT) {
ns = data;
data = NULL;
} else {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 875231c36cb3..339e4c1c044d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -661,6 +661,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_ACCOUNT)] = "ac",
[ilog2(VM_NORESERVE)] = "nr",
[ilog2(VM_HUGETLB)] = "ht",
+ [ilog2(VM_SYNC)] = "sf",
[ilog2(VM_ARCH_1)] = "ar",
[ilog2(VM_WIPEONFORK)] = "wf",
[ilog2(VM_DONTDUMP)] = "dd",
diff --git a/fs/proc/util.c b/fs/proc/util.c
new file mode 100644
index 000000000000..b161cfa0f9fa
--- /dev/null
+++ b/fs/proc/util.c
@@ -0,0 +1,23 @@
+#include <linux/dcache.h>
+
+unsigned name_to_int(const struct qstr *qstr)
+{
+ const char *name = qstr->name;
+ int len = qstr->len;
+ unsigned n = 0;
+
+ if (len > 1 && *name == '0')
+ goto out;
+ do {
+ unsigned c = *name++ - '0';
+ if (c > 9)
+ goto out;
+ if (n >= (~0U-9)/10)
+ goto out;
+ n *= 10;
+ n += c;
+ } while (--len > 0);
+ return n;
+out:
+ return ~0U;
+}
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 7b635d173213..b786840facd9 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -45,10 +45,10 @@ struct proc_fs_info {
static int show_sb_opts(struct seq_file *m, struct super_block *sb)
{
static const struct proc_fs_info fs_info[] = {
- { MS_SYNCHRONOUS, ",sync" },
- { MS_DIRSYNC, ",dirsync" },
- { MS_MANDLOCK, ",mand" },
- { MS_LAZYTIME, ",lazytime" },
+ { SB_SYNCHRONOUS, ",sync" },
+ { SB_DIRSYNC, ",dirsync" },
+ { SB_MANDLOCK, ",mand" },
+ { SB_LAZYTIME, ",lazytime" },
{ 0, NULL }
};
const struct proc_fs_info *fs_infop;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 086e491faf04..691032107f8c 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -61,7 +61,7 @@ MODULE_PARM_DESC(update_ms, "milliseconds before pstore updates its content "
static int pstore_new_entry;
-static void pstore_timefunc(unsigned long);
+static void pstore_timefunc(struct timer_list *);
static DEFINE_TIMER(pstore_timer, pstore_timefunc);
static void pstore_dowork(struct work_struct *);
@@ -651,7 +651,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
return -EINVAL;
record->buf = memdup_user(buf, record->size);
- if (unlikely(IS_ERR(record->buf))) {
+ if (IS_ERR(record->buf)) {
ret = PTR_ERR(record->buf);
goto out;
}
@@ -890,7 +890,7 @@ static void pstore_dowork(struct work_struct *work)
pstore_get_records(1);
}
-static void pstore_timefunc(unsigned long dummy)
+static void pstore_timefunc(struct timer_list *unused)
{
if (pstore_new_entry) {
pstore_new_entry = 0;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 3a67cfb142d8..3d46fe302fcb 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -47,7 +47,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data)
sync_filesystem(sb);
qs = qnx4_sb(sb);
qs->Version = QNX4_VERSION;
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
return 0;
}
@@ -199,7 +199,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
s->s_op = &qnx4_sops;
s->s_magic = QNX4_SUPER_MAGIC;
- s->s_flags |= MS_RDONLY; /* Yup, read-only yet */
+ s->s_flags |= SB_RDONLY; /* Yup, read-only yet */
/* Check the superblock signature. Since the qnx4 code is
dangerous, we should leave as quickly as possible
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 1192422a1c56..4aeb26bcb4d0 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -56,7 +56,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root)
static int qnx6_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
return 0;
}
@@ -427,7 +427,7 @@ mmi_success:
}
s->s_op = &qnx6_sops;
s->s_magic = QNX6_SUPER_MAGIC;
- s->s_flags |= MS_RDONLY; /* Yup, read-only yet */
+ s->s_flags |= SB_RDONLY; /* Yup, read-only yet */
/* ease the later tree level calculations */
sbi = QNX6_SB(s);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 39f1b0b0c76f..020c597ef9b6 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -941,12 +941,13 @@ static int dqinit_needed(struct inode *inode, int type)
}
/* This routine is guarded by s_umount semaphore */
-static void add_dquot_ref(struct super_block *sb, int type)
+static int add_dquot_ref(struct super_block *sb, int type)
{
struct inode *inode, *old_inode = NULL;
#ifdef CONFIG_QUOTA_DEBUG
int reserved = 0;
#endif
+ int err = 0;
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
@@ -966,7 +967,11 @@ static void add_dquot_ref(struct super_block *sb, int type)
reserved = 1;
#endif
iput(old_inode);
- __dquot_initialize(inode, type);
+ err = __dquot_initialize(inode, type);
+ if (err) {
+ iput(inode);
+ goto out;
+ }
/*
* We hold a reference to 'inode' so it couldn't have been
@@ -981,7 +986,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
}
spin_unlock(&sb->s_inode_list_lock);
iput(old_inode);
-
+out:
#ifdef CONFIG_QUOTA_DEBUG
if (reserved) {
quota_error(sb, "Writes happened before quota was turned on "
@@ -989,6 +994,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
"Please run quotacheck(8)");
}
#endif
+ return err;
}
/*
@@ -2379,10 +2385,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
dqopt->flags |= dquot_state_flag(flags, type);
spin_unlock(&dq_state_lock);
- add_dquot_ref(sb, type);
-
- return 0;
+ error = add_dquot_ref(sb, type);
+ if (error)
+ dquot_disable(sb, type, flags);
+ return error;
out_file_init:
dqopt->files[type] = NULL;
iput(inode);
@@ -2985,7 +2992,8 @@ static int __init dquot_init(void)
pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld,"
" %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order));
- register_shrinker(&dqcache_shrinker);
+ if (register_shrinker(&dqcache_shrinker))
+ panic("Cannot register dquot shrinker");
return 0;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index 0046d72efe94..f8547b82dfb3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -635,27 +635,6 @@ SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
return ret;
}
-/*
- * Reduce an iovec's length in-place. Return the resulting number of segments
- */
-unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
-{
- unsigned long seg = 0;
- size_t len = 0;
-
- while (seg < nr_segs) {
- seg++;
- if (len + iov->iov_len >= to) {
- iov->iov_len = to - len;
- break;
- }
- len += iov->iov_len;
- iov++;
- }
- return seg;
-}
-EXPORT_SYMBOL(iov_shorten);
-
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
loff_t *ppos, int type, rwf_t flags)
{
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 11a48affa882..b13fc024d2ee 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2106,7 +2106,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
journal_end(th);
goto out_inserted_sd;
}
- } else if (inode->i_sb->s_flags & MS_POSIXACL) {
+ } else if (inode->i_sb->s_flags & SB_POSIXACL) {
reiserfs_warning(inode->i_sb, "jdm-13090",
"ACLs aren't enabled in the fs, "
"but vfs thinks they are!");
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 69ff280bdfe8..70057359fbaf 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1960,7 +1960,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
/*
* Cancel flushing of old commits. Note that neither of these works
* will be requeued because superblock is being shutdown and doesn't
- * have MS_ACTIVE set.
+ * have SB_ACTIVE set.
*/
reiserfs_cancel_old_flush(sb);
/* wait for all commits to finish */
@@ -4302,7 +4302,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags)
* Avoid queueing work when sb is being shut down. Transaction
* will be flushed on journal shutdown.
*/
- if (sb->s_flags & MS_ACTIVE)
+ if (sb->s_flags & SB_ACTIVE)
queue_delayed_work(REISERFS_SB(sb)->commit_wq,
&journal->j_work, HZ / 10);
}
@@ -4393,7 +4393,7 @@ void reiserfs_abort_journal(struct super_block *sb, int errno)
if (!journal->j_errno)
journal->j_errno = errno;
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
set_bit(J_ABORTED, &journal->j_state);
#ifdef CONFIG_REISERFS_CHECK
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 64f49cafbc5b..7e288d97adcb 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -390,7 +390,7 @@ void __reiserfs_error(struct super_block *sb, const char *id,
return;
reiserfs_info(sb, "Remounting filesystem read-only\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
reiserfs_abort_journal(sb, -EIO);
}
@@ -409,7 +409,7 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id,
error_buf);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
reiserfs_abort_journal(sb, errno);
}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 5464ec517702..1fc934d24459 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -121,7 +121,7 @@ void reiserfs_schedule_old_flush(struct super_block *s)
* Avoid scheduling flush when sb is being shut down. It can race
* with journal shutdown and free still queued delayed work.
*/
- if (sb_rdonly(s) || !(s->s_flags & MS_ACTIVE))
+ if (sb_rdonly(s) || !(s->s_flags & SB_ACTIVE))
return;
spin_lock(&sbi->old_work_lock);
@@ -252,11 +252,11 @@ static int finish_unfinished(struct super_block *s)
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
- if (s->s_flags & MS_ACTIVE) {
+ if (s->s_flags & SB_ACTIVE) {
ms_active_set = 0;
} else {
ms_active_set = 1;
- s->s_flags |= MS_ACTIVE;
+ s->s_flags |= SB_ACTIVE;
}
/* Turn on quotas so that they are updated correctly */
for (i = 0; i < REISERFS_MAXQUOTAS; i++) {
@@ -411,7 +411,7 @@ static int finish_unfinished(struct super_block *s)
reiserfs_write_lock(s);
if (ms_active_set)
/* Restore the flag back */
- s->s_flags &= ~MS_ACTIVE;
+ s->s_flags &= ~SB_ACTIVE;
#endif
pathrelse(&path);
if (done)
@@ -1521,7 +1521,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
goto out_err_unlock;
}
- if (*mount_flags & MS_RDONLY) {
+ if (*mount_flags & SB_RDONLY) {
reiserfs_write_unlock(s);
reiserfs_xattr_init(s, *mount_flags);
/* remount read-only */
@@ -1567,7 +1567,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
/* now it is safe to call journal_begin */
- s->s_flags &= ~MS_RDONLY;
+ s->s_flags &= ~SB_RDONLY;
err = journal_begin(&th, s, 10);
if (err)
goto out_err_unlock;
@@ -1575,7 +1575,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
/* Mount a partition which is read-only, read-write */
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
- s->s_flags &= ~MS_RDONLY;
+ s->s_flags &= ~SB_RDONLY;
set_sb_umount_state(rs, REISERFS_ERROR_FS);
if (!old_format_only(s))
set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
@@ -1590,7 +1590,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
goto out_err_unlock;
reiserfs_write_unlock(s);
- if (!(*mount_flags & MS_RDONLY)) {
+ if (!(*mount_flags & SB_RDONLY)) {
dquot_resume(s, -1);
reiserfs_write_lock(s);
finish_unfinished(s);
@@ -2055,7 +2055,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
if (bdev_read_only(s->s_bdev) && !sb_rdonly(s)) {
SWARN(silent, s, "clm-7000",
"Detected readonly device, marking FS readonly");
- s->s_flags |= MS_RDONLY;
+ s->s_flags |= SB_RDONLY;
}
args.objectid = REISERFS_ROOT_OBJECTID;
args.dirid = REISERFS_ROOT_PARENT_OBJECTID;
@@ -2591,7 +2591,6 @@ out:
return err;
if (inode->i_size < off + len - towrite)
i_size_write(inode, off + len - towrite);
- inode->i_version++;
inode->i_mtime = inode->i_ctime = current_time(inode);
mark_inode_dirty(inode);
return len - towrite;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 46492fb37a4c..5dbf5324bdda 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -959,7 +959,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
/*
* We need to take a copy of the mount flags since things like
- * MS_RDONLY don't get set until *after* we're called.
+ * SB_RDONLY don't get set until *after* we're called.
* mount_flags != mount_options
*/
int reiserfs_xattr_init(struct super_block *s, int mount_flags)
@@ -971,7 +971,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
if (err)
goto error;
- if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) {
+ if (d_really_is_negative(privroot) && !(mount_flags & SB_RDONLY)) {
inode_lock(d_inode(s->s_root));
err = create_privroot(REISERFS_SB(s)->priv_root);
inode_unlock(d_inode(s->s_root));
@@ -999,11 +999,11 @@ error:
clear_bit(REISERFS_POSIXACL, &REISERFS_SB(s)->s_mount_opt);
}
- /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
+ /* The super_block SB_POSIXACL must mirror the (no)acl mount option. */
if (reiserfs_posixacl(s))
- s->s_flags |= MS_POSIXACL;
+ s->s_flags |= SB_POSIXACL;
else
- s->s_flags &= ~MS_POSIXACL;
+ s->s_flags &= ~SB_POSIXACL;
return err;
}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 0186fe6d39f3..8f06fd1f3d69 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -451,7 +451,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
static int romfs_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
return 0;
}
@@ -502,7 +502,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_maxbytes = 0xFFFFFFFF;
sb->s_magic = ROMFS_MAGIC;
- sb->s_flags |= MS_RDONLY | MS_NOATIME;
+ sb->s_flags |= SB_RDONLY | SB_NOATIME;
sb->s_op = &romfs_super_ops;
#ifdef CONFIG_ROMFS_ON_MTD
diff --git a/fs/select.c b/fs/select.c
index 063067e606ca..6de493bb42a4 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -292,8 +292,7 @@ static int poll_select_copy_remaining(struct timespec64 *end_time,
void __user *p,
int timeval, int ret)
{
- struct timespec64 rts64;
- struct timespec rts;
+ struct timespec64 rts;
struct timeval rtv;
if (!p)
@@ -306,23 +305,22 @@ static int poll_select_copy_remaining(struct timespec64 *end_time,
if (!end_time->tv_sec && !end_time->tv_nsec)
return ret;
- ktime_get_ts64(&rts64);
- rts64 = timespec64_sub(*end_time, rts64);
- if (rts64.tv_sec < 0)
- rts64.tv_sec = rts64.tv_nsec = 0;
+ ktime_get_ts64(&rts);
+ rts = timespec64_sub(*end_time, rts);
+ if (rts.tv_sec < 0)
+ rts.tv_sec = rts.tv_nsec = 0;
- rts = timespec64_to_timespec(rts64);
if (timeval) {
if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
memset(&rtv, 0, sizeof(rtv));
- rtv.tv_sec = rts64.tv_sec;
- rtv.tv_usec = rts64.tv_nsec / NSEC_PER_USEC;
+ rtv.tv_sec = rts.tv_sec;
+ rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
if (!copy_to_user(p, &rtv, sizeof(rtv)))
return ret;
- } else if (!copy_to_user(p, &rts, sizeof(rts)))
+ } else if (!put_timespec64(&rts, p))
return ret;
/*
@@ -705,17 +703,15 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
const sigset_t __user *sigmask, size_t sigsetsize)
{
sigset_t ksigmask, sigsaved;
- struct timespec ts;
- struct timespec64 ts64, end_time, *to = NULL;
+ struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (copy_from_user(&ts, tsp, sizeof(ts)))
+ if (get_timespec64(&ts, tsp))
return -EFAULT;
- ts64 = timespec_to_timespec64(ts);
to = &end_time;
- if (poll_select_set_timeout(to, ts64.tv_sec, ts64.tv_nsec))
+ if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
return -EINVAL;
}
@@ -1052,12 +1048,11 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
size_t, sigsetsize)
{
sigset_t ksigmask, sigsaved;
- struct timespec ts;
- struct timespec64 end_time, *to = NULL;
+ struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (copy_from_user(&ts, tsp, sizeof(ts)))
+ if (get_timespec64(&ts, tsp))
return -EFAULT;
to = &end_time;
@@ -1103,10 +1098,10 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
static
-int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p,
+int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *p,
int timeval, int ret)
{
- struct timespec ts;
+ struct timespec64 ts;
if (!p)
return ret;
@@ -1118,8 +1113,8 @@ int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p,
if (!end_time->tv_sec && !end_time->tv_nsec)
return ret;
- ktime_get_ts(&ts);
- ts = timespec_sub(*end_time, ts);
+ ktime_get_ts64(&ts);
+ ts = timespec64_sub(*end_time, ts);
if (ts.tv_sec < 0)
ts.tv_sec = ts.tv_nsec = 0;
@@ -1132,12 +1127,7 @@ int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p,
if (!copy_to_user(p, &rtv, sizeof(rtv)))
return ret;
} else {
- struct compat_timespec rts;
-
- rts.tv_sec = ts.tv_sec;
- rts.tv_nsec = ts.tv_nsec;
-
- if (!copy_to_user(p, &rts, sizeof(rts)))
+ if (!compat_put_timespec64(&ts, p))
return ret;
}
/*
@@ -1195,7 +1185,7 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
*/
static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp, compat_ulong_t __user *exp,
- struct timespec *end_time)
+ struct timespec64 *end_time)
{
fd_set_bits fds;
void *bits;
@@ -1268,7 +1258,7 @@ COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
struct compat_timeval __user *, tvp)
{
- struct timespec end_time, *to = NULL;
+ struct timespec64 end_time, *to = NULL;
struct compat_timeval tv;
int ret;
@@ -1312,14 +1302,12 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
compat_size_t sigsetsize)
{
- compat_sigset_t ss32;
sigset_t ksigmask, sigsaved;
- struct compat_timespec ts;
- struct timespec end_time, *to = NULL;
+ struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (copy_from_user(&ts, tsp, sizeof(ts)))
+ if (compat_get_timespec64(&ts, tsp))
return -EFAULT;
to = &end_time;
@@ -1330,9 +1318,8 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
if (sigmask) {
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
- if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+ if (get_compat_sigset(&ksigmask, sigmask))
return -EFAULT;
- sigset_from_compat(&ksigmask, &ss32);
sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
@@ -1381,14 +1368,12 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
unsigned int, nfds, struct compat_timespec __user *, tsp,
const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
{
- compat_sigset_t ss32;
sigset_t ksigmask, sigsaved;
- struct compat_timespec ts;
- struct timespec end_time, *to = NULL;
+ struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (copy_from_user(&ts, tsp, sizeof(ts)))
+ if (compat_get_timespec64(&ts, tsp))
return -EFAULT;
to = &end_time;
@@ -1399,9 +1384,8 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
if (sigmask) {
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
- if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+ if (get_compat_sigset(&ksigmask, sigmask))
return -EFAULT;
- sigset_from_compat(&ksigmask, &ss32);
sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 1c667af86da5..5f1ff8756595 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -313,15 +313,13 @@ COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd,
compat_size_t, sigsetsize,
int, flags)
{
- compat_sigset_t ss32;
sigset_t tmp;
sigset_t __user *ksigmask;
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
- if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+ if (get_compat_sigset(&tmp, sigmask))
return -EFAULT;
- sigset_from_compat(&tmp, &ss32);
ksigmask = compat_alloc_user_space(sizeof(sigset_t));
if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
return -EFAULT;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index cf01e15a7b16..8a73b97217c8 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -195,7 +195,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
(u64) le64_to_cpu(sblk->id_table_start));
sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
sb->s_op = &squashfs_super_ops;
err = -ENOMEM;
@@ -373,7 +373,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf)
static int squashfs_remount(struct super_block *sb, int *flags, char *data)
{
sync_filesystem(sb);
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
return 0;
}
diff --git a/fs/statfs.c b/fs/statfs.c
index c25dd9a26cc1..5b2a24f0f263 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -35,11 +35,11 @@ static int flags_by_mnt(int mnt_flags)
static int flags_by_sb(int s_flags)
{
int flags = 0;
- if (s_flags & MS_SYNCHRONOUS)
+ if (s_flags & SB_SYNCHRONOUS)
flags |= ST_SYNCHRONOUS;
- if (s_flags & MS_MANDLOCK)
+ if (s_flags & SB_MANDLOCK)
flags |= ST_MANDLOCK;
- if (s_flags & MS_RDONLY)
+ if (s_flags & SB_RDONLY)
flags |= ST_RDONLY;
return flags;
}
@@ -217,7 +217,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
return error;
}
-int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
+static int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
{
struct super_block *s = user_get_super(dev);
int err;
diff --git a/fs/super.c b/fs/super.c
index 994db21f59bf..d4e33e8f1e6f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -155,21 +155,19 @@ static void destroy_super_rcu(struct rcu_head *head)
schedule_work(&s->destroy_work);
}
-/**
- * destroy_super - frees a superblock
- * @s: superblock to free
- *
- * Frees a superblock.
- */
-static void destroy_super(struct super_block *s)
+/* Free a superblock that has never been seen by anyone */
+static void destroy_unused_super(struct super_block *s)
{
+ if (!s)
+ return;
+ up_write(&s->s_umount);
list_lru_destroy(&s->s_dentry_lru);
list_lru_destroy(&s->s_inode_lru);
security_sb_free(s);
- WARN_ON(!list_empty(&s->s_mounts));
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
- call_rcu(&s->rcu, destroy_super_rcu);
+ /* no delays needed */
+ destroy_super_work(&s->destroy_work);
}
/**
@@ -257,7 +255,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
return s;
fail:
- destroy_super(s);
+ destroy_unused_super(s);
return NULL;
}
@@ -266,11 +264,17 @@ fail:
/*
* Drop a superblock's refcount. The caller must hold sb_lock.
*/
-static void __put_super(struct super_block *sb)
+static void __put_super(struct super_block *s)
{
- if (!--sb->s_count) {
- list_del_init(&sb->s_list);
- destroy_super(sb);
+ if (!--s->s_count) {
+ list_del_init(&s->s_list);
+ WARN_ON(s->s_dentry_lru.node);
+ WARN_ON(s->s_inode_lru.node);
+ WARN_ON(!list_empty(&s->s_mounts));
+ security_sb_free(s);
+ put_user_ns(s->s_user_ns);
+ kfree(s->s_subtype);
+ call_rcu(&s->rcu, destroy_super_rcu);
}
}
@@ -485,19 +489,12 @@ retry:
continue;
if (user_ns != old->s_user_ns) {
spin_unlock(&sb_lock);
- if (s) {
- up_write(&s->s_umount);
- destroy_super(s);
- }
+ destroy_unused_super(s);
return ERR_PTR(-EBUSY);
}
if (!grab_super(old))
goto retry;
- if (s) {
- up_write(&s->s_umount);
- destroy_super(s);
- s = NULL;
- }
+ destroy_unused_super(s);
return old;
}
}
@@ -512,8 +509,7 @@ retry:
err = set(s, data);
if (err) {
spin_unlock(&sb_lock);
- up_write(&s->s_umount);
- destroy_super(s);
+ destroy_unused_super(s);
return ERR_PTR(err);
}
s->s_type = type;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 20b8f82e115b..fb49510c5dcf 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -30,7 +30,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
void *ns;
bool new_sb;
- if (!(flags & MS_KERNMOUNT)) {
+ if (!(flags & SB_KERNMOUNT)) {
if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
return ERR_PTR(-EPERM);
}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 3c47b7d5d4cf..bec9f79adb25 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -63,7 +63,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data)
sync_filesystem(sb);
if (sbi->s_forced_ro)
- *flags |= MS_RDONLY;
+ *flags |= SB_RDONLY;
return 0;
}
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 0d56e486b392..89765ddfb738 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -333,7 +333,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
/* set up enough so that it can read an inode */
sb->s_op = &sysv_sops;
if (sbi->s_forced_ro)
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
if (sbi->s_truncate)
sb->s_d_op = &sysv_dentry_operations;
root_inode = sysv_iget(sb, SYSV_ROOT_INO);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index a02aa59d1e24..dfe85069586e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1406,7 +1406,7 @@ int ubifs_update_time(struct inode *inode, struct timespec *time,
if (flags & S_MTIME)
inode->i_mtime = *time;
- if (!(inode->i_sb->s_flags & MS_LAZYTIME))
+ if (!(inode->i_sb->s_flags & SB_LAZYTIME))
iflags |= I_DIRTY_SYNC;
release = ui->dirty;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 3be28900bf37..fe77e9625e84 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -84,7 +84,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
if (!c->ro_error) {
c->ro_error = 1;
c->no_chk_data_crc = 0;
- c->vfs_sb->s_flags |= MS_RDONLY;
+ c->vfs_sb->s_flags |= SB_RDONLY;
ubifs_warn(c, "switched to read-only mode, error %d", err);
dump_stack();
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 7503e7cdf870..0beb285b143d 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -968,7 +968,7 @@ static int parse_standard_option(const char *option)
pr_notice("UBIFS: parse %s\n", option);
if (!strcmp(option, "sync"))
- return MS_SYNCHRONOUS;
+ return SB_SYNCHRONOUS;
return 0;
}
@@ -1160,8 +1160,8 @@ static int mount_ubifs(struct ubifs_info *c)
size_t sz;
c->ro_mount = !!sb_rdonly(c->vfs_sb);
- /* Suppress error messages while probing if MS_SILENT is set */
- c->probing = !!(c->vfs_sb->s_flags & MS_SILENT);
+ /* Suppress error messages while probing if SB_SILENT is set */
+ c->probing = !!(c->vfs_sb->s_flags & SB_SILENT);
err = init_constants_early(c);
if (err)
@@ -1852,7 +1852,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
return err;
}
- if (c->ro_mount && !(*flags & MS_RDONLY)) {
+ if (c->ro_mount && !(*flags & SB_RDONLY)) {
if (c->ro_error) {
ubifs_msg(c, "cannot re-mount R/W due to prior errors");
return -EROFS;
@@ -1864,7 +1864,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
err = ubifs_remount_rw(c);
if (err)
return err;
- } else if (!c->ro_mount && (*flags & MS_RDONLY)) {
+ } else if (!c->ro_mount && (*flags & SB_RDONLY)) {
if (c->ro_error) {
ubifs_msg(c, "cannot re-mount R/O due to prior errors");
return -EROFS;
@@ -2117,7 +2117,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
*/
ubi = open_ubi(name, UBI_READONLY);
if (IS_ERR(ubi)) {
- if (!(flags & MS_SILENT))
+ if (!(flags & SB_SILENT))
pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d",
current->pid, name, (int)PTR_ERR(ubi));
return ERR_CAST(ubi);
@@ -2143,18 +2143,18 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
kfree(c);
/* A new mount point for already mounted UBIFS */
dbg_gen("this ubi volume is already mounted");
- if (!!(flags & MS_RDONLY) != c1->ro_mount) {
+ if (!!(flags & SB_RDONLY) != c1->ro_mount) {
err = -EBUSY;
goto out_deact;
}
} else {
- err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
+ err = ubifs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0);
if (err)
goto out_deact;
/* We do not support atime */
- sb->s_flags |= MS_ACTIVE;
+ sb->s_flags |= SB_ACTIVE;
#ifndef CONFIG_UBIFS_ATIME_SUPPORT
- sb->s_flags |= MS_NOATIME;
+ sb->s_flags |= SB_NOATIME;
#else
ubifs_msg(c, "full atime support is enabled.");
#endif
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 63c7468147eb..5ee7af879cc4 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1201,7 +1201,7 @@ struct ubifs_debug_info;
* @need_recovery: %1 if the file-system needs recovery
* @replaying: %1 during journal replay
* @mounting: %1 while mounting
- * @probing: %1 while attempting to mount if MS_SILENT mount flag is set
+ * @probing: %1 while attempting to mount if SB_SILENT mount flag is set
* @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
* @replay_list: temporary list used during journal replay
* @replay_buds: list of buds to replay
@@ -1850,7 +1850,7 @@ __printf(2, 3)
void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...);
/*
* A conditional variant of 'ubifs_err()' which doesn't output anything
- * if probing (ie. MS_SILENT set).
+ * if probing (ie. SB_SILENT set).
*/
#define ubifs_errc(c, fmt, ...) \
do { \
diff --git a/fs/udf/super.c b/fs/udf/super.c
index f80e0a0f24d3..f73239a9a97d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -650,7 +650,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
sync_filesystem(sb);
if (lvidiu) {
int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev);
- if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY))
+ if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & SB_RDONLY))
return -EACCES;
}
@@ -673,10 +673,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
sbi->s_dmode = uopt.dmode;
write_unlock(&sbi->s_cred_lock);
- if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb))
+ if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
goto out_unlock;
- if (*flags & MS_RDONLY)
+ if (*flags & SB_RDONLY)
udf_close_lvid(sb);
else
udf_open_lvid(sb);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index b5cd79065ef9..e727ee07dbe4 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -115,7 +115,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
@@ -205,7 +205,7 @@ do_more:
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
if (overflow) {
@@ -567,7 +567,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
@@ -688,7 +688,7 @@ cg_found:
succed:
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 916b4a428933..e1ef0f0a1353 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -112,7 +112,7 @@ void ufs_free_inode (struct inode * inode)
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
@@ -146,14 +146,14 @@ static void ufs2_init_inodes_chunk(struct super_block *sb,
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
unlock_buffer(bh);
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
sync_dirty_buffer(bh);
brelse(bh);
}
fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb);
ubh_mark_buffer_dirty(UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
UFSD("EXIT\n");
@@ -284,7 +284,7 @@ cg_found:
}
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
@@ -330,7 +330,7 @@ cg_found:
ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, ts.tv_nsec);
mark_buffer_dirty(bh);
unlock_buffer(bh);
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
sync_dirty_buffer(bh);
brelse(bh);
}
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6440003f8ddc..4d497e9c6883 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -282,7 +282,7 @@ void ufs_error (struct super_block * sb, const char * function,
usb1->fs_clean = UFS_FSBAD;
ubh_mark_buffer_dirty(USPI_UBH(uspi));
ufs_mark_sb_dirty(sb);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
va_start(args, fmt);
vaf.fmt = fmt;
@@ -320,7 +320,7 @@ void ufs_panic (struct super_block * sb, const char * function,
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
pr_crit("panic (device %s): %s: %pV\n",
sb->s_id, function, &vaf);
va_end(args);
@@ -905,7 +905,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
if (!sb_rdonly(sb)) {
if (!silent)
pr_info("ufstype=old is supported read-only\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
break;
@@ -921,7 +921,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
if (!sb_rdonly(sb)) {
if (!silent)
pr_info("ufstype=nextstep is supported read-only\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
break;
@@ -937,7 +937,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
if (!sb_rdonly(sb)) {
if (!silent)
pr_info("ufstype=nextstep-cd is supported read-only\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
break;
@@ -953,7 +953,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
if (!sb_rdonly(sb)) {
if (!silent)
pr_info("ufstype=openstep is supported read-only\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
break;
@@ -968,7 +968,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
if (!sb_rdonly(sb)) {
if (!silent)
pr_info("ufstype=hp is supported read-only\n");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
break;
default:
@@ -1125,21 +1125,21 @@ magic_found:
break;
case UFS_FSACTIVE:
pr_err("%s(): fs is active\n", __func__);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
break;
case UFS_FSBAD:
pr_err("%s(): fs is bad\n", __func__);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
break;
default:
pr_err("%s(): can't grok fs_clean 0x%x\n",
__func__, usb1->fs_clean);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
break;
}
} else {
pr_err("%s(): fs needs fsck\n", __func__);
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
}
/*
@@ -1328,7 +1328,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
return -EINVAL;
}
- if ((bool)(*mount_flags & MS_RDONLY) == sb_rdonly(sb)) {
+ if ((bool)(*mount_flags & SB_RDONLY) == sb_rdonly(sb)) {
UFS_SB(sb)->s_mount_opt = new_mount_opt;
mutex_unlock(&UFS_SB(sb)->s_lock);
return 0;
@@ -1337,7 +1337,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
/*
* fs was mouted as rw, remounting ro
*/
- if (*mount_flags & MS_RDONLY) {
+ if (*mount_flags & SB_RDONLY) {
ufs_put_super_internal(sb);
usb1->fs_time = cpu_to_fs32(sb, get_seconds());
if ((flags & UFS_ST_MASK) == UFS_ST_SUN
@@ -1346,7 +1346,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufs_set_fs_state(sb, usb1, usb3,
UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
ubh_mark_buffer_dirty (USPI_UBH(uspi));
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
} else {
/*
* fs was mounted as ro, remounting rw
@@ -1370,7 +1370,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
mutex_unlock(&UFS_SB(sb)->s_lock);
return -EPERM;
}
- sb->s_flags &= ~MS_RDONLY;
+ sb->s_flags &= ~SB_RDONLY;
#endif
}
UFS_SB(sb)->s_mount_opt = new_mount_opt;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 08df809e2315..1210f684d3c2 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5662,7 +5662,8 @@ xfs_bmap_collapse_extents(
*done = true;
goto del_cursor;
}
- XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
+ XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
+ del_cursor);
new_startoff = got.br_startoff - offset_shift_fsb;
if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
@@ -5767,7 +5768,8 @@ xfs_bmap_insert_extents(
goto del_cursor;
}
}
- XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
+ XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
+ del_cursor);
if (stop_fsb >= got.br_startoff + got.br_blockcount) {
error = -EIO;
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 343a94246f5b..89bf16b4d937 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -302,7 +302,7 @@ xfs_iext_rec_cmp(
xfs_fileoff_t offset)
{
uint64_t rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
- u32 rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
+ uint32_t rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
if (rec_offset > offset)
return 1;
@@ -850,9 +850,9 @@ static void
xfs_iext_free_last_leaf(
struct xfs_ifork *ifp)
{
- ifp->if_u1.if_root = NULL;
ifp->if_height--;
kmem_free(ifp->if_u1.if_root);
+ ifp->if_u1.if_root = NULL;
}
void
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 1c90ec41e9df..c79a1616b79d 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -42,11 +42,6 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
-static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev)
-{
- return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
-}
-
/*
* Copy inode type and data and attr format specific information from the
* on-disk inode to the in-core inode and fork structures. For fifos, devices,
@@ -792,7 +787,8 @@ xfs_iflush_fork(
case XFS_DINODE_FMT_DEV:
if (iip->ili_fields & XFS_ILOG_DEV) {
ASSERT(whichfork == XFS_DATA_FORK);
- xfs_dinode_put_rdev(dip, sysv_encode_dev(VFS_I(ip)->i_rdev));
+ xfs_dinode_put_rdev(dip,
+ linux_to_xfs_dev_t(VFS_I(ip)->i_rdev));
}
break;
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 996f035ee205..349d9f8edb89 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -274,7 +274,7 @@ struct xfs_inode_log_format {
uint64_t ilf_ino; /* inode number */
union {
uint32_t ilfu_rdev; /* rdev value for dev inode*/
- u8 __pad[16]; /* unused */
+ uint8_t __pad[16]; /* unused */
} ilf_u;
int64_t ilf_blkno; /* blkno of inode buffer */
int32_t ilf_len; /* len of inode buffer */
@@ -295,7 +295,7 @@ struct xfs_inode_log_format_32 {
uint64_t ilf_ino; /* inode number */
union {
uint32_t ilfu_rdev; /* rdev value for dev inode*/
- u8 __pad[16]; /* unused */
+ uint8_t __pad[16]; /* unused */
} ilf_u;
int64_t ilf_blkno; /* blkno of inode buffer */
int32_t ilf_len; /* len of inode buffer */
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 637b7a892313..f120fb20452f 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -318,8 +318,20 @@ xfs_scrub_dinode(
/* di_mode */
mode = be16_to_cpu(dip->di_mode);
- if (mode & ~(S_IALLUGO | S_IFMT))
+ switch (mode & S_IFMT) {
+ case S_IFLNK:
+ case S_IFREG:
+ case S_IFDIR:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
+ /* mode is recognized */
+ break;
+ default:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ break;
+ }
/* v1/v2 fields */
switch (dip->di_version) {
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 8e58ba842946..3d9037eceaf1 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -107,7 +107,7 @@ xfs_scrub_quota_item(
unsigned long long rcount;
xfs_ino_t fs_icount;
- offset = id * qi->qi_dqperchunk;
+ offset = id / qi->qi_dqperchunk;
/*
* We fed $id and DQNEXT into the xfs_qm_dqget call, which means
@@ -207,7 +207,7 @@ xfs_scrub_quota(
xfs_dqid_t id = 0;
uint dqtype;
int nimaps;
- int error;
+ int error = 0;
if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return -ENOENT;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a3eeaba156c5..21e2d70884e1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -399,7 +399,7 @@ xfs_map_blocks(
(ip->i_df.if_flags & XFS_IFEXTENTS));
ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + count > mp->m_super->s_maxbytes)
+ if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes)
count = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -896,13 +896,13 @@ xfs_writepage_map(
struct writeback_control *wbc,
struct inode *inode,
struct page *page,
- loff_t offset,
- uint64_t end_offset)
+ uint64_t end_offset)
{
LIST_HEAD(submit_list);
struct xfs_ioend *ioend, *next;
struct buffer_head *bh, *head;
ssize_t len = i_blocksize(inode);
+ uint64_t offset;
int error = 0;
int count = 0;
int uptodate = 1;
@@ -1146,7 +1146,7 @@ xfs_do_writepage(
end_offset = offset;
}
- return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
+ return xfs_writepage_map(wpc, wbc, inode, page, end_offset);
redirty:
redirty_page_for_writepage(wbc, page);
@@ -1265,7 +1265,7 @@ xfs_map_trim_size(
if (mapping_size > size)
mapping_size = size;
if (offset < i_size_read(inode) &&
- offset + mapping_size >= i_size_read(inode)) {
+ (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) {
/* limit mapping to block that spans EOF */
mapping_size = roundup_64(i_size_read(inode) - offset,
i_blocksize(inode));
@@ -1312,7 +1312,7 @@ xfs_get_blocks(
lockmode = xfs_ilock_data_map_shared(ip);
ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + size > mp->m_super->s_maxbytes)
+ if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes)
size = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index dd136f7275e4..e5fb008d75e8 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -389,7 +389,8 @@ xfs_bud_init(
int
xfs_bui_recover(
struct xfs_mount *mp,
- struct xfs_bui_log_item *buip)
+ struct xfs_bui_log_item *buip,
+ struct xfs_defer_ops *dfops)
{
int error = 0;
unsigned int bui_type;
@@ -404,9 +405,7 @@ xfs_bui_recover(
xfs_exntst_t state;
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
- struct xfs_defer_ops dfops;
struct xfs_bmbt_irec irec;
- xfs_fsblock_t firstfsb;
ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
@@ -464,7 +463,6 @@ xfs_bui_recover(
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
- xfs_defer_init(&dfops, &firstfsb);
/* Process deferred bmap item. */
state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
@@ -479,16 +477,16 @@ xfs_bui_recover(
break;
default:
error = -EFSCORRUPTED;
- goto err_dfops;
+ goto err_inode;
}
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
+ error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type,
ip, whichfork, bmap->me_startoff,
bmap->me_startblock, &count, state);
if (error)
- goto err_dfops;
+ goto err_inode;
if (count > 0) {
ASSERT(type == XFS_BMAP_UNMAP);
@@ -496,16 +494,11 @@ xfs_bui_recover(
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
irec.br_state = state;
- error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
+ error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec);
if (error)
- goto err_dfops;
+ goto err_inode;
}
- /* Finish transaction, free inodes. */
- error = xfs_defer_finish(&tp, &dfops);
- if (error)
- goto err_dfops;
-
set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -513,8 +506,6 @@ xfs_bui_recover(
return error;
-err_dfops:
- xfs_defer_cancel(&dfops);
err_inode:
xfs_trans_cancel(tp);
if (ip) {
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index c867daae4a3c..24b354a2c836 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -93,6 +93,7 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,
struct xfs_bui_log_item *);
void xfs_bui_item_free(struct xfs_bui_log_item *);
void xfs_bui_release(struct xfs_bui_log_item *);
-int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip);
+int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip,
+ struct xfs_defer_ops *dfops);
#endif /* __XFS_BMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4db6e8d780f6..4c6e86d861fd 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1815,22 +1815,27 @@ xfs_alloc_buftarg(
btp->bt_daxdev = dax_dev;
if (xfs_setsize_buftarg_early(btp, bdev))
- goto error;
+ goto error_free;
if (list_lru_init(&btp->bt_lru))
- goto error;
+ goto error_free;
if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
- goto error;
+ goto error_lru;
btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
btp->bt_shrinker.seeks = DEFAULT_SEEKS;
btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
- register_shrinker(&btp->bt_shrinker);
+ if (register_shrinker(&btp->bt_shrinker))
+ goto error_pcpu;
return btp;
-error:
+error_pcpu:
+ percpu_counter_destroy(&btp->bt_io_count);
+error_lru:
+ list_lru_destroy(&btp->bt_lru);
+error_free:
kmem_free(btp);
return NULL;
}
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index d57c2db64e59..f248708c10ff 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -970,14 +970,22 @@ xfs_qm_dqflush_done(
* holding the lock before removing the dquot from the AIL.
*/
if ((lip->li_flags & XFS_LI_IN_AIL) &&
- lip->li_lsn == qip->qli_flush_lsn) {
+ ((lip->li_lsn == qip->qli_flush_lsn) ||
+ (lip->li_flags & XFS_LI_FAILED))) {
/* xfs_trans_ail_delete() drops the AIL lock. */
spin_lock(&ailp->xa_lock);
- if (lip->li_lsn == qip->qli_flush_lsn)
+ if (lip->li_lsn == qip->qli_flush_lsn) {
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
- else
+ } else {
+ /*
+ * Clear the failed state since we are about to drop the
+ * flush lock
+ */
+ if (lip->li_flags & XFS_LI_FAILED)
+ xfs_clear_li_failed(lip);
spin_unlock(&ailp->xa_lock);
+ }
}
/*
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 2c7a1629e064..664dea105e76 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait(
wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
}
+/*
+ * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
+ * have been failed during writeback
+ *
+ * this informs the AIL that the dquot is already flush locked on the next push,
+ * and acquires a hold on the buffer to ensure that it isn't reclaimed before
+ * dirty data makes it to disk.
+ */
+STATIC void
+xfs_dquot_item_error(
+ struct xfs_log_item *lip,
+ struct xfs_buf *bp)
+{
+ struct xfs_dquot *dqp;
+
+ dqp = DQUOT_ITEM(lip)->qli_dquot;
+ ASSERT(!completion_done(&dqp->q_flush));
+ xfs_set_li_failed(lip, bp);
+}
+
STATIC uint
xfs_qm_dquot_logitem_push(
struct xfs_log_item *lip,
@@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push(
__acquires(&lip->li_ailp->xa_lock)
{
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
- struct xfs_buf *bp = NULL;
+ struct xfs_buf *bp = lip->li_buf;
uint rval = XFS_ITEM_SUCCESS;
int error;
if (atomic_read(&dqp->q_pincount) > 0)
return XFS_ITEM_PINNED;
+ /*
+ * The buffer containing this item failed to be written back
+ * previously. Resubmit the buffer for IO
+ */
+ if (lip->li_flags & XFS_LI_FAILED) {
+ if (!xfs_buf_trylock(bp))
+ return XFS_ITEM_LOCKED;
+
+ if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
+ rval = XFS_ITEM_FLUSHING;
+
+ xfs_buf_unlock(bp);
+ return rval;
+ }
+
if (!xfs_dqlock_nowait(dqp))
return XFS_ITEM_LOCKED;
@@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_unlock = xfs_qm_dquot_logitem_unlock,
.iop_committed = xfs_qm_dquot_logitem_committed,
.iop_push = xfs_qm_dquot_logitem_push,
- .iop_committing = xfs_qm_dquot_logitem_committing
+ .iop_committing = xfs_qm_dquot_logitem_committing,
+ .iop_error = xfs_dquot_item_error
};
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 18146873a8b3..8601275cc5e6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -44,6 +44,7 @@
#include <linux/falloc.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>
+#include <linux/mman.h>
static const struct vm_operations_struct xfs_file_vm_ops;
@@ -1045,7 +1046,11 @@ __xfs_filemap_fault(
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
- ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
+ pfn_t pfn;
+
+ ret = dax_iomap_fault(vmf, pe_size, &pfn, &xfs_iomap_ops);
+ if (ret & VM_FAULT_NEEDDSYNC)
+ ret = dax_finish_sync_fault(vmf, pe_size, pfn);
} else {
if (write_fault)
ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
@@ -1090,37 +1095,16 @@ xfs_filemap_page_mkwrite(
}
/*
- * pfn_mkwrite was originally inteneded to ensure we capture time stamp
- * updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
- * to ensure we serialise the fault barrier in place.
+ * pfn_mkwrite was originally intended to ensure we capture time stamp updates
+ * on write faults. In reality, it needs to serialise against truncate and
+ * prepare memory for writing so handle is as standard write fault.
*/
static int
xfs_filemap_pfn_mkwrite(
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vmf->vma->vm_file);
- struct xfs_inode *ip = XFS_I(inode);
- int ret = VM_FAULT_NOPAGE;
- loff_t size;
-
- trace_xfs_filemap_pfn_mkwrite(ip);
-
- sb_start_pagefault(inode->i_sb);
- file_update_time(vmf->vma->vm_file);
-
- /* check if the faulting page hasn't raced with truncate */
- xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (vmf->pgoff >= size)
- ret = VM_FAULT_SIGBUS;
- else if (IS_DAX(inode))
- ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
- xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
- sb_end_pagefault(inode->i_sb);
- return ret;
-
+ return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}
static const struct vm_operations_struct xfs_file_vm_ops = {
@@ -1136,6 +1120,13 @@ xfs_file_mmap(
struct file *filp,
struct vm_area_struct *vma)
{
+ /*
+ * We don't support synchronous mappings for non-DAX files. At least
+ * until someone comes with a sensible use case.
+ */
+ if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC))
+ return -EOPNOTSUPP;
+
file_accessed(filp);
vma->vm_ops = &xfs_file_vm_ops;
if (IS_DAX(file_inode(filp)))
@@ -1154,6 +1145,7 @@ const struct file_operations xfs_file_operations = {
.compat_ioctl = xfs_file_compat_ioctl,
#endif
.mmap = xfs_file_mmap,
+ .mmap_supported_flags = MAP_SYNC,
.open = xfs_file_open,
.release = xfs_file_release,
.fsync = xfs_file_fsync,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d8226f7a5dde..801274126648 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2357,6 +2357,7 @@ retry:
*/
if (ip->i_ino != inum + i) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ rcu_read_unlock();
continue;
}
}
@@ -2400,6 +2401,24 @@ retry:
}
/*
+ * Free any local-format buffers sitting around before we reset to
+ * extents format.
+ */
+static inline void
+xfs_ifree_local_data(
+ struct xfs_inode *ip,
+ int whichfork)
+{
+ struct xfs_ifork *ifp;
+
+ if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+ return;
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
+}
+
+/*
* This is called to return an inode to the inode free list.
* The inode should already be truncated to 0 length and have
* no pages associated with it. This routine also assumes that
@@ -2436,6 +2455,9 @@ xfs_ifree(
if (error)
return error;
+ xfs_ifree_local_data(ip, XFS_DATA_FORK);
+ xfs_ifree_local_data(ip, XFS_ATTR_FORK);
+
VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
ip->i_d.di_dmevmask = 0;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 18077e2189a9..33eb4fb2e3fd 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -34,6 +34,7 @@
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
+#include "xfs_inode_item.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
@@ -1089,6 +1090,10 @@ xfs_file_iomap_begin(
trace_xfs_iomap_found(ip, offset, length, 0, &imap);
}
+ if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields
+ & ~XFS_ILOG_TIMESTAMP))
+ iomap->flags |= IOMAP_F_DIRTY;
+
xfs_bmbt_to_iomap(ip, iomap, &imap);
if (shared)
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 6282bfc1afa9..99562ec0de56 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -204,6 +204,16 @@ static inline kgid_t xfs_gid_to_kgid(uint32_t gid)
return make_kgid(&init_user_ns, gid);
}
+static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev)
+{
+ return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
+}
+
+static inline xfs_dev_t linux_to_xfs_dev_t(dev_t dev)
+{
+ return sysv_encode_dev(dev);
+}
+
/*
* Various platform dependent calls that don't fit anywhere else
*/
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 38d4227895ae..a503af96d780 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -781,17 +781,17 @@ xfs_log_mount_finish(
* something to an unlinked inode, the irele won't cause
* premature truncation and freeing of the inode, which results
* in log recovery failure. We have to evict the unreferenced
- * lru inodes after clearing MS_ACTIVE because we don't
+ * lru inodes after clearing SB_ACTIVE because we don't
* otherwise clean up the lru if there's a subsequent failure in
* xfs_mountfs, which leads to us leaking the inodes if nothing
* else (e.g. quotacheck) references the inodes before the
* mount failure occurs.
*/
- mp->m_super->s_flags |= MS_ACTIVE;
+ mp->m_super->s_flags |= SB_ACTIVE;
error = xlog_recover_finish(mp->m_log);
if (!error)
xfs_log_work_queue(mp);
- mp->m_super->s_flags &= ~MS_ACTIVE;
+ mp->m_super->s_flags &= ~SB_ACTIVE;
evict_inodes(mp->m_super);
/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 87b1c331f9eb..28d1abfe835e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -24,6 +24,7 @@
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
+#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
@@ -4716,7 +4717,8 @@ STATIC int
xlog_recover_process_cui(
struct xfs_mount *mp,
struct xfs_ail *ailp,
- struct xfs_log_item *lip)
+ struct xfs_log_item *lip,
+ struct xfs_defer_ops *dfops)
{
struct xfs_cui_log_item *cuip;
int error;
@@ -4729,7 +4731,7 @@ xlog_recover_process_cui(
return 0;
spin_unlock(&ailp->xa_lock);
- error = xfs_cui_recover(mp, cuip);
+ error = xfs_cui_recover(mp, cuip, dfops);
spin_lock(&ailp->xa_lock);
return error;
@@ -4756,7 +4758,8 @@ STATIC int
xlog_recover_process_bui(
struct xfs_mount *mp,
struct xfs_ail *ailp,
- struct xfs_log_item *lip)
+ struct xfs_log_item *lip,
+ struct xfs_defer_ops *dfops)
{
struct xfs_bui_log_item *buip;
int error;
@@ -4769,7 +4772,7 @@ xlog_recover_process_bui(
return 0;
spin_unlock(&ailp->xa_lock);
- error = xfs_bui_recover(mp, buip);
+ error = xfs_bui_recover(mp, buip, dfops);
spin_lock(&ailp->xa_lock);
return error;
@@ -4805,6 +4808,46 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
}
}
+/* Take all the collected deferred ops and finish them in order. */
+static int
+xlog_finish_defer_ops(
+ struct xfs_mount *mp,
+ struct xfs_defer_ops *dfops)
+{
+ struct xfs_trans *tp;
+ int64_t freeblks;
+ uint resblks;
+ int error;
+
+ /*
+ * We're finishing the defer_ops that accumulated as a result of
+ * recovering unfinished intent items during log recovery. We
+ * reserve an itruncate transaction because it is the largest
+ * permanent transaction type. Since we're the only user of the fs
+ * right now, take 93% (15/16) of the available free blocks. Use
+ * weird math to avoid a 64-bit division.
+ */
+ freeblks = percpu_counter_sum(&mp->m_fdblocks);
+ if (freeblks <= 0)
+ return -ENOSPC;
+ resblks = min_t(int64_t, UINT_MAX, freeblks);
+ resblks = (resblks * 15) >> 4;
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
+ 0, XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
+
+ error = xfs_defer_finish(&tp, dfops);
+ if (error)
+ goto out_cancel;
+
+ return xfs_trans_commit(tp);
+
+out_cancel:
+ xfs_trans_cancel(tp);
+ return error;
+}
+
/*
* When this is called, all of the log intent items which did not have
* corresponding log done items should be in the AIL. What we do now
@@ -4825,10 +4868,12 @@ STATIC int
xlog_recover_process_intents(
struct xlog *log)
{
- struct xfs_log_item *lip;
- int error = 0;
+ struct xfs_defer_ops dfops;
struct xfs_ail_cursor cur;
+ struct xfs_log_item *lip;
struct xfs_ail *ailp;
+ xfs_fsblock_t firstfsb;
+ int error = 0;
#if defined(DEBUG) || defined(XFS_WARN)
xfs_lsn_t last_lsn;
#endif
@@ -4839,6 +4884,7 @@ xlog_recover_process_intents(
#if defined(DEBUG) || defined(XFS_WARN)
last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
#endif
+ xfs_defer_init(&dfops, &firstfsb);
while (lip != NULL) {
/*
* We're done when we see something other than an intent.
@@ -4859,6 +4905,12 @@ xlog_recover_process_intents(
*/
ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
+ /*
+ * NOTE: If your intent processing routine can create more
+ * deferred ops, you /must/ attach them to the dfops in this
+ * routine or else those subsequent intents will get
+ * replayed in the wrong order!
+ */
switch (lip->li_type) {
case XFS_LI_EFI:
error = xlog_recover_process_efi(log->l_mp, ailp, lip);
@@ -4867,10 +4919,12 @@ xlog_recover_process_intents(
error = xlog_recover_process_rui(log->l_mp, ailp, lip);
break;
case XFS_LI_CUI:
- error = xlog_recover_process_cui(log->l_mp, ailp, lip);
+ error = xlog_recover_process_cui(log->l_mp, ailp, lip,
+ &dfops);
break;
case XFS_LI_BUI:
- error = xlog_recover_process_bui(log->l_mp, ailp, lip);
+ error = xlog_recover_process_bui(log->l_mp, ailp, lip,
+ &dfops);
break;
}
if (error)
@@ -4880,6 +4934,11 @@ xlog_recover_process_intents(
out:
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
+ if (error)
+ xfs_defer_cancel(&dfops);
+ else
+ error = xlog_finish_defer_ops(log->l_mp, &dfops);
+
return error;
}
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 8f2e2fac4255..3a55d6fc271b 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -393,7 +393,8 @@ xfs_cud_init(
int
xfs_cui_recover(
struct xfs_mount *mp,
- struct xfs_cui_log_item *cuip)
+ struct xfs_cui_log_item *cuip,
+ struct xfs_defer_ops *dfops)
{
int i;
int error = 0;
@@ -405,11 +406,9 @@ xfs_cui_recover(
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
enum xfs_refcount_intent_type type;
- xfs_fsblock_t firstfsb;
xfs_fsblock_t new_fsb;
xfs_extlen_t new_len;
struct xfs_bmbt_irec irec;
- struct xfs_defer_ops dfops;
bool requeue_only = false;
ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
@@ -465,7 +464,6 @@ xfs_cui_recover(
return error;
cudp = xfs_trans_get_cud(tp, cuip);
- xfs_defer_init(&dfops, &firstfsb);
for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
refc = &cuip->cui_format.cui_extents[i];
refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
@@ -485,7 +483,7 @@ xfs_cui_recover(
new_len = refc->pe_len;
} else
error = xfs_trans_log_finish_refcount_update(tp, cudp,
- &dfops, type, refc->pe_startblock, refc->pe_len,
+ dfops, type, refc->pe_startblock, refc->pe_len,
&new_fsb, &new_len, &rcur);
if (error)
goto abort_error;
@@ -497,21 +495,21 @@ xfs_cui_recover(
switch (type) {
case XFS_REFCOUNT_INCREASE:
error = xfs_refcount_increase_extent(
- tp->t_mountp, &dfops, &irec);
+ tp->t_mountp, dfops, &irec);
break;
case XFS_REFCOUNT_DECREASE:
error = xfs_refcount_decrease_extent(
- tp->t_mountp, &dfops, &irec);
+ tp->t_mountp, dfops, &irec);
break;
case XFS_REFCOUNT_ALLOC_COW:
error = xfs_refcount_alloc_cow_extent(
- tp->t_mountp, &dfops,
+ tp->t_mountp, dfops,
irec.br_startblock,
irec.br_blockcount);
break;
case XFS_REFCOUNT_FREE_COW:
error = xfs_refcount_free_cow_extent(
- tp->t_mountp, &dfops,
+ tp->t_mountp, dfops,
irec.br_startblock,
irec.br_blockcount);
break;
@@ -525,17 +523,12 @@ xfs_cui_recover(
}
xfs_refcount_finish_one_cleanup(tp, rcur, error);
- error = xfs_defer_finish(&tp, &dfops);
- if (error)
- goto abort_defer;
set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
error = xfs_trans_commit(tp);
return error;
abort_error:
xfs_refcount_finish_one_cleanup(tp, rcur, error);
-abort_defer:
- xfs_defer_cancel(&dfops);
xfs_trans_cancel(tp);
return error;
}
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index 5b74dddfa64b..0e5327349a13 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -96,6 +96,7 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,
struct xfs_cui_log_item *);
void xfs_cui_item_free(struct xfs_cui_log_item *);
void xfs_cui_release(struct xfs_cui_log_item *);
-int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip);
+int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip,
+ struct xfs_defer_ops *dfops);
#endif /* __XFS_REFCOUNT_ITEM_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f663022353c0..5122d3021117 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -212,9 +212,9 @@ xfs_parseargs(
*/
if (sb_rdonly(sb))
mp->m_flags |= XFS_MOUNT_RDONLY;
- if (sb->s_flags & MS_DIRSYNC)
+ if (sb->s_flags & SB_DIRSYNC)
mp->m_flags |= XFS_MOUNT_DIRSYNC;
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
mp->m_flags |= XFS_MOUNT_WSYNC;
/*
@@ -1312,7 +1312,7 @@ xfs_fs_remount(
}
/* ro -> rw */
- if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
xfs_warn(mp,
"ro->rw transition prohibited on norecovery mount");
@@ -1368,7 +1368,7 @@ xfs_fs_remount(
}
/* rw -> ro */
- if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
/* Free the per-AG metadata reservation pool. */
error = xfs_fs_unreserve_ag_blocks(mp);
if (error) {
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 5f2f32408011..fcc5dfc70aa0 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -30,7 +30,7 @@ extern void xfs_qm_exit(void);
#ifdef CONFIG_XFS_POSIX_ACL
# define XFS_ACL_STRING "ACLs, "
-# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL)
+# define set_posix_acl_flag(sb) ((sb)->s_flags |= SB_POSIXACL)
#else
# define XFS_ACL_STRING
# define set_posix_acl_flag(sb) do { } while (0)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 515ba042d75c..d718a10c2271 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -654,8 +654,6 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
-DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
-
TRACE_EVENT(xfs_filemap_fault,
TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
bool write_fault),