Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_super.c | 3
-rw-r--r--  fs/Makefile | 2
-rw-r--r--  fs/adfs/super.c | 3
-rw-r--r--  fs/affs/super.c | 3
-rw-r--r--  fs/befs/linuxvfs.c | 3
-rw-r--r--  fs/bfs/inode.c | 3
-rw-r--r--  fs/ceph/super.c | 3
-rw-r--r--  fs/cifs/Makefile | 2
-rw-r--r--  fs/cifs/asn1.c | 16
-rw-r--r--  fs/cifs/cifs_unicode.c | 8
-rw-r--r--  fs/cifs/cifsacl.c | 5
-rw-r--r--  fs/cifs/cifsfs.c | 8
-rw-r--r--  fs/cifs/cifsglob.h | 16
-rw-r--r--  fs/cifs/cifsproto.h | 2
-rw-r--r--  fs/cifs/connect.c | 264
-rw-r--r--  fs/cifs/fs_context.c | 221
-rw-r--r--  fs/cifs/fs_context.h | 58
-rw-r--r--  fs/cifs/inode.c | 13
-rw-r--r--  fs/cifs/readdir.c | 60
-rw-r--r--  fs/cifs/smb2glob.h | 1
-rw-r--r--  fs/cifs/smb2inode.c | 11
-rw-r--r--  fs/cifs/smb2maperror.c | 4
-rw-r--r--  fs/cifs/smb2ops.c | 83
-rw-r--r--  fs/cifs/smb2pdu.c | 53
-rw-r--r--  fs/cifs/smb2pdu.h | 90
-rw-r--r--  fs/cifs/smb2proto.h | 3
-rw-r--r--  fs/cifs/smb2transport.c | 8
-rw-r--r--  fs/cifs/trace.h | 18
-rw-r--r--  fs/cifs/transport.c | 5
-rw-r--r--  fs/cramfs/inode.c | 3
-rw-r--r--  fs/efs/super.c | 3
-rw-r--r--  fs/erofs/super.c | 3
-rw-r--r--  fs/exfat/super.c | 3
-rw-r--r--  fs/ext2/super.c | 3
-rw-r--r--  fs/ext4/super.c | 3
-rw-r--r--  fs/f2fs/super.c | 3
-rw-r--r--  fs/fat/inode.c | 3
-rw-r--r--  fs/file_table.c | 2
-rw-r--r--  fs/gfs2/aops.c | 68
-rw-r--r--  fs/gfs2/bmap.c | 62
-rw-r--r--  fs/gfs2/bmap.h | 1
-rw-r--r--  fs/gfs2/glock.c | 52
-rw-r--r--  fs/gfs2/glops.c | 36
-rw-r--r--  fs/gfs2/incore.h | 29
-rw-r--r--  fs/gfs2/log.c | 89
-rw-r--r--  fs/gfs2/log.h | 2
-rw-r--r--  fs/gfs2/lops.c | 2
-rw-r--r--  fs/gfs2/lops.h | 1
-rw-r--r--  fs/gfs2/meta_io.c | 81
-rw-r--r--  fs/gfs2/meta_io.h | 2
-rw-r--r--  fs/gfs2/ops_fstype.c | 173
-rw-r--r--  fs/gfs2/recovery.c | 108
-rw-r--r--  fs/gfs2/rgrp.c | 19
-rw-r--r--  fs/gfs2/rgrp.h | 2
-rw-r--r--  fs/gfs2/super.c | 220
-rw-r--r--  fs/gfs2/super.h | 5
-rw-r--r--  fs/gfs2/sys.c | 5
-rw-r--r--  fs/gfs2/trace_gfs2.h | 7
-rw-r--r--  fs/gfs2/util.c | 2
-rw-r--r--  fs/gfs2/util.h | 10
-rw-r--r--  fs/hfs/super.c | 3
-rw-r--r--  fs/hfsplus/super.c | 3
-rw-r--r--  fs/hpfs/super.c | 3
-rw-r--r--  fs/io-wq.c | 68
-rw-r--r--  fs/io-wq.h | 1
-rw-r--r--  fs/io_uring.c | 186
-rw-r--r--  fs/isofs/inode.c | 3
-rw-r--r--  fs/minix/inode.c | 3
-rw-r--r--  fs/namei.c | 3
-rw-r--r--  fs/namespace.c | 4
-rw-r--r--  fs/nilfs2/super.c | 3
-rw-r--r--  fs/ntfs/super.c | 3
-rw-r--r--  fs/omfs/inode.c | 6
-rw-r--r--  fs/proc_namespace.c | 1
-rw-r--r--  fs/qnx4/inode.c | 3
-rw-r--r--  fs/qnx6/inode.c | 3
-rw-r--r--  fs/read_write.c | 544
-rw-r--r--  fs/remap_range.c | 571
-rw-r--r--  fs/romfs/super.c | 3
-rw-r--r--  fs/splice.c | 63
-rw-r--r--  fs/squashfs/super.c | 3
-rw-r--r--  fs/stat.c | 70
-rw-r--r--  fs/statfs.c | 2
-rw-r--r--  fs/sysv/inode.c | 3
-rw-r--r--  fs/udf/super.c | 3
-rw-r--r--  fs/ufs/super.c | 3
-rw-r--r--  fs/xfs/xfs_bmap_util.c | 18
-rw-r--r--  fs/xfs/xfs_file.c | 40
-rw-r--r--  fs/xfs/xfs_linux.h | 6
-rw-r--r--  fs/xfs/xfs_log_recover.c | 8
-rw-r--r--  fs/xfs/xfs_super.c | 3
-rw-r--r--  fs/zonefs/super.c | 3
92 files changed, 2281 insertions(+), 1329 deletions(-)
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index e34fa20acf61..9a21269b7234 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -260,8 +260,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = rs.bavail;
buf->f_files = rs.files;
buf->f_ffree = rs.ffree;
- buf->f_fsid.val[0] = rs.fsid & 0xFFFFFFFFUL;
- buf->f_fsid.val[1] = (rs.fsid >> 32) & 0xFFFFFFFFUL;
+ buf->f_fsid = u64_to_fsid(rs.fsid);
buf->f_namelen = rs.namelen;
}
if (res != -ENOSYS)
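The same conversion repeats across the statfs hunks below: the two open-coded f_fsid.val[] assignments collapse into a single u64_to_fsid() call. For reference, the helper (defined in include/linux/statfs.h by this series) is essentially the sketch below; it is shown here for context and is not part of the diff itself:

static inline __kernel_fsid_t u64_to_fsid(u64 v)
{
	/* low 32 bits go into val[0], high 32 bits into val[1],
	 * matching the open-coded pattern being replaced above */
	return (__kernel_fsid_t){.val = {(u32)v, (u32)(v >> 32)}};
}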
diff --git a/fs/Makefile b/fs/Makefile
index 7bb2a05fda1f..999d1a23f036 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -14,7 +14,7 @@ obj-y := open.o read_write.o file_table.o super.o \
pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
fs_types.o fs_context.o fs_parser.o fsopen.o init.o \
- kernel_read_file.o
+ kernel_read_file.o remap_range.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index d553bb5bc17a..bdbd26e571ed 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -210,8 +210,7 @@ static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = sbi->s_namelen;
buf->f_bsize = sb->s_blocksize;
buf->f_ffree = (long)(buf->f_bfree * buf->f_files) / (long)buf->f_blocks;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index a100cd9950c8..c6c2a513ec92 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -620,8 +620,7 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = AFFS_SB(sb)->s_partition_size - AFFS_SB(sb)->s_reserved;
buf->f_bfree = free;
buf->f_bavail = free;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_namelen = AFFSNAMEMAX;
return 0;
}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 2482032021ca..c1ba13d19024 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -963,8 +963,7 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = buf->f_bfree;
buf->f_files = 0; /* UNKNOWN */
buf->f_ffree = 0; /* UNKNOWN */
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_namelen = BEFS_NAME_LEN;
befs_debug(sb, "<--- %s", __func__);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index f8ce1368218b..3ac7611ef7ce 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -229,8 +229,7 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bfree = buf->f_bavail = info->si_freeb;
buf->f_files = info->si_lasti + 1 - BFS_ROOT_INO;
buf->f_ffree = info->si_freei;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_namelen = BFS_NAMELEN;
return 0;
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 2516304379d3..33ba6f0aa55c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -104,8 +104,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1));
mutex_unlock(&monc->mutex);
- buf->f_fsid.val[0] = fsid & 0xffffffff;
- buf->f_fsid.val[1] = fsid >> 32;
+ buf->f_fsid = u64_to_fsid(fsid);
return 0;
}
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 51bae9340842..cd17d0e50f2a 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -10,7 +10,7 @@ cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \
cifs_unicode.o nterr.o cifsencrypt.o \
readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o \
smb2ops.o smb2maperror.o smb2transport.o \
- smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o
+ smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o
cifs-$(CONFIG_CIFS_XATTR) += xattr.o
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 689162e2e175..3150c19cdc2f 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -530,8 +530,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
|| (tag != ASN1_EOC)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 0\n",
+ cls, con, tag, end);
return 0;
}
@@ -541,8 +541,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
|| (tag != ASN1_SEQ)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 1\n",
+ cls, con, tag, end);
return 0;
}
@@ -552,8 +552,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
|| (tag != ASN1_EOC)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 0\n",
+ cls, con, tag, end);
return 0;
}
@@ -564,8 +564,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
|| (tag != ASN1_SEQ)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d sequence_end = %p exit 1\n",
+ cls, con, tag, sequence_end);
return 0;
}
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 498777d859eb..9bd03a231032 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -488,7 +488,13 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
else if (map_chars == SFM_MAP_UNI_RSVD) {
bool end_of_string;
- if (i == srclen - 1)
+ /**
+ * Remap spaces and periods found at the end of every
+ * component of the path. The special cases of '.' and
+ * '..' do not need to be dealt with explicitly because
+ * they are addressed in namei.c:link_path_walk().
+ **/
+ if ((i == srclen - 1) || (source[i+1] == '\\'))
end_of_string = true;
else
end_of_string = false;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index fcff14ef1c70..23b21e943652 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -338,7 +338,7 @@ invalidate_key:
goto out_key_put;
}
-static int
+int
sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
struct cifs_fattr *fattr, uint sidtype)
{
@@ -359,7 +359,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
return -EIO;
}
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) {
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) ||
+ (cifs_sb_master_tcon(cifs_sb)->posix_extensions)) {
uint32_t unix_id;
bool is_group;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0fb99d25e8a8..472cb7777e3e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -71,6 +71,8 @@ bool enable_oplocks = true;
bool linuxExtEnabled = true;
bool lookupCacheEnabled = true;
bool disable_legacy_dialects; /* false by default */
+bool enable_gcm_256; /* false by default, change when more servers support it */
+bool require_gcm_256; /* false by default */
unsigned int global_secflags = CIFSSEC_DEF;
/* unsigned int ntlmv2_support = 0; */
unsigned int sign_CIFS_PDUs = 1;
@@ -104,6 +106,12 @@ MODULE_PARM_DESC(slow_rsp_threshold, "Amount of time (in seconds) to wait "
module_param(enable_oplocks, bool, 0644);
MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1");
+module_param(enable_gcm_256, bool, 0644);
+MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: n/N/0");
+
+module_param(require_gcm_256, bool, 0644);
+MODULE_PARM_DESC(require_gcm_256, "Require strongest (256 bit) GCM encryption. Default: n/N/0");
+
module_param(disable_legacy_dialects, bool, 0644);
MODULE_PARM_DESC(disable_legacy_dialects, "To improve security it may be "
"helpful to restrict the ability to "
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b565d83ba89e..b6925aeeb621 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -195,18 +195,6 @@ struct smb_rqst {
unsigned int rq_tailsz; /* length of last page */
};
-enum smb_version {
- Smb_1 = 1,
- Smb_20,
- Smb_21,
- Smb_30,
- Smb_302,
- Smb_311,
- Smb_3any,
- Smb_default,
- Smb_version_err
-};
-
struct mid_q_entry;
struct TCP_Server_Info;
struct cifsFileInfo;
@@ -510,6 +498,8 @@ struct smb_version_operations {
struct fiemap_extent_info *, u64, u64);
/* version specific llseek implementation */
loff_t (*llseek)(struct file *, struct cifs_tcon *, loff_t, int);
+ /* Check for STATUS_IO_TIMEOUT */
+ bool (*is_status_io_timeout)(char *buf);
};
struct smb_version_values {
@@ -1954,6 +1944,8 @@ extern bool lookupCacheEnabled;
extern unsigned int global_secflags; /* if on, session setup sent
with more secure ntlmssp2 challenge/resp */
extern unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
+extern bool enable_gcm_256; /* allow optional negotiate of strongest signing (aes-gcm-256) */
+extern bool require_gcm_256; /* require use of strongest signing (aes-gcm-256) */
extern bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
extern unsigned int CIFSMaxBufSize; /* max size not including hdr */
extern unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index bb68cbf81074..24c6f36177ba 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -209,6 +209,8 @@ extern int cifs_set_file_info(struct inode *inode, struct iattr *attrs,
extern int cifs_rename_pending_delete(const char *full_path,
struct dentry *dentry,
const unsigned int xid);
+extern int sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
+ struct cifs_fattr *fattr, uint sidtype);
extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
struct cifs_fattr *fattr, struct inode *inode,
bool get_mode_from_special_sid,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a5731dd6e656..c38156f324dd 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -61,6 +61,7 @@
#ifdef CONFIG_CIFS_DFS_UPCALL
#include "dfs_cache.h"
#endif
+#include "fs_context.h"
extern mempool_t *cifs_req_poolp;
extern bool disable_legacy_dialects;
@@ -69,6 +70,9 @@ extern bool disable_legacy_dialects;
#define TLINK_ERROR_EXPIRE (1 * HZ)
#define TLINK_IDLE_EXPIRE (600 * HZ)
+/* Drop the connection to not overload the server */
+#define NUM_STATUS_IO_TIMEOUT 5
+
enum {
/* Mount options that take no arguments */
Opt_user_xattr, Opt_nouser_xattr,
@@ -276,66 +280,6 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_err, NULL }
};
-enum {
- Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
- Opt_sec_ntlmsspi, Opt_sec_ntlmssp,
- Opt_ntlm, Opt_sec_ntlmi, Opt_sec_ntlmv2,
- Opt_sec_ntlmv2i, Opt_sec_lanman,
- Opt_sec_none,
-
- Opt_sec_err
-};
-
-static const match_table_t cifs_secflavor_tokens = {
- { Opt_sec_krb5, "krb5" },
- { Opt_sec_krb5i, "krb5i" },
- { Opt_sec_krb5p, "krb5p" },
- { Opt_sec_ntlmsspi, "ntlmsspi" },
- { Opt_sec_ntlmssp, "ntlmssp" },
- { Opt_ntlm, "ntlm" },
- { Opt_sec_ntlmi, "ntlmi" },
- { Opt_sec_ntlmv2, "nontlm" },
- { Opt_sec_ntlmv2, "ntlmv2" },
- { Opt_sec_ntlmv2i, "ntlmv2i" },
- { Opt_sec_lanman, "lanman" },
- { Opt_sec_none, "none" },
-
- { Opt_sec_err, NULL }
-};
-
-/* cache flavors */
-enum {
- Opt_cache_loose,
- Opt_cache_strict,
- Opt_cache_none,
- Opt_cache_ro,
- Opt_cache_rw,
- Opt_cache_err
-};
-
-static const match_table_t cifs_cacheflavor_tokens = {
- { Opt_cache_loose, "loose" },
- { Opt_cache_strict, "strict" },
- { Opt_cache_none, "none" },
- { Opt_cache_ro, "ro" },
- { Opt_cache_rw, "singleclient" },
- { Opt_cache_err, NULL }
-};
-
-static const match_table_t cifs_smb_version_tokens = {
- { Smb_1, SMB1_VERSION_STRING },
- { Smb_20, SMB20_VERSION_STRING},
- { Smb_21, SMB21_VERSION_STRING },
- { Smb_30, SMB30_VERSION_STRING },
- { Smb_302, SMB302_VERSION_STRING },
- { Smb_302, ALT_SMB302_VERSION_STRING },
- { Smb_311, SMB311_VERSION_STRING },
- { Smb_311, ALT_SMB311_VERSION_STRING },
- { Smb_3any, SMB3ANY_VERSION_STRING },
- { Smb_default, SMBDEFAULT_VERSION_STRING },
- { Smb_version_err, NULL }
-};
-
static int ip_connect(struct TCP_Server_Info *server);
static int generic_ip_connect(struct TCP_Server_Info *server);
static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
@@ -1117,7 +1061,7 @@ cifs_demultiplex_thread(void *p)
struct task_struct *task_to_wake = NULL;
struct mid_q_entry *mids[MAX_COMPOUND];
char *bufs[MAX_COMPOUND];
- unsigned int noreclaim_flag;
+ unsigned int noreclaim_flag, num_io_timeout = 0;
noreclaim_flag = memalloc_noreclaim_save();
cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current));
@@ -1213,6 +1157,16 @@ next_pdu:
continue;
}
+ if (server->ops->is_status_io_timeout &&
+ server->ops->is_status_io_timeout(buf)) {
+ num_io_timeout++;
+ if (num_io_timeout > NUM_STATUS_IO_TIMEOUT) {
+ cifs_reconnect(server);
+ num_io_timeout = 0;
+ continue;
+ }
+ }
+
server->lstrp = jiffies;
for (i = 0; i < num_mids; i++) {
@@ -1359,177 +1313,6 @@ static int get_option_gid(substring_t args[], kgid_t *result)
return 0;
}
-static int cifs_parse_security_flavors(char *value,
- struct smb_vol *vol)
-{
-
- substring_t args[MAX_OPT_ARGS];
-
- /*
- * With mount options, the last one should win. Reset any existing
- * settings back to default.
- */
- vol->sectype = Unspecified;
- vol->sign = false;
-
- switch (match_token(value, cifs_secflavor_tokens, args)) {
- case Opt_sec_krb5p:
- cifs_dbg(VFS, "sec=krb5p is not supported!\n");
- return 1;
- case Opt_sec_krb5i:
- vol->sign = true;
- fallthrough;
- case Opt_sec_krb5:
- vol->sectype = Kerberos;
- break;
- case Opt_sec_ntlmsspi:
- vol->sign = true;
- fallthrough;
- case Opt_sec_ntlmssp:
- vol->sectype = RawNTLMSSP;
- break;
- case Opt_sec_ntlmi:
- vol->sign = true;
- fallthrough;
- case Opt_ntlm:
- vol->sectype = NTLM;
- break;
- case Opt_sec_ntlmv2i:
- vol->sign = true;
- fallthrough;
- case Opt_sec_ntlmv2:
- vol->sectype = NTLMv2;
- break;
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- case Opt_sec_lanman:
- vol->sectype = LANMAN;
- break;
-#endif
- case Opt_sec_none:
- vol->nullauth = 1;
- break;
- default:
- cifs_dbg(VFS, "bad security option: %s\n", value);
- return 1;
- }
-
- return 0;
-}
-
-static int
-cifs_parse_cache_flavor(char *value, struct smb_vol *vol)
-{
- substring_t args[MAX_OPT_ARGS];
-
- switch (match_token(value, cifs_cacheflavor_tokens, args)) {
- case Opt_cache_loose:
- vol->direct_io = false;
- vol->strict_io = false;
- vol->cache_ro = false;
- vol->cache_rw = false;
- break;
- case Opt_cache_strict:
- vol->direct_io = false;
- vol->strict_io = true;
- vol->cache_ro = false;
- vol->cache_rw = false;
- break;
- case Opt_cache_none:
- vol->direct_io = true;
- vol->strict_io = false;
- vol->cache_ro = false;
- vol->cache_rw = false;
- break;
- case Opt_cache_ro:
- vol->direct_io = false;
- vol->strict_io = false;
- vol->cache_ro = true;
- vol->cache_rw = false;
- break;
- case Opt_cache_rw:
- vol->direct_io = false;
- vol->strict_io = false;
- vol->cache_ro = false;
- vol->cache_rw = true;
- break;
- default:
- cifs_dbg(VFS, "bad cache= option: %s\n", value);
- return 1;
- }
- return 0;
-}
-
-static int
-cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3)
-{
- substring_t args[MAX_OPT_ARGS];
-
- switch (match_token(value, cifs_smb_version_tokens, args)) {
-#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
- case Smb_1:
- if (disable_legacy_dialects) {
- cifs_dbg(VFS, "mount with legacy dialect disabled\n");
- return 1;
- }
- if (is_smb3) {
- cifs_dbg(VFS, "vers=1.0 (cifs) not permitted when mounting with smb3\n");
- return 1;
- }
- cifs_dbg(VFS, "Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers\n");
- vol->ops = &smb1_operations;
- vol->vals = &smb1_values;
- break;
- case Smb_20:
- if (disable_legacy_dialects) {
- cifs_dbg(VFS, "mount with legacy dialect disabled\n");
- return 1;
- }
- if (is_smb3) {
- cifs_dbg(VFS, "vers=2.0 not permitted when mounting with smb3\n");
- return 1;
- }
- vol->ops = &smb20_operations;
- vol->vals = &smb20_values;
- break;
-#else
- case Smb_1:
- cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n");
- return 1;
- case Smb_20:
- cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n");
- return 1;
-#endif /* CIFS_ALLOW_INSECURE_LEGACY */
- case Smb_21:
- vol->ops = &smb21_operations;
- vol->vals = &smb21_values;
- break;
- case Smb_30:
- vol->ops = &smb30_operations;
- vol->vals = &smb30_values;
- break;
- case Smb_302:
- vol->ops = &smb30_operations; /* currently identical with 3.0 */
- vol->vals = &smb302_values;
- break;
- case Smb_311:
- vol->ops = &smb311_operations;
- vol->vals = &smb311_values;
- break;
- case Smb_3any:
- vol->ops = &smb30_operations; /* currently identical with 3.0 */
- vol->vals = &smb3any_values;
- break;
- case Smb_default:
- vol->ops = &smb30_operations; /* currently identical with 3.0 */
- vol->vals = &smbdefault_values;
- break;
- default:
- cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
- return 1;
- }
- return 0;
-}
-
/*
* Parse a devname into substrings and populate the vol->UNC and vol->prepath
* fields with the result. Returns 0 on success and an error otherwise.
@@ -3595,7 +3378,10 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
*/
tcon->retry = volume_info->retry;
tcon->nocase = volume_info->nocase;
- tcon->nohandlecache = volume_info->nohandlecache;
+ if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING)
+ tcon->nohandlecache = volume_info->nohandlecache;
+ else
+ tcon->nohandlecache = 1;
tcon->nodelete = volume_info->nodelete;
tcon->local_lease = volume_info->local_lease;
INIT_LIST_HEAD(&tcon->pending_opens);
@@ -3889,13 +3675,21 @@ generic_ip_connect(struct TCP_Server_Info *server)
saddr = (struct sockaddr *) &server->dstaddr;
if (server->dstaddr.ss_family == AF_INET6) {
- sport = ((struct sockaddr_in6 *) saddr)->sin6_port;
+ struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)&server->dstaddr;
+
+ sport = ipv6->sin6_port;
slen = sizeof(struct sockaddr_in6);
sfamily = AF_INET6;
+ cifs_dbg(FYI, "%s: connecting to [%pI6]:%d\n", __func__, &ipv6->sin6_addr,
+ ntohs(sport));
} else {
- sport = ((struct sockaddr_in *) saddr)->sin_port;
+ struct sockaddr_in *ipv4 = (struct sockaddr_in *)&server->dstaddr;
+
+ sport = ipv4->sin_port;
slen = sizeof(struct sockaddr_in);
sfamily = AF_INET;
+ cifs_dbg(FYI, "%s: connecting to %pI4:%d\n", __func__, &ipv4->sin_addr,
+ ntohs(sport));
}
if (socket == NULL) {
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
new file mode 100644
index 000000000000..ad6c2fed4055
--- /dev/null
+++ b/fs/cifs/fs_context.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020, Microsoft Corporation.
+ *
+ * Author(s): Steve French <stfrench@microsoft.com>
+ * David Howells <dhowells@redhat.com>
+ */
+
+#include "cifsglob.h"
+#include "cifs_debug.h"
+#include "fs_context.h"
+
+static const match_table_t cifs_smb_version_tokens = {
+ { Smb_1, SMB1_VERSION_STRING },
+ { Smb_20, SMB20_VERSION_STRING},
+ { Smb_21, SMB21_VERSION_STRING },
+ { Smb_30, SMB30_VERSION_STRING },
+ { Smb_302, SMB302_VERSION_STRING },
+ { Smb_302, ALT_SMB302_VERSION_STRING },
+ { Smb_311, SMB311_VERSION_STRING },
+ { Smb_311, ALT_SMB311_VERSION_STRING },
+ { Smb_3any, SMB3ANY_VERSION_STRING },
+ { Smb_default, SMBDEFAULT_VERSION_STRING },
+ { Smb_version_err, NULL }
+};
+
+int
+cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3)
+{
+ substring_t args[MAX_OPT_ARGS];
+
+ switch (match_token(value, cifs_smb_version_tokens, args)) {
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ case Smb_1:
+ if (disable_legacy_dialects) {
+ cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+ return 1;
+ }
+ if (is_smb3) {
+ cifs_dbg(VFS, "vers=1.0 (cifs) not permitted when mounting with smb3\n");
+ return 1;
+ }
+ cifs_dbg(VFS, "Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers\n");
+ vol->ops = &smb1_operations;
+ vol->vals = &smb1_values;
+ break;
+ case Smb_20:
+ if (disable_legacy_dialects) {
+ cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+ return 1;
+ }
+ if (is_smb3) {
+ cifs_dbg(VFS, "vers=2.0 not permitted when mounting with smb3\n");
+ return 1;
+ }
+ vol->ops = &smb20_operations;
+ vol->vals = &smb20_values;
+ break;
+#else
+ case Smb_1:
+ cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n");
+ return 1;
+ case Smb_20:
+ cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n");
+ return 1;
+#endif /* CIFS_ALLOW_INSECURE_LEGACY */
+ case Smb_21:
+ vol->ops = &smb21_operations;
+ vol->vals = &smb21_values;
+ break;
+ case Smb_30:
+ vol->ops = &smb30_operations;
+ vol->vals = &smb30_values;
+ break;
+ case Smb_302:
+ vol->ops = &smb30_operations; /* currently identical with 3.0 */
+ vol->vals = &smb302_values;
+ break;
+ case Smb_311:
+ vol->ops = &smb311_operations;
+ vol->vals = &smb311_values;
+ break;
+ case Smb_3any:
+ vol->ops = &smb30_operations; /* currently identical with 3.0 */
+ vol->vals = &smb3any_values;
+ break;
+ case Smb_default:
+ vol->ops = &smb30_operations; /* currently identical with 3.0 */
+ vol->vals = &smbdefault_values;
+ break;
+ default:
+ cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
+ return 1;
+ }
+ return 0;
+}
+
+static const match_table_t cifs_secflavor_tokens = {
+ { Opt_sec_krb5, "krb5" },
+ { Opt_sec_krb5i, "krb5i" },
+ { Opt_sec_krb5p, "krb5p" },
+ { Opt_sec_ntlmsspi, "ntlmsspi" },
+ { Opt_sec_ntlmssp, "ntlmssp" },
+ { Opt_ntlm, "ntlm" },
+ { Opt_sec_ntlmi, "ntlmi" },
+ { Opt_sec_ntlmv2, "nontlm" },
+ { Opt_sec_ntlmv2, "ntlmv2" },
+ { Opt_sec_ntlmv2i, "ntlmv2i" },
+ { Opt_sec_lanman, "lanman" },
+ { Opt_sec_none, "none" },
+
+ { Opt_sec_err, NULL }
+};
+
+int cifs_parse_security_flavors(char *value, struct smb_vol *vol)
+{
+
+ substring_t args[MAX_OPT_ARGS];
+
+ /*
+ * With mount options, the last one should win. Reset any existing
+ * settings back to default.
+ */
+ vol->sectype = Unspecified;
+ vol->sign = false;
+
+ switch (match_token(value, cifs_secflavor_tokens, args)) {
+ case Opt_sec_krb5p:
+ cifs_dbg(VFS, "sec=krb5p is not supported!\n");
+ return 1;
+ case Opt_sec_krb5i:
+ vol->sign = true;
+ fallthrough;
+ case Opt_sec_krb5:
+ vol->sectype = Kerberos;
+ break;
+ case Opt_sec_ntlmsspi:
+ vol->sign = true;
+ fallthrough;
+ case Opt_sec_ntlmssp:
+ vol->sectype = RawNTLMSSP;
+ break;
+ case Opt_sec_ntlmi:
+ vol->sign = true;
+ fallthrough;
+ case Opt_ntlm:
+ vol->sectype = NTLM;
+ break;
+ case Opt_sec_ntlmv2i:
+ vol->sign = true;
+ fallthrough;
+ case Opt_sec_ntlmv2:
+ vol->sectype = NTLMv2;
+ break;
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+ case Opt_sec_lanman:
+ vol->sectype = LANMAN;
+ break;
+#endif
+ case Opt_sec_none:
+ vol->nullauth = 1;
+ break;
+ default:
+ cifs_dbg(VFS, "bad security option: %s\n", value);
+ return 1;
+ }
+
+ return 0;
+}
+
+static const match_table_t cifs_cacheflavor_tokens = {
+ { Opt_cache_loose, "loose" },
+ { Opt_cache_strict, "strict" },
+ { Opt_cache_none, "none" },
+ { Opt_cache_ro, "ro" },
+ { Opt_cache_rw, "singleclient" },
+ { Opt_cache_err, NULL }
+};
+
+int
+cifs_parse_cache_flavor(char *value, struct smb_vol *vol)
+{
+ substring_t args[MAX_OPT_ARGS];
+
+ switch (match_token(value, cifs_cacheflavor_tokens, args)) {
+ case Opt_cache_loose:
+ vol->direct_io = false;
+ vol->strict_io = false;
+ vol->cache_ro = false;
+ vol->cache_rw = false;
+ break;
+ case Opt_cache_strict:
+ vol->direct_io = false;
+ vol->strict_io = true;
+ vol->cache_ro = false;
+ vol->cache_rw = false;
+ break;
+ case Opt_cache_none:
+ vol->direct_io = true;
+ vol->strict_io = false;
+ vol->cache_ro = false;
+ vol->cache_rw = false;
+ break;
+ case Opt_cache_ro:
+ vol->direct_io = false;
+ vol->strict_io = false;
+ vol->cache_ro = true;
+ vol->cache_rw = false;
+ break;
+ case Opt_cache_rw:
+ vol->direct_io = false;
+ vol->strict_io = false;
+ vol->cache_ro = false;
+ vol->cache_rw = true;
+ break;
+ default:
+ cifs_dbg(VFS, "bad cache= option: %s\n", value);
+ return 1;
+ }
+ return 0;
+}
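The three parsers above were moved out of connect.c verbatim and are now reached through the declarations in fs_context.h. A hedged sketch of how a connect.c call site keeps using them after the move (the exact Opt_vers handling shown is an assumption for illustration, not quoted from this diff):

	case Opt_vers:
		/* dialect requested via the vers= mount option (assumed option name) */
		string = match_strdup(args);
		if (string == NULL)
			goto out_nomem;
		if (cifs_parse_smb_version(string, vol, is_smb3) != 0)
			goto cifs_parse_mount_err;
		break;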
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
new file mode 100644
index 000000000000..886208a1b0ef
--- /dev/null
+++ b/fs/cifs/fs_context.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020, Microsoft Corporation.
+ *
+ * Author(s): Steve French <stfrench@microsoft.com>
+ * David Howells <dhowells@redhat.com>
+ */
+
+#ifndef _FS_CONTEXT_H
+#define _FS_CONTEXT_H
+
+#include <linux/parser.h>
+#include "cifsglob.h"
+
+enum smb_version {
+ Smb_1 = 1,
+ Smb_20,
+ Smb_21,
+ Smb_30,
+ Smb_302,
+ Smb_311,
+ Smb_3any,
+ Smb_default,
+ Smb_version_err
+};
+
+int cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3);
+
+enum {
+ Opt_cache_loose,
+ Opt_cache_strict,
+ Opt_cache_none,
+ Opt_cache_ro,
+ Opt_cache_rw,
+ Opt_cache_err
+};
+
+int cifs_parse_cache_flavor(char *value, struct smb_vol *vol);
+
+enum cifs_sec_param {
+ Opt_sec_krb5,
+ Opt_sec_krb5i,
+ Opt_sec_krb5p,
+ Opt_sec_ntlmsspi,
+ Opt_sec_ntlmssp,
+ Opt_ntlm,
+ Opt_sec_ntlmi,
+ Opt_sec_ntlmv2,
+ Opt_sec_ntlmv2i,
+ Opt_sec_lanman,
+ Opt_sec_none,
+
+ Opt_sec_err
+};
+
+int cifs_parse_security_flavors(char *value, struct smb_vol *vol);
+
+#endif
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1f75b25e559a..daec31be8571 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2883,13 +2883,18 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
+ int rc, retries = 0;
- if (pTcon->unix_ext)
- return cifs_setattr_unix(direntry, attrs);
-
- return cifs_setattr_nounix(direntry, attrs);
+ do {
+ if (pTcon->unix_ext)
+ rc = cifs_setattr_unix(direntry, attrs);
+ else
+ rc = cifs_setattr_nounix(direntry, attrs);
+ retries++;
+ } while (is_retryable_error(rc) && retries < 2);
/* BB: add cifs_setattr_legacy for really old servers */
+ return rc;
}
#if 0
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 6df0922e7e30..799be3a5d25e 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -168,10 +168,33 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
fattr->cf_uid = cifs_sb->mnt_uid;
fattr->cf_gid = cifs_sb->mnt_gid;
+ /*
+ * The IO_REPARSE_TAG_LX_ tags originally were used by WSL but they
+ * are preferred by the Linux client in some cases since, unlike
+ * the NFS reparse tag (or EAs), they don't require an extra query
+ * to determine which type of special file they represent.
+ * TODO: go through all documented reparse tags to see if we can
+ * reasonably map some of them to directories vs. files vs. symlinks
+ */
if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
fattr->cf_dtype = DT_DIR;
- } else {
+ } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_SYMLINK) {
+ fattr->cf_mode |= S_IFLNK | cifs_sb->mnt_file_mode;
+ fattr->cf_dtype = DT_LNK;
+ } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_FIFO) {
+ fattr->cf_mode |= S_IFIFO | cifs_sb->mnt_file_mode;
+ fattr->cf_dtype = DT_FIFO;
+ } else if (fattr->cf_cifstag == IO_REPARSE_TAG_AF_UNIX) {
+ fattr->cf_mode |= S_IFSOCK | cifs_sb->mnt_file_mode;
+ fattr->cf_dtype = DT_SOCK;
+ } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_CHR) {
+ fattr->cf_mode |= S_IFCHR | cifs_sb->mnt_file_mode;
+ fattr->cf_dtype = DT_CHR;
+ } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_BLK) {
+ fattr->cf_mode |= S_IFBLK | cifs_sb->mnt_file_mode;
+ fattr->cf_dtype = DT_BLK;
+ } else { /* TODO: should we mark some other reparse points (like DFSR) as directories? */
fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
fattr->cf_dtype = DT_REG;
}
@@ -267,9 +290,8 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info,
if (reparse_file_needs_reval(fattr))
fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
- /* TODO map SIDs */
- fattr->cf_uid = cifs_sb->mnt_uid;
- fattr->cf_gid = cifs_sb->mnt_gid;
+ sid_to_id(cifs_sb, &parsed.owner, fattr, SIDOWNER);
+ sid_to_id(cifs_sb, &parsed.group, fattr, SIDGROUP);
}
static void __dir_info_to_fattr(struct cifs_fattr *fattr, const void *info)
@@ -360,11 +382,11 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
*/
static int
-initiate_cifs_search(const unsigned int xid, struct file *file)
+initiate_cifs_search(const unsigned int xid, struct file *file,
+ char *full_path)
{
__u16 search_flags;
int rc = 0;
- char *full_path = NULL;
struct cifsFileInfo *cifsFile;
struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
struct tcon_link *tlink = NULL;
@@ -400,12 +422,6 @@ initiate_cifs_search(const unsigned int xid, struct file *file)
cifsFile->invalidHandle = true;
cifsFile->srch_inf.endOfSearch = false;
- full_path = build_path_from_dentry(file_dentry(file));
- if (full_path == NULL) {
- rc = -ENOMEM;
- goto error_exit;
- }
-
cifs_dbg(FYI, "Full path: %s start at: %lld\n", full_path, file->f_pos);
ffirst_retry:
@@ -444,7 +460,6 @@ ffirst_retry:
goto ffirst_retry;
}
error_exit:
- kfree(full_path);
cifs_put_tlink(tlink);
return rc;
}
@@ -688,7 +703,8 @@ static int cifs_save_resume_key(const char *current_entry,
*/
static int
find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
- struct file *file, char **current_entry, int *num_to_ret)
+ struct file *file, char *full_path,
+ char **current_entry, int *num_to_ret)
{
__u16 search_flags;
int rc = 0;
@@ -741,7 +757,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
ntwrk_buf_start);
cfile->srch_inf.ntwrk_buf_start = NULL;
}
- rc = initiate_cifs_search(xid, file);
+ rc = initiate_cifs_search(xid, file, full_path);
if (rc) {
cifs_dbg(FYI, "error %d reinitiating a search on rewind\n",
rc);
@@ -925,15 +941,22 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
char *tmp_buf = NULL;
char *end_of_smb;
unsigned int max_len;
+ char *full_path = NULL;
xid = get_xid();
+ full_path = build_path_from_dentry(file_dentry(file));
+ if (full_path == NULL) {
+ rc = -ENOMEM;
+ goto rddir2_exit;
+ }
+
/*
* Ensure FindFirst doesn't fail before doing filldir() for '.' and
* '..'. Otherwise we won't be able to notify VFS in case of failure.
*/
if (file->private_data == NULL) {
- rc = initiate_cifs_search(xid, file);
+ rc = initiate_cifs_search(xid, file, full_path);
cifs_dbg(FYI, "initiate cifs search rc %d\n", rc);
if (rc)
goto rddir2_exit;
@@ -960,8 +983,8 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
} */
tcon = tlink_tcon(cifsFile->tlink);
- rc = find_cifs_entry(xid, tcon, ctx->pos, file, &current_entry,
- &num_to_fill);
+ rc = find_cifs_entry(xid, tcon, ctx->pos, file, full_path,
+ &current_entry, &num_to_fill);
if (rc) {
cifs_dbg(FYI, "fce error %d\n", rc);
goto rddir2_exit;
@@ -1019,6 +1042,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
kfree(tmp_buf);
rddir2_exit:
+ kfree(full_path);
free_xid(xid);
return rc;
}
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index cf20f0b5d836..99a1951a01ec 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -58,6 +58,7 @@
#define SMB2_HMACSHA256_SIZE (32)
#define SMB2_CMACAES_SIZE (16)
#define SMB3_SIGNKEY_SIZE (16)
+#define SMB3_GCM256_CRYPTKEY_SIZE (32)
/* Maximum buffer size value we can send with 1 credit */
#define SMB2_MAX_BUFFER_SIZE 65536
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index eba01d0908dd..df6212e55e10 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -511,9 +511,9 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc;
struct smb2_file_all_info *smb2_data;
__u32 create_options = 0;
- struct cifs_fid fid;
bool no_cached_open = tcon->nohandlecache;
struct cifsFileInfo *cfile;
+ struct cached_fid *cfid = NULL;
*adjust_tz = false;
*symlink = false;
@@ -525,7 +525,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
/* If it is a root and its handle is cached then use it */
if (!strlen(full_path) && !no_cached_open) {
- rc = open_shroot(xid, tcon, cifs_sb, &fid);
+ rc = open_shroot(xid, tcon, cifs_sb, &cfid);
if (rc)
goto out;
@@ -533,12 +533,13 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
move_smb2_info_to_cifs(data,
&tcon->crfid.file_all_info);
} else {
- rc = SMB2_query_info(xid, tcon, fid.persistent_fid,
- fid.volatile_fid, smb2_data);
+ rc = SMB2_query_info(xid, tcon,
+ cfid->fid->persistent_fid,
+ cfid->fid->volatile_fid, smb2_data);
if (!rc)
move_smb2_info_to_cifs(data, smb2_data);
}
- close_shroot(&tcon->crfid);
+ close_shroot(cfid);
goto out;
}
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 7fde3775cb57..c775682ee973 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -488,7 +488,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_PIPE_CONNECTED, -EIO, "STATUS_PIPE_CONNECTED"},
{STATUS_PIPE_LISTENING, -EIO, "STATUS_PIPE_LISTENING"},
{STATUS_INVALID_READ_MODE, -EIO, "STATUS_INVALID_READ_MODE"},
- {STATUS_IO_TIMEOUT, -ETIMEDOUT, "STATUS_IO_TIMEOUT"},
+ {STATUS_IO_TIMEOUT, -EAGAIN, "STATUS_IO_TIMEOUT"},
{STATUS_FILE_FORCED_CLOSED, -EIO, "STATUS_FILE_FORCED_CLOSED"},
{STATUS_PROFILING_NOT_STARTED, -EIO, "STATUS_PROFILING_NOT_STARTED"},
{STATUS_PROFILING_NOT_STOPPED, -EIO, "STATUS_PROFILING_NOT_STOPPED"},
@@ -814,7 +814,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_INVALID_VARIANT, -EIO, "STATUS_INVALID_VARIANT"},
{STATUS_DOMAIN_CONTROLLER_NOT_FOUND, -EIO,
"STATUS_DOMAIN_CONTROLLER_NOT_FOUND"},
- {STATUS_ACCOUNT_LOCKED_OUT, -EIO, "STATUS_ACCOUNT_LOCKED_OUT"},
+ {STATUS_ACCOUNT_LOCKED_OUT, -EACCES, "STATUS_ACCOUNT_LOCKED_OUT"},
{STATUS_HANDLE_NOT_CLOSABLE, -EIO, "STATUS_HANDLE_NOT_CLOSABLE"},
{STATUS_CONNECTION_REFUSED, -EIO, "STATUS_CONNECTION_REFUSED"},
{STATUS_GRACEFUL_DISCONNECT, -EIO, "STATUS_GRACEFUL_DISCONNECT"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d44df8f95bcd..3cde719ec41b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -72,7 +72,7 @@ smb2_add_credits(struct TCP_Server_Info *server,
/* eg found case where write overlapping reconnect messed up credits */
if (((optype & CIFS_OP_MASK) == CIFS_NEG_OP) && (*val != 0))
trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
- server->hostname, *val);
+ server->hostname, *val, add);
if ((instance == 0) || (instance == server->reconnect_instance))
*val += add;
else
@@ -121,6 +121,8 @@ smb2_add_credits(struct TCP_Server_Info *server,
cifs_dbg(FYI, "disabling oplocks\n");
break;
default:
+ trace_smb3_add_credits(server->CurrentMid,
+ server->hostname, rc, add);
cifs_dbg(FYI, "add %u credits total=%d\n", add, rc);
}
}
@@ -651,7 +653,8 @@ smb2_cached_lease_break(struct work_struct *work)
* Open the directory at the root of a share
*/
int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid)
+ struct cifs_sb_info *cifs_sb,
+ struct cached_fid **cfid)
{
struct cifs_ses *ses = tcon->ses;
struct TCP_Server_Info *server = ses->server;
@@ -666,11 +669,12 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
int rc, flags = 0;
__le16 utf16_path = 0; /* Null - since an open of top of share */
u8 oplock = SMB2_OPLOCK_LEVEL_II;
+ struct cifs_fid *pfid;
mutex_lock(&tcon->crfid.fid_mutex);
if (tcon->crfid.is_valid) {
cifs_dbg(FYI, "found a cached root file handle\n");
- memcpy(pfid, tcon->crfid.fid, sizeof(struct cifs_fid));
+ *cfid = &tcon->crfid;
kref_get(&tcon->crfid.refcount);
mutex_unlock(&tcon->crfid.fid_mutex);
return 0;
@@ -691,6 +695,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
if (!server->ops->new_lease_key)
return -EIO;
+ pfid = tcon->crfid.fid;
server->ops->new_lease_key(pfid);
memset(rqst, 0, sizeof(rqst));
@@ -820,6 +825,8 @@ oshr_free:
SMB2_query_info_free(&rqst[1]);
free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ if (rc == 0)
+ *cfid = &tcon->crfid;
return rc;
}
@@ -833,6 +840,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_open_parms oparms;
struct cifs_fid fid;
bool no_cached_open = tcon->nohandlecache;
+ struct cached_fid *cfid = NULL;
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES;
@@ -841,12 +849,14 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
oparms.fid = &fid;
oparms.reconnect = false;
- if (no_cached_open)
+ if (no_cached_open) {
rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL,
NULL, NULL);
- else
- rc = open_shroot(xid, tcon, cifs_sb, &fid);
-
+ } else {
+ rc = open_shroot(xid, tcon, cifs_sb, &cfid);
+ if (rc == 0)
+ memcpy(&fid, cfid->fid, sizeof(struct cifs_fid));
+ }
if (rc)
return;
@@ -863,7 +873,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
if (no_cached_open)
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
else
- close_shroot(&tcon->crfid);
+ close_shroot(cfid);
}
static void
@@ -2346,6 +2356,17 @@ smb2_is_session_expired(char *buf)
return true;
}
+static bool
+smb2_is_status_io_timeout(char *buf)
+{
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
+
+ if (shdr->Status == STATUS_IO_TIMEOUT)
+ return true;
+ else
+ return false;
+}
+
static int
smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
struct cifsInodeInfo *cinode)
@@ -3072,7 +3093,12 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
oparms.tcon = tcon;
oparms.desired_access = READ_CONTROL;
oparms.disposition = FILE_OPEN;
- oparms.create_options = cifs_create_options(cifs_sb, 0);
+ /*
+ * When querying an ACL, even if the file is a symlink we want to open
+ * the source not the target, and so the protocol requires that the
+ * client specify this flag when opening a reparse point
+ */
+ oparms.create_options = cifs_create_options(cifs_sb, 0) | OPEN_REPARSE_POINT;
oparms.fid = &fid;
oparms.reconnect = false;
@@ -3801,10 +3827,11 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
tr_hdr->OriginalMessageSize = cpu_to_le32(orig_len);
tr_hdr->Flags = cpu_to_le16(0x01);
- if (cipher_type == SMB2_ENCRYPTION_AES128_GCM)
- get_random_bytes(&tr_hdr->Nonce, SMB3_AES128GCM_NONCE);
+ if ((cipher_type == SMB2_ENCRYPTION_AES128_GCM) ||
+ (cipher_type == SMB2_ENCRYPTION_AES256_GCM))
+ get_random_bytes(&tr_hdr->Nonce, SMB3_AES_GCM_NONCE);
else
- get_random_bytes(&tr_hdr->Nonce, SMB3_AES128CCM_NONCE);
+ get_random_bytes(&tr_hdr->Nonce, SMB3_AES_CCM_NONCE);
memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8);
}
@@ -3924,7 +3951,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
if (rc) {
cifs_server_dbg(VFS, "%s: Could not get %scryption key\n", __func__,
enc ? "en" : "de");
- return 0;
+ return rc;
}
rc = smb3_crypto_aead_allocate(server);
@@ -3935,7 +3962,12 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
tfm = enc ? server->secmech.ccmaesencrypt :
server->secmech.ccmaesdecrypt;
- rc = crypto_aead_setkey(tfm, key, SMB3_SIGN_KEY_SIZE);
+
+ if (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE);
+ else
+ rc = crypto_aead_setkey(tfm, key, SMB3_SIGN_KEY_SIZE);
+
if (rc) {
cifs_server_dbg(VFS, "%s: Failed to set aead key %d\n", __func__, rc);
return rc;
@@ -3973,11 +4005,12 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
goto free_sg;
}
- if (server->cipher_type == SMB2_ENCRYPTION_AES128_GCM)
- memcpy(iv, (char *)tr_hdr->Nonce, SMB3_AES128GCM_NONCE);
+ if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) ||
+ (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
+ memcpy(iv, (char *)tr_hdr->Nonce, SMB3_AES_GCM_NONCE);
else {
iv[0] = 3;
- memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES128CCM_NONCE);
+ memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES_CCM_NONCE);
}
aead_request_set_crypt(req, sg, sg, crypt_len, iv);
@@ -4103,7 +4136,8 @@ smb3_is_transform_hdr(void *buf)
static int
decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
unsigned int buf_data_size, struct page **pages,
- unsigned int npages, unsigned int page_data_size)
+ unsigned int npages, unsigned int page_data_size,
+ bool is_offloaded)
{
struct kvec iov[2];
struct smb_rqst rqst = {NULL};
@@ -4129,7 +4163,8 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
memmove(buf, iov[1].iov_base, buf_data_size);
- server->total_read = buf_data_size + page_data_size;
+ if (!is_offloaded)
+ server->total_read = buf_data_size + page_data_size;
return rc;
}
@@ -4342,7 +4377,7 @@ static void smb2_decrypt_offload(struct work_struct *work)
struct mid_q_entry *mid;
rc = decrypt_raw_data(dw->server, dw->buf, dw->server->vals->read_rsp_size,
- dw->ppages, dw->npages, dw->len);
+ dw->ppages, dw->npages, dw->len, true);
if (rc) {
cifs_dbg(VFS, "error decrypting rc=%d\n", rc);
goto free_pages;
@@ -4448,7 +4483,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid,
non_offloaded_decrypt:
rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size,
- pages, npages, len);
+ pages, npages, len, false);
if (rc)
goto free_pages;
@@ -4504,7 +4539,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
server->total_read += length;
buf_size = pdu_length - sizeof(struct smb2_transform_hdr);
- length = decrypt_raw_data(server, buf, buf_size, NULL, 0, 0);
+ length = decrypt_raw_data(server, buf, buf_size, NULL, 0, 0, false);
if (length)
return length;
@@ -4809,6 +4844,7 @@ struct smb_version_operations smb20_operations = {
.make_node = smb2_make_node,
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
+ .is_status_io_timeout = smb2_is_status_io_timeout,
};
struct smb_version_operations smb21_operations = {
@@ -4909,6 +4945,7 @@ struct smb_version_operations smb21_operations = {
.make_node = smb2_make_node,
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
+ .is_status_io_timeout = smb2_is_status_io_timeout,
};
struct smb_version_operations smb30_operations = {
@@ -5019,6 +5056,7 @@ struct smb_version_operations smb30_operations = {
.make_node = smb2_make_node,
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
+ .is_status_io_timeout = smb2_is_status_io_timeout,
};
struct smb_version_operations smb311_operations = {
@@ -5130,6 +5168,7 @@ struct smb_version_operations smb311_operations = {
.make_node = smb2_make_node,
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
+ .is_status_io_timeout = smb2_is_status_io_timeout,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 96c172d94fba..445e80862865 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -449,10 +449,22 @@ static void
build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt)
{
pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES;
- pneg_ctxt->DataLength = cpu_to_le16(6); /* Cipher Count + two ciphers */
- pneg_ctxt->CipherCount = cpu_to_le16(2);
- pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;
- pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES128_CCM;
+ if (require_gcm_256) {
+ pneg_ctxt->DataLength = cpu_to_le16(4); /* Cipher Count + 1 cipher */
+ pneg_ctxt->CipherCount = cpu_to_le16(1);
+ pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES256_GCM;
+ } else if (enable_gcm_256) {
+ pneg_ctxt->DataLength = cpu_to_le16(8); /* Cipher Count + 3 ciphers */
+ pneg_ctxt->CipherCount = cpu_to_le16(3);
+ pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;
+ pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES256_GCM;
+ pneg_ctxt->Ciphers[2] = SMB2_ENCRYPTION_AES128_CCM;
+ } else {
+ pneg_ctxt->DataLength = cpu_to_le16(6); /* Cipher Count + 2 ciphers */
+ pneg_ctxt->CipherCount = cpu_to_le16(2);
+ pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;
+ pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES128_CCM;
+ }
}
static unsigned int
@@ -598,8 +610,29 @@ static int decode_encrypt_ctx(struct TCP_Server_Info *server,
return -EINVAL;
}
cifs_dbg(FYI, "SMB311 cipher type:%d\n", le16_to_cpu(ctxt->Ciphers[0]));
- if ((ctxt->Ciphers[0] != SMB2_ENCRYPTION_AES128_CCM) &&
- (ctxt->Ciphers[0] != SMB2_ENCRYPTION_AES128_GCM)) {
+ if (require_gcm_256) {
+ if (ctxt->Ciphers[0] != SMB2_ENCRYPTION_AES256_GCM) {
+ cifs_dbg(VFS, "Server does not support requested encryption type (AES256 GCM)\n");
+ return -EOPNOTSUPP;
+ }
+ } else if (ctxt->Ciphers[0] == 0) {
+ /*
+ * e.g. if server only supported AES256_CCM (very unlikely)
+ * or server supported no encryption types or had all disabled.
+ * Since GLOBAL_CAP_ENCRYPTION will not be set, in the case
+ * in which mount requested encryption ("seal") checks later
+ * on during tree connection will return proper rc, but if
+ * seal not requested by client, since server is allowed to
+ * return 0 to indicate no supported cipher, we can't fail here
+ */
+ server->cipher_type = 0;
+ server->capabilities &= ~SMB2_GLOBAL_CAP_ENCRYPTION;
+ pr_warn_once("Server does not support requested encryption types\n");
+ return 0;
+ } else if ((ctxt->Ciphers[0] != SMB2_ENCRYPTION_AES128_CCM) &&
+ (ctxt->Ciphers[0] != SMB2_ENCRYPTION_AES128_GCM) &&
+ (ctxt->Ciphers[0] != SMB2_ENCRYPTION_AES256_GCM)) {
+ /* server returned a cipher we didn't ask for */
pr_warn_once("Invalid SMB3.11 cipher returned\n");
return -EINVAL;
}
@@ -1948,9 +1981,11 @@ smb2_parse_contexts(struct TCP_Server_Info *server,
unsigned int next;
unsigned int remaining;
char *name;
- const char smb3_create_tag_posix[] = {0x93, 0xAD, 0x25, 0x50, 0x9C,
- 0xB4, 0x11, 0xE7, 0xB4, 0x23, 0x83,
- 0xDE, 0x96, 0x8B, 0xCD, 0x7C};
+ static const char smb3_create_tag_posix[] = {
+ 0x93, 0xAD, 0x25, 0x50, 0x9C,
+ 0xB4, 0x11, 0xE7, 0xB4, 0x23, 0x83,
+ 0xDE, 0x96, 0x8B, 0xCD, 0x7C
+ };
*oplock = 0;
data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index c3f1baf5bde2..171f54965703 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -128,8 +128,8 @@ struct smb2_sync_pdu {
__le16 StructureSize2; /* size of wct area (varies, request specific) */
} __packed;
-#define SMB3_AES128CCM_NONCE 11
-#define SMB3_AES128GCM_NONCE 12
+#define SMB3_AES_CCM_NONCE 11
+#define SMB3_AES_GCM_NONCE 12
/* Transform flags (for 3.0 dialect this flag indicates CCM */
#define TRANSFORM_FLAG_ENCRYPTED 0x0001
@@ -153,10 +153,14 @@ struct smb2_compression_transform_hdr {
} __packed;
/* See MS-SMB2 2.2.42.1 */
+#define SMB2_COMPRESSION_FLAG_NONE 0x0000
+#define SMB2_COMPRESSION_FLAG_CHAINED 0x0001
+
struct compression_payload_header {
- __le16 AlgorithmId;
- __le16 Reserved;
- __le32 Length;
+ __le16 CompressionAlgorithm;
+ __le16 Flags;
+ __le32 Length; /* length of compressed payload including field below if present */
+ /* __le32 OriginalPayloadSize; */ /* optional */
} __packed;
/* See MS-SMB2 2.2.42.2 */
@@ -167,6 +171,26 @@ struct compression_pattern_payload_v1 {
__le32 Repetitions;
} __packed;
+/* See MS-SMB2 2.2.43 */
+struct smb2_rdma_transform {
+ __le16 RdmaDescriptorOffset;
+ __le16 RdmaDescriptorLength;
+ __le32 Channel; /* for values see channel description in smb2 read above */
+ __le16 TransformCount;
+ __le16 Reserved1;
+ __le32 Reserved2;
+} __packed;
+
+struct smb2_rdma_encryption_transform {
+ __le16 TransformType;
+ __le16 SignatureLength;
+ __le16 NonceLength;
+ __u16 Reserved;
+ __u8 Signature[]; /* variable length */
+ /* u8 Nonce[] */
+ /* followed by padding */
+} __packed;
+
/*
* SMB2 flag definitions
*/
@@ -297,6 +321,9 @@ struct smb2_negotiate_req {
#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2)
#define SMB2_COMPRESSION_CAPABILITIES cpu_to_le16(3)
#define SMB2_NETNAME_NEGOTIATE_CONTEXT_ID cpu_to_le16(5)
+#define SMB2_TRANSPORT_CAPABILITIES cpu_to_le16(6)
+#define SMB2_RDMA_TRANSFORM_CAPABILITIES cpu_to_le16(7)
+#define SMB2_SIGNING_CAPABILITIES cpu_to_le16(8)
#define SMB2_POSIX_EXTENSIONS_AVAILABLE cpu_to_le16(0x100)
struct smb2_neg_context {
@@ -325,6 +352,9 @@ struct smb2_preauth_neg_context {
/* Encryption Algorithms Ciphers */
#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
#define SMB2_ENCRYPTION_AES128_GCM cpu_to_le16(0x0002)
+/* we currently do not request AES256_CCM since GCM is presumably faster */
+#define SMB2_ENCRYPTION_AES256_CCM cpu_to_le16(0x0003)
+#define SMB2_ENCRYPTION_AES256_GCM cpu_to_le16(0x0004)
/* Min encrypt context data is one cipher so 2 bytes + 2 byte count field */
#define MIN_ENCRYPT_CTXT_DATA_LEN 4
@@ -332,8 +362,9 @@ struct smb2_encryption_neg_context {
__le16 ContextType; /* 2 */
__le16 DataLength;
__le32 Reserved;
- __le16 CipherCount; /* AES-128-GCM and AES-128-CCM */
- __le16 Ciphers[2];
+ /* CipherCount usually 2, but can be 3 when AES256-GCM is enabled */
+ __le16 CipherCount; /* AES128-GCM and AES128-CCM by default */
+ __le16 Ciphers[3];
} __packed;
/* See MS-SMB2 2.2.3.1.3 */
@@ -351,10 +382,10 @@ struct smb2_encryption_neg_context {
struct smb2_compression_capabilities_context {
__le16 ContextType; /* 3 */
__le16 DataLength;
- __u32 Flags;
+ __u32 Reserved;
__le16 CompressionAlgorithmCount;
__u16 Padding;
- __u32 Reserved1;
+ __u32 Flags;
__le16 CompressionAlgorithms[3];
} __packed;
@@ -363,12 +394,44 @@ struct smb2_compression_capabilities_context {
* Its struct simply contains NetName, an array of Unicode characters
*/
struct smb2_netname_neg_context {
- __le16 ContextType; /* 0x100 */
+ __le16 ContextType; /* 5 */
__le16 DataLength;
__le32 Reserved;
__le16 NetName[]; /* hostname of target converted to UCS-2 */
} __packed;
+/*
+ * For rdma transform capabilities context see MS-SMB2 2.2.3.1.6
+ * and 2.2.4.1.5
+ */
+
+/* RDMA Transform IDs */
+#define SMB2_RDMA_TRANSFORM_NONE 0x0000
+#define SMB2_RDMA_TRANSFORM_ENCRYPTION 0x0001
+
+struct smb2_rdma_transform_capabilities_context {
+ __le16 ContextType; /* 7 */
+ __le16 DataLength;
+ __u32 Reserved;
+ __le16 TransformCount;
+ __u16 Reserved1;
+ __u32 Reserved2;
+ __le16 RDMATransformIds[1];
+} __packed;
+
+/* Signing algorithms */
+#define SIGNING_ALG_HMAC_SHA256 0
+#define SIGNING_ALG_AES_CMAC 1
+#define SIGNING_ALG_AES_GMAC 2
+
+struct smb2_signing_capabilities {
+ __le16 ContextType; /* 8 */
+ __le16 DataLength;
+ __u32 Reserved;
+ __le16 SigningAlgorithmCount;
+ __le16 SigningAlgorithms[];
+} __packed;
+
#define POSIX_CTXT_DATA_LEN 16
struct smb2_posix_neg_context {
__le16 ContextType; /* 0x100 */
@@ -1178,6 +1241,7 @@ struct smb2_flush_rsp {
#define SMB2_CHANNEL_NONE cpu_to_le32(0x00000000)
#define SMB2_CHANNEL_RDMA_V1 cpu_to_le32(0x00000001) /* SMB3 or later */
#define SMB2_CHANNEL_RDMA_V1_INVALIDATE cpu_to_le32(0x00000002) /* >= SMB3.02 */
+#define SMB2_CHANNEL_RDMA_TRANSFORM cpu_to_le32(0x00000003) /* >= SMB3.02, only used on write */
/* SMB2 read request without RFC1001 length at the beginning */
struct smb2_read_plain_req {
@@ -1197,6 +1261,10 @@ struct smb2_read_plain_req {
__u8 Buffer[1];
} __packed;
+/* Read flags */
+#define SMB2_READFLAG_RESPONSE_NONE 0x00000000
+#define SMB2_READFLAG_RESPONSE_RDMA_TRANSFORM 0x00000001
+
struct smb2_read_rsp {
struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 17 */
@@ -1204,7 +1272,7 @@ struct smb2_read_rsp {
__u8 Reserved;
__le32 DataLength;
__le32 DataRemaining;
- __u32 Reserved2;
+ __u32 Flags;
__u8 Buffer[1];
} __packed;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 2f8ecbf54214..67c50d78caa1 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -70,7 +70,8 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server,
struct mid_q_entry *mid);
extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid);
+ struct cifs_sb_info *cifs_sb,
+ struct cached_fid **cfid);
extern void close_shroot(struct cached_fid *cfid);
extern void close_shroot_lease(struct cached_fid *cfid);
extern void close_shroot_lease_locked(struct cached_fid *cfid);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index c0348e3b1695..ebccd71cc60a 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -849,12 +849,13 @@ smb3_crypto_aead_allocate(struct TCP_Server_Info *server)
struct crypto_aead *tfm;
if (!server->secmech.ccmaesencrypt) {
- if (server->cipher_type == SMB2_ENCRYPTION_AES128_GCM)
+ if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) ||
+ (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
else
tfm = crypto_alloc_aead("ccm(aes)", 0, 0);
if (IS_ERR(tfm)) {
- cifs_server_dbg(VFS, "%s: Failed to alloc encrypt aead\n",
+ cifs_server_dbg(VFS, "%s: Failed alloc encrypt aead\n",
__func__);
return PTR_ERR(tfm);
}
@@ -862,7 +863,8 @@ smb3_crypto_aead_allocate(struct TCP_Server_Info *server)
}
if (!server->secmech.ccmaesdecrypt) {
- if (server->cipher_type == SMB2_ENCRYPTION_AES128_GCM)
+ if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) ||
+ (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
else
tfm = crypto_alloc_aead("ccm(aes)", 0, 0);
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index eef4b08c7208..90e0fab69bb8 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -878,33 +878,39 @@ DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect);
DECLARE_EVENT_CLASS(smb3_credit_class,
TP_PROTO(__u64 currmid,
char *hostname,
- int credits),
- TP_ARGS(currmid, hostname, credits),
+ int credits,
+ int credits_to_add),
+ TP_ARGS(currmid, hostname, credits, credits_to_add),
TP_STRUCT__entry(
__field(__u64, currmid)
__field(char *, hostname)
__field(int, credits)
+ __field(int, credits_to_add)
),
TP_fast_assign(
__entry->currmid = currmid;
__entry->hostname = hostname;
__entry->credits = credits;
+ __entry->credits_to_add = credits_to_add;
),
- TP_printk("server=%s current_mid=0x%llx credits=%d",
+ TP_printk("server=%s current_mid=0x%llx credits=%d credits_to_add=%d",
__entry->hostname,
__entry->currmid,
- __entry->credits)
+ __entry->credits,
+ __entry->credits_to_add)
)
#define DEFINE_SMB3_CREDIT_EVENT(name) \
DEFINE_EVENT(smb3_credit_class, smb3_##name, \
TP_PROTO(__u64 currmid, \
char *hostname, \
- int credits), \
- TP_ARGS(currmid, hostname, credits))
+ int credits, \
+ int credits_to_add), \
+ TP_ARGS(currmid, hostname, credits, credits_to_add))
DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
DEFINE_SMB3_CREDIT_EVENT(credit_timeout);
+DEFINE_SMB3_CREDIT_EVENT(add_credits);
#endif /* _CIFS_TRACE_H */
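With the extra argument, callers of the credit events pass both the current count and the delta; a hedged sketch of an add_credits call site (the wrapper and the fields it reads are assumptions, not taken from this hunk):

/* Hypothetical call site for the new event. */
static void note_added_credits(struct TCP_Server_Info *server, int credits_to_add)
{
	trace_smb3_add_credits(server->CurrentMid, server->hostname,
			       server->credits, credits_to_add);
}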
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index ac7632482736..e27e255d40dd 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -563,7 +563,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
cifs_num_waiters_dec(server);
if (!rc) {
trace_smb3_credit_timeout(server->CurrentMid,
- server->hostname, num_credits);
+ server->hostname, num_credits, 0);
cifs_server_dbg(VFS, "wait timed out after %d ms\n",
timeout);
return -ENOTSUPP;
@@ -604,7 +604,8 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
if (!rc) {
trace_smb3_credit_timeout(
server->CurrentMid,
- server->hostname, num_credits);
+ server->hostname, num_credits,
+ 0);
cifs_server_dbg(VFS, "wait timed out after %d ms\n",
timeout);
return -ENOTSUPP;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 912308600d39..4b90cfd1ec36 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -690,8 +690,7 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = 0;
buf->f_files = CRAMFS_SB(sb)->files;
buf->f_ffree = 0;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_namelen = CRAMFS_MAXPATHLEN;
return 0;
}
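This and the statfs hunks that follow all replace the same open-coded split of a 64-bit id into the two f_fsid words with u64_to_fsid(); a sketch of the assumed helper shape, shown only to make the repeated pattern explicit:

/* Assumed shape of the helper; equivalent to the removed open-coded lines. */
static inline __kernel_fsid_t u64_to_fsid_sketch(u64 v)
{
	return (__kernel_fsid_t){ .val = { (u32)v, (u32)(v >> 32) } };
}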
diff --git a/fs/efs/super.c b/fs/efs/super.c
index a4a945d0ac6a..62b155b9366b 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -342,8 +342,7 @@ static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) {
sbi->inode_blocks *
(EFS_BLOCKSIZE / sizeof(struct efs_dinode));
buf->f_ffree = sbi->inode_free; /* free inodes */
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_namelen = EFS_MAXNAMELEN; /* max filename length */
return 0;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index b9a09806512a..be10b16ea66e 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -561,8 +561,7 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = EROFS_NAME_LEN;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 3ffdce5c7384..87be5bfc31eb 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -89,8 +89,7 @@ static int exfat_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = sbi->num_clusters - 2; /* clu 0 & 1 */
buf->f_bfree = buf->f_blocks - sbi->used_clusters;
buf->f_bavail = buf->f_bfree;
- buf->f_fsid.val[0] = (unsigned int)id;
- buf->f_fsid.val[1] = (unsigned int)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
/* Unicode utf16 255 characters */
buf->f_namelen = EXFAT_MAX_FILE_LEN * NLS_MAX_CHARSET_SIZE;
return 0;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7fab2b3b5b39..09f1fe676972 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1455,8 +1455,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
buf->f_namelen = EXT2_NAME_LEN;
fsid = le64_to_cpup((void *)es->s_uuid) ^
le64_to_cpup((void *)es->s_uuid + sizeof(u64));
- buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
- buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+ buf->f_fsid = u64_to_fsid(fsid);
spin_unlock(&sbi->s_lock);
return 0;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 03373471131c..2fe141ff3c7e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6144,8 +6144,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = EXT4_NAME_LEN;
fsid = le64_to_cpup((void *)es->s_uuid) ^
le64_to_cpup((void *)es->s_uuid + sizeof(u64));
- buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
- buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+ buf->f_fsid = u64_to_fsid(fsid);
#ifdef CONFIG_QUOTA
if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 0c958fed3392..00eff2f51807 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1442,8 +1442,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
}
buf->f_namelen = F2FS_NAME_LEN;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
#ifdef CONFIG_QUOTA
if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) &&
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a0cf99debb1e..bab9b202b496 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -836,8 +836,7 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = sbi->max_cluster - FAT_START_ENT;
buf->f_bfree = sbi->free_clusters;
buf->f_bavail = sbi->free_clusters;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_namelen =
(sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE;
diff --git a/fs/file_table.c b/fs/file_table.c
index 656647f9575a..709ada3151da 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -339,7 +339,7 @@ void fput_many(struct file *file, unsigned int refs)
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
init_task_work(&file->f_u.fu_rcuhead, ____fput);
- if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
+ if (!task_work_add(task, &file->f_u.fu_rcuhead, TWA_RESUME))
return;
/*
* After this task has run exit_task_work(),
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index d4af283fc888..9cd2ecad07db 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -91,22 +91,13 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> PAGE_SHIFT;
- unsigned offset;
+ struct iomap_writepage_ctx wpc = { };
if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
goto out;
if (current->journal_info)
goto redirty;
- /* Is the page fully outside i_size? (truncate in progress) */
- offset = i_size & (PAGE_SIZE-1);
- if (page->index > end_index || (page->index == end_index && !offset)) {
- page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
- goto out;
- }
-
- return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
+ return iomap_writepage(page, wbc, &wpc, &gfs2_writeback_ops);
redirty:
redirty_page_for_writepage(wbc, page);
@@ -115,11 +106,16 @@ out:
return 0;
}
-/* This is the same as calling block_write_full_page, but it also
+/**
+ * gfs2_write_jdata_page - gfs2 jdata-specific version of block_write_full_page
+ * @page: The page to write
+ * @wbc: The writeback control
+ *
+ * This is the same as calling block_write_full_page, but it also
* writes pages outside of i_size
*/
-static int gfs2_write_full_page(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc)
+static int gfs2_write_jdata_page(struct page *page,
+ struct writeback_control *wbc)
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
@@ -137,7 +133,7 @@ static int gfs2_write_full_page(struct page *page, get_block_t *get_block,
if (page->index == end_index && offset)
zero_user_segment(page, offset, PAGE_SIZE);
- return __block_write_full_page(inode, page, get_block, wbc,
+ return __block_write_full_page(inode, page, gfs2_get_block_noalloc, wbc,
end_buffer_async_write);
}
@@ -166,7 +162,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
}
gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize);
}
- return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
+ return gfs2_write_jdata_page(page, wbc);
}
/**
@@ -208,7 +204,8 @@ static int gfs2_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
- int ret = mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
+ struct iomap_writepage_ctx wpc = { };
+ int ret;
/*
* Even if we didn't write any pages here, we might still be holding
@@ -216,9 +213,9 @@ static int gfs2_writepages(struct address_space *mapping,
* want balance_dirty_pages() to loop indefinitely trying to write out
* pages held in the ail that it can't find.
*/
+ ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops);
if (ret == 0)
set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
-
return ret;
}
@@ -470,12 +467,13 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
static int __gfs2_readpage(void *file, struct page *page)
{
- struct gfs2_inode *ip = GFS2_I(page->mapping->host);
- struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+ struct inode *inode = page->mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;
- if (i_blocksize(page->mapping->host) == PAGE_SIZE &&
- !page_has_buffers(page)) {
+ if (!gfs2_is_jdata(ip) ||
+ (i_blocksize(inode) == PAGE_SIZE && !page_has_buffers(page))) {
error = iomap_readpage(page, &gfs2_iomap_ops);
} else if (gfs2_is_stuffed(ip)) {
error = stuffed_readpage(ip, page);
@@ -563,8 +561,12 @@ static void gfs2_readahead(struct readahead_control *rac)
struct inode *inode = rac->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
- if (!gfs2_is_stuffed(ip))
+ if (gfs2_is_stuffed(ip))
+ ;
+ else if (gfs2_is_jdata(ip))
mpage_readahead(rac, gfs2_block_map);
+ else
+ iomap_readahead(rac, &gfs2_iomap_ops);
}
/**
@@ -621,7 +623,8 @@ out:
static int jdata_set_page_dirty(struct page *page)
{
- SetPageChecked(page);
+ if (current->journal_info)
+ SetPageChecked(page);
return __set_page_dirty_buffers(page);
}
@@ -663,8 +666,11 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
if (bd) {
if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
list_del_init(&bd->bd_list);
- else
+ else {
+ spin_lock(&sdp->sd_ail_lock);
gfs2_remove_from_journal(bh, REMOVE_JDATA);
+ spin_unlock(&sdp->sd_ail_lock);
+ }
}
bh->b_bdev = NULL;
clear_buffer_mapped(bh);
@@ -736,7 +742,6 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
*/
gfs2_log_lock(sdp);
- spin_lock(&sdp->sd_ail_lock);
head = bh = page_buffers(page);
do {
if (atomic_read(&bh->b_count))
@@ -748,7 +753,6 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
goto cannot_release;
bh = bh->b_this_page;
} while(bh != head);
- spin_unlock(&sdp->sd_ail_lock);
head = bh = page_buffers(page);
do {
@@ -774,7 +778,6 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
return try_to_free_buffers(page);
cannot_release:
- spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
return 0;
}
@@ -784,12 +787,13 @@ static const struct address_space_operations gfs2_aops = {
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readahead = gfs2_readahead,
+ .set_page_dirty = iomap_set_page_dirty,
+ .releasepage = iomap_releasepage,
+ .invalidatepage = iomap_invalidatepage,
.bmap = gfs2_bmap,
- .invalidatepage = gfs2_invalidatepage,
- .releasepage = gfs2_releasepage,
.direct_IO = noop_direct_IO,
- .migratepage = buffer_migrate_page,
- .is_partially_uptodate = block_is_partially_uptodate,
+ .migratepage = iomap_migrate_page,
+ .is_partially_uptodate = iomap_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
};
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0f69fbd4af66..8dff9cbd0a87 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -56,7 +56,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
u64 block, struct page *page)
{
struct inode *inode = &ip->i_inode;
- struct buffer_head *bh;
int release = 0;
if (!page || page->index) {
@@ -80,20 +79,21 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
SetPageUptodate(page);
}
- if (!page_has_buffers(page))
- create_empty_buffers(page, BIT(inode->i_blkbits),
- BIT(BH_Uptodate));
+ if (gfs2_is_jdata(ip)) {
+ struct buffer_head *bh;
- bh = page_buffers(page);
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, BIT(inode->i_blkbits),
+ BIT(BH_Uptodate));
- if (!buffer_mapped(bh))
- map_bh(bh, inode->i_sb, block);
+ bh = page_buffers(page);
+ if (!buffer_mapped(bh))
+ map_bh(bh, inode->i_sb, block);
- set_buffer_uptodate(bh);
- if (gfs2_is_jdata(ip))
+ set_buffer_uptodate(bh);
gfs2_trans_add_data(ip->i_gl, bh);
- else {
- mark_buffer_dirty(bh);
+ } else {
+ set_page_dirty(page);
gfs2_ordered_add_inode(ip);
}
@@ -1158,7 +1158,8 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
struct metapath mp = { .mp_aheight = 1, };
int ret;
- iomap->flags |= IOMAP_F_BUFFER_HEAD;
+ if (gfs2_is_jdata(ip))
+ iomap->flags |= IOMAP_F_BUFFER_HEAD;
trace_gfs2_iomap_start(ip, pos, length, flags);
if (gfs2_iomap_need_write_lock(flags)) {
@@ -1291,6 +1292,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
loff_t length = bh_map->b_size;
struct metapath mp = { .mp_aheight = 1, };
struct iomap iomap = { };
+ int flags = create ? IOMAP_WRITE : 0;
int ret;
clear_buffer_mapped(bh_map);
@@ -1298,15 +1300,14 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
clear_buffer_boundary(bh_map);
trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
- if (create) {
- ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
- if (!ret && iomap.type == IOMAP_HOLE)
+ ret = gfs2_iomap_get(inode, pos, length, flags, &iomap, &mp);
+ if (!ret && iomap.type == IOMAP_HOLE) {
+ if (create)
ret = gfs2_iomap_alloc(inode, &iomap, &mp);
- release_metapath(&mp);
- } else {
- ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
- release_metapath(&mp);
+ else
+ ret = -ENODATA;
}
+ release_metapath(&mp);
if (ret)
goto out;
@@ -2518,3 +2519,26 @@ out:
gfs2_trans_end(sdp);
return error;
}
+
+static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
+ loff_t offset)
+{
+ struct metapath mp = { .mp_aheight = 1, };
+ int ret;
+
+ if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
+ return -EIO;
+
+ if (offset >= wpc->iomap.offset &&
+ offset < wpc->iomap.offset + wpc->iomap.length)
+ return 0;
+
+ memset(&wpc->iomap, 0, sizeof(wpc->iomap));
+ ret = gfs2_iomap_get(inode, offset, INT_MAX, 0, &wpc->iomap, &mp);
+ release_metapath(&mp);
+ return ret;
+}
+
+const struct iomap_writeback_ops gfs2_writeback_ops = {
+ .map_blocks = gfs2_map_blocks,
+};
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index b88fd45ab79f..aed4632d47d3 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -44,6 +44,7 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
}
extern const struct iomap_ops gfs2_iomap_ops;
+extern const struct iomap_writeback_ops gfs2_writeback_ops;
extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
extern int gfs2_block_map(struct inode *inode, sector_t lblock,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f13b136654ca..5441c17562c5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -270,7 +270,12 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
gfs2_glock_remove_from_lru(gl);
spin_unlock(&gl->gl_lockref.lock);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
- GLOCK_BUG_ON(gl, mapping && mapping->nrpages && !gfs2_withdrawn(sdp));
+ if (mapping) {
+ truncate_inode_pages_final(mapping);
+ if (!gfs2_withdrawn(sdp))
+ GLOCK_BUG_ON(gl, mapping->nrpages ||
+ mapping->nrexceptional);
+ }
trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}
@@ -453,9 +458,6 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
else
gl->gl_lockref.count--;
}
- if (held1 && held2 && list_empty(&gl->gl_holders))
- clear_bit(GLF_QUEUED, &gl->gl_flags);
-
if (new_state != gl->gl_target)
/* shorten our minimum hold time */
gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
@@ -1049,7 +1051,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_object = NULL;
gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
- INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);
+ if (gl->gl_name.ln_type == LM_TYPE_IOPEN)
+ INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);
mapping = gfs2_glock2aspace(gl);
if (mapping) {
@@ -1345,7 +1348,6 @@ fail:
if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
insert_pt = &gh2->gh_list;
}
- set_bit(GLF_QUEUED, &gl->gl_flags);
trace_gfs2_glock_queue(gh, 1);
gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
@@ -1646,16 +1648,15 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
unsigned long now = jiffies;
gfs2_glock_hold(gl);
+ spin_lock(&gl->gl_lockref.lock);
holdtime = gl->gl_tchange + gl->gl_hold_time;
- if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
+ if (!list_empty(&gl->gl_holders) &&
gl->gl_name.ln_type == LM_TYPE_INODE) {
if (time_before(now, holdtime))
delay = holdtime - now;
if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
delay = gl->gl_hold_time;
}
-
- spin_lock(&gl->gl_lockref.lock);
handle_callback(gl, state, delay, true);
__gfs2_glock_queue_work(gl, delay);
spin_unlock(&gl->gl_lockref.lock);
@@ -1842,10 +1843,9 @@ static struct shrinker glock_shrinker = {
};
/**
- * examine_bucket - Call a function for glock in a hash bucket
+ * glock_hash_walk - Call a function for glock in a hash bucket
* @examiner: the function
* @sdp: the filesystem
- * @bucket: the bucket
*
* Note that the function can be called multiple times on the same
* object. So the user must ensure that the function can cope with
@@ -1901,9 +1901,11 @@ bool gfs2_delete_work_queued(const struct gfs2_glock *gl)
static void flush_delete_work(struct gfs2_glock *gl)
{
- if (cancel_delayed_work(&gl->gl_delete)) {
- queue_delayed_work(gfs2_delete_workqueue,
- &gl->gl_delete, 0);
+ if (gl->gl_name.ln_type == LM_TYPE_IOPEN) {
+ if (cancel_delayed_work(&gl->gl_delete)) {
+ queue_delayed_work(gfs2_delete_workqueue,
+ &gl->gl_delete, 0);
+ }
}
gfs2_glock_queue_work(gl, 0);
}
@@ -2100,7 +2102,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*p++ = 'I';
if (test_bit(GLF_FROZEN, gflags))
*p++ = 'F';
- if (test_bit(GLF_QUEUED, gflags))
+ if (!list_empty(&gl->gl_holders))
*p++ = 'q';
if (test_bit(GLF_LRU, gflags))
*p++ = 'L';
@@ -2415,7 +2417,7 @@ static const struct seq_operations gfs2_glstats_seq_ops = {
.show = gfs2_glstats_seq_show,
};
-static const struct seq_operations gfs2_sbstats_seq_ops = {
+static const struct seq_operations gfs2_sbstats_sops = {
.start = gfs2_sbstats_seq_start,
.next = gfs2_sbstats_seq_next,
.stop = gfs2_sbstats_seq_stop,
@@ -2468,16 +2470,6 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
}
-static int gfs2_sbstats_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &gfs2_sbstats_seq_ops);
- if (ret == 0) {
- struct seq_file *seq = file->private_data;
- seq->private = inode->i_private; /* sdp */
- }
- return ret;
-}
-
static const struct file_operations gfs2_glocks_fops = {
.owner = THIS_MODULE,
.open = gfs2_glocks_open,
@@ -2494,13 +2486,7 @@ static const struct file_operations gfs2_glstats_fops = {
.release = gfs2_glocks_release,
};
-static const struct file_operations gfs2_sbstats_fops = {
- .owner = THIS_MODULE,
- .open = gfs2_sbstats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
+DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats);
void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
{
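DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats) generates the open routine and file_operations that the hunk above deletes by hand, which is why the seq_operations table is renamed to gfs2_sbstats_sops. Roughly (a simplified sketch; see include/linux/seq_file.h for the real macro), it expands to:

static int gfs2_sbstats_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &gfs2_sbstats_sops);
	if (!ret) {
		struct seq_file *seq = file->private_data;
		seq->private = inode->i_private;	/* sdp */
	}
	return ret;
}

static const struct file_operations gfs2_sbstats_fops = {
	.owner = THIS_MODULE,
	.open = gfs2_sbstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};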
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index de1d5f1d9ff8..aa3f5236befb 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -178,6 +178,9 @@ static int rgrp_go_sync(struct gfs2_glock *gl)
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = &sdp->sd_aspace;
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
+ const unsigned bsize = sdp->sd_sb.sb_bsize;
+ loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
+ loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
int error;
if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
@@ -186,18 +189,13 @@ static int rgrp_go_sync(struct gfs2_glock *gl)
gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_RGRP_GO_SYNC);
- filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
- error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
- WARN_ON_ONCE(error);
+ filemap_fdatawrite_range(mapping, start, end);
+ error = filemap_fdatawait_range(mapping, start, end);
+ WARN_ON_ONCE(error && !gfs2_withdrawn(sdp));
mapping_set_error(mapping, error);
if (!error)
error = gfs2_ail_empty_gl(gl);
-
- spin_lock(&gl->gl_lockref.lock);
- rgd = gl->gl_object;
- if (rgd)
- gfs2_free_clones(rgd);
- spin_unlock(&gl->gl_lockref.lock);
+ gfs2_free_clones(rgd);
return error;
}
@@ -216,15 +214,23 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = &sdp->sd_aspace;
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
+ const unsigned bsize = sdp->sd_sb.sb_bsize;
+ loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
+ loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
- if (rgd)
- gfs2_rgrp_brelse(rgd);
-
+ gfs2_rgrp_brelse(rgd);
WARN_ON_ONCE(!(flags & DIO_METADATA));
- truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
+ truncate_inode_pages_range(mapping, start, end);
+ rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
+}
+
+static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf)
+{
+ struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
if (rgd)
- rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
+ gfs2_rgrp_dump(seq, rgd, fs_id_buf);
}
static struct gfs2_inode *gfs2_glock2inode(struct gfs2_glock *gl)
@@ -712,7 +718,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_sync = rgrp_go_sync,
.go_inval = rgrp_go_inval,
.go_lock = gfs2_rgrp_go_lock,
- .go_dump = gfs2_rgrp_dump,
+ .go_dump = gfs2_rgrp_go_dump,
.go_type = LM_TYPE_RGRP,
.go_flags = GLOF_LVB,
};
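The sync and invalidate paths now derive the resource group's byte range from rd_addr and rd_length instead of the removed gl_vm fields; a small worked example of that arithmetic (all numbers illustrative):

/* Worked example of the rgrp byte-range math, assuming 4 KiB pages. */
static void rgrp_range_example(void)
{
	const unsigned bsize = 1024;		/* sb_bsize, assumed */
	u64 rd_addr = 17, rd_length = 5;	/* illustrative rgrp */
	loff_t start = (rd_addr * bsize) & PAGE_MASK;			/* 16384 */
	loff_t end = PAGE_ALIGN((rd_addr + rd_length) * bsize) - 1;	/* 24575 */
	(void)start; (void)end;	/* covers the two pages holding blocks 17..21 */
}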
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index ca2ec02436ec..d7707307f4b1 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -41,6 +41,10 @@ struct gfs2_log_header_host {
u32 lh_flags; /* GFS2_LOG_HEAD_... */
u32 lh_tail; /* Block number of log tail */
u32 lh_blkno;
+
+ s64 lh_local_total;
+ s64 lh_local_free;
+ s64 lh_local_dinodes;
};
/*
@@ -340,7 +344,6 @@ enum {
GLF_REPLY_PENDING = 9,
GLF_INITIAL = 10,
GLF_FROZEN = 11,
- GLF_QUEUED = 12,
GLF_LRU = 13,
GLF_OBJECT = 14, /* Used only for tracing */
GLF_BLOCKING = 15,
@@ -378,17 +381,10 @@ struct gfs2_glock {
atomic_t gl_ail_count;
atomic_t gl_revokes;
struct delayed_work gl_work;
- union {
- /* For iopen glocks only */
- struct {
- struct delayed_work gl_delete;
- u64 gl_no_formal_ino;
- };
- /* For rgrp glocks only */
- struct {
- loff_t start;
- loff_t end;
- } gl_vm;
+ /* For iopen glocks only */
+ struct {
+ struct delayed_work gl_delete;
+ u64 gl_no_formal_ino;
};
struct rcu_head gl_rcu;
struct rhash_head gl_node;
@@ -701,10 +697,18 @@ struct gfs2_pcpu_lkstats {
struct gfs2_lkstats lkstats[10];
};
+/* List of local (per node) statfs inodes */
+struct local_statfs_inode {
+ struct list_head si_list;
+ struct inode *si_sc_inode;
+ unsigned int si_jid; /* journal id this statfs inode corresponds to */
+};
+
struct gfs2_sbd {
struct super_block *sd_vfs;
struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
struct kobject sd_kobj;
+ struct completion sd_kobj_unregister;
unsigned long sd_flags; /* SDF_... */
struct gfs2_sb_host sd_sb;
@@ -751,6 +755,7 @@ struct gfs2_sbd {
struct inode *sd_jindex;
struct inode *sd_statfs_inode;
struct inode *sd_sc_inode;
+ struct list_head sd_sc_inodes_list;
struct inode *sd_qc_inode;
struct inode *sd_rindex;
struct inode *sd_quota_inode;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 3763c9ff1406..9133b3178677 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -70,7 +70,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct)
*
*/
-static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
+void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
{
bd->bd_tr = NULL;
list_del_init(&bd->bd_ail_st_list);
@@ -244,13 +244,15 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
* @tr: the transaction
* @max_revokes: If nonzero, issue revokes for the bd items for written buffers
*
+ * returns: the transaction's count of remaining active items
*/
-static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
int *max_revokes)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
+ int active_count = 0;
list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
bd_ail_st_list) {
@@ -265,8 +267,10 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
* If the ail buffer is not busy and caught an error, flag it
* for others.
*/
- if (!sdp->sd_log_error && buffer_busy(bh))
+ if (!sdp->sd_log_error && buffer_busy(bh)) {
+ active_count++;
continue;
+ }
if (!buffer_uptodate(bh) &&
!cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
gfs2_io_error_bh(sdp, bh);
@@ -285,6 +289,7 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
}
list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
}
+ return active_count;
}
/**
@@ -303,8 +308,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
- gfs2_ail1_empty_one(sdp, tr, &max_revokes);
- if (list_empty(&tr->tr_ail1_list) && oldest_tr)
+ if (!gfs2_ail1_empty_one(sdp, tr, &max_revokes) && oldest_tr)
list_move(&tr->tr_list, &sdp->sd_ail2_list);
else
oldest_tr = 0;
@@ -716,16 +720,24 @@ void gfs2_write_revokes(struct gfs2_sbd *sdp)
atomic_dec(&sdp->sd_log_blks_free);
/* If no blocks have been reserved, we need to also
* reserve a block for the header */
- if (!sdp->sd_log_blks_reserved)
+ if (!sdp->sd_log_blks_reserved) {
atomic_dec(&sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, -2);
+ } else {
+ trace_gfs2_log_blocks(sdp, -1);
+ }
}
gfs2_ail1_empty(sdp, max_revokes);
gfs2_log_unlock(sdp);
if (!sdp->sd_log_num_revoke) {
atomic_inc(&sdp->sd_log_blks_free);
- if (!sdp->sd_log_blks_reserved)
+ if (!sdp->sd_log_blks_reserved) {
atomic_inc(&sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, 2);
+ } else {
+ trace_gfs2_log_blocks(sdp, 1);
+ }
}
}
@@ -902,7 +914,7 @@ static void empty_ail1_list(struct gfs2_sbd *sdp)
}
/**
- * drain_bd - drain the buf and databuf queue for a failed transaction
+ * trans_drain - drain the buf and databuf queue for a failed transaction
* @tr: the transaction to drain
*
* When this is called, we're taking an error exit for a log write that failed
@@ -954,10 +966,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
goto out;
/* Log might have been flushed while we waited for the flush lock */
- if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
- up_write(&sdp->sd_log_flush_lock);
- return;
- }
+ if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags))
+ goto out;
trace_gfs2_log_flush(sdp, 1, flags);
if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
@@ -971,25 +981,25 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
if (unlikely (state == SFS_FROZEN))
if (gfs2_assert_withdraw_delayed(sdp,
!tr->tr_num_buf_new && !tr->tr_num_databuf_new))
- goto out;
+ goto out_withdraw;
}
if (unlikely(state == SFS_FROZEN))
if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke))
- goto out;
+ goto out_withdraw;
if (gfs2_assert_withdraw_delayed(sdp,
sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke))
- goto out;
+ goto out_withdraw;
gfs2_ordered_write(sdp);
if (gfs2_withdrawn(sdp))
- goto out;
+ goto out_withdraw;
lops_before_commit(sdp, tr);
if (gfs2_withdrawn(sdp))
- goto out;
+ goto out_withdraw;
gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
if (gfs2_withdrawn(sdp))
- goto out;
+ goto out_withdraw;
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
log_flush_wait(sdp);
@@ -1000,7 +1010,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
log_write_header(sdp, flags);
}
if (gfs2_withdrawn(sdp))
- goto out;
+ goto out_withdraw;
lops_after_commit(sdp, tr);
gfs2_log_lock(sdp);
@@ -1020,7 +1030,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
if (!sdp->sd_log_idle) {
empty_ail1_list(sdp);
if (gfs2_withdrawn(sdp))
- goto out;
+ goto out_withdraw;
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
trace_gfs2_log_blocks(sdp, -1);
log_write_header(sdp, flags);
@@ -1033,27 +1043,30 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
}
-out:
- if (gfs2_withdrawn(sdp)) {
- trans_drain(tr);
- /**
- * If the tr_list is empty, we're withdrawing during a log
- * flush that targets a transaction, but the transaction was
- * never queued onto any of the ail lists. Here we add it to
- * ail1 just so that ail_drain() will find and free it.
- */
- spin_lock(&sdp->sd_ail_lock);
- if (tr && list_empty(&tr->tr_list))
- list_add(&tr->tr_list, &sdp->sd_ail1_list);
- spin_unlock(&sdp->sd_ail_lock);
- ail_drain(sdp); /* frees all transactions */
- tr = NULL;
- }
-
+out_end:
trace_gfs2_log_flush(sdp, 0, flags);
+out:
up_write(&sdp->sd_log_flush_lock);
-
gfs2_trans_free(sdp, tr);
+ if (gfs2_withdrawing(sdp))
+ gfs2_withdraw(sdp);
+ return;
+
+out_withdraw:
+ trans_drain(tr);
+ /**
+ * If the tr_list is empty, we're withdrawing during a log
+ * flush that targets a transaction, but the transaction was
+ * never queued onto any of the ail lists. Here we add it to
+ * ail1 just so that ail_drain() will find and free it.
+ */
+ spin_lock(&sdp->sd_ail_lock);
+ if (tr && list_empty(&tr->tr_list))
+ list_add(&tr->tr_list, &sdp->sd_ail1_list);
+ spin_unlock(&sdp->sd_ail_lock);
+ ail_drain(sdp); /* frees all transactions */
+ tr = NULL;
+ goto out_end;
}
/**
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 8965c751a303..79f97290146e 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -63,7 +63,7 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct);
-
+extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index ed1da4323967..ed69298dd824 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -823,7 +823,7 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
*
*/
-static void gfs2_meta_sync(struct gfs2_glock *gl)
+void gfs2_meta_sync(struct gfs2_glock *gl)
{
struct address_space *mapping = gfs2_glock2aspace(gl);
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 9c5e4e491e03..4a3d8aecdf82 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -27,6 +27,7 @@ extern void gfs2_log_submit_bio(struct bio **biop, int opf);
extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, bool keep_cache);
+extern void gfs2_meta_sync(struct gfs2_glock *gl);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 9856cc2e0795..2db573e31f78 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -348,38 +348,109 @@ void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
brelse(bh);
}
if (bd) {
- spin_lock(&sdp->sd_ail_lock);
if (bd->bd_tr) {
gfs2_trans_add_revoke(sdp, bd);
} else if (was_pinned) {
bh->b_private = NULL;
kmem_cache_free(gfs2_bufdata_cachep, bd);
+ } else if (!list_empty(&bd->bd_ail_st_list) &&
+ !list_empty(&bd->bd_ail_gl_list)) {
+ gfs2_remove_from_ail(bd);
}
- spin_unlock(&sdp->sd_ail_lock);
}
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
}
/**
- * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
+ * gfs2_ail1_wipe - remove deleted/freed buffers from the ail1 list
+ * @sdp: superblock
+ * @bstart: starting block address of buffers to remove
+ * @blen: length of buffers to be removed
+ *
+ * This function is called from gfs2_journal_wipe, whose job is to remove
+ * buffers, corresponding to deleted blocks, from the journal. If we find any
+ * bufdata elements on the system ail1 list, they haven't been written to
+ * the journal yet. So we remove them.
+ */
+static void gfs2_ail1_wipe(struct gfs2_sbd *sdp, u64 bstart, u32 blen)
+{
+ struct gfs2_trans *tr, *s;
+ struct gfs2_bufdata *bd, *bs;
+ struct buffer_head *bh;
+ u64 end = bstart + blen;
+
+ gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
+ list_for_each_entry_safe(tr, s, &sdp->sd_ail1_list, tr_list) {
+ list_for_each_entry_safe(bd, bs, &tr->tr_ail1_list,
+ bd_ail_st_list) {
+ bh = bd->bd_bh;
+ if (bh->b_blocknr < bstart || bh->b_blocknr >= end)
+ continue;
+
+ gfs2_remove_from_journal(bh, REMOVE_JDATA);
+ }
+ }
+ spin_unlock(&sdp->sd_ail_lock);
+ gfs2_log_unlock(sdp);
+}
+
+static struct buffer_head *gfs2_getjdatabuf(struct gfs2_inode *ip, u64 blkno)
+{
+ struct address_space *mapping = ip->i_inode.i_mapping;
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct page *page;
+ struct buffer_head *bh;
+ unsigned int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
+ unsigned long index = blkno >> shift; /* convert block to page */
+ unsigned int bufnum = blkno - (index << shift);
+
+ page = find_get_page_flags(mapping, index, FGP_LOCK|FGP_ACCESSED);
+ if (!page)
+ return NULL;
+ if (!page_has_buffers(page)) {
+ unlock_page(page);
+ put_page(page);
+ return NULL;
+ }
+ /* Locate header for our buffer within our page */
+ for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
+ /* Do nothing */;
+ get_bh(bh);
+ unlock_page(page);
+ put_page(page);
+ return bh;
+}
+
+/**
+ * gfs2_journal_wipe - make inode's buffers so they aren't dirty/pinned anymore
* @ip: the inode who owns the buffers
* @bstart: the first buffer in the run
* @blen: the number of buffers in the run
*
*/
-void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *bh;
+ int ty;
+ gfs2_ail1_wipe(sdp, bstart, blen);
while (blen) {
+ ty = REMOVE_META;
bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE);
+ if (!bh && gfs2_is_jdata(ip)) {
+ bh = gfs2_getjdatabuf(ip, bstart);
+ ty = REMOVE_JDATA;
+ }
if (bh) {
lock_buffer(bh);
gfs2_log_lock(sdp);
- gfs2_remove_from_journal(bh, REMOVE_META);
+ spin_lock(&sdp->sd_ail_lock);
+ gfs2_remove_from_journal(bh, ty);
+ spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
brelse(bh);
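gfs2_getjdatabuf() walks the inode's own page cache, so the block number must be converted to a page index plus a buffer offset within that page; a small illustrative check of that math (the constants are assumptions):

/* Illustrative check of the index math in gfs2_getjdatabuf(). */
static void jdata_index_example(void)
{
	unsigned int shift = 12 - 10;	/* PAGE_SHIFT - sb_bsize_shift, assumed */
	u64 blkno = 13;
	unsigned long index = blkno >> shift;		/* page index 3 */
	unsigned int bufnum = blkno - (index << shift);	/* buffer 1 within that page */
	(void)index; (void)bufnum;
}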
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index eafb74e861c6..4a8c01929b79 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -60,7 +60,7 @@ enum {
};
extern void gfs2_remove_from_journal(struct buffer_head *bh, int meta);
-extern void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
struct buffer_head **bhp);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6d18d2c91add..7a7e3c10a9a9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -110,6 +110,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
spin_lock_init(&sdp->sd_trunc_lock);
spin_lock_init(&sdp->sd_bitmap_lock);
+ INIT_LIST_HEAD(&sdp->sd_sc_inodes_list);
+
mapping = &sdp->sd_aspace;
address_space_init_once(mapping);
@@ -169,15 +171,19 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
return -EINVAL;
}
- /* If format numbers match exactly, we're done. */
-
- if (sb->sb_fs_format == GFS2_FORMAT_FS &&
- sb->sb_multihost_format == GFS2_FORMAT_MULTI)
- return 0;
+ if (sb->sb_fs_format != GFS2_FORMAT_FS ||
+ sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+ fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ return -EINVAL;
+ }
- fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE ||
+ (sb->sb_bsize & (sb->sb_bsize - 1))) {
+ pr_warn("Invalid superblock size\n");
+ return -EINVAL;
+ }
- return -EINVAL;
+ return 0;
}
static void end_bio_io_page(struct bio *bio)
@@ -604,6 +610,90 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
return error;
}
+/**
+ * init_statfs - look up and initialize master and local (per node) statfs inodes
+ * @sdp: The GFS2 superblock
+ *
+ * This should be called after the jindex is initialized in init_journal() and
+ * before gfs2_recover_journal() is called because we need to be able to write
+ * to these inodes during recovery.
+ *
+ * Returns: errno
+ */
+static int init_statfs(struct gfs2_sbd *sdp)
+{
+ int error = 0;
+ struct inode *master = d_inode(sdp->sd_master_dir);
+ struct inode *pn = NULL;
+ char buf[30];
+ struct gfs2_jdesc *jd;
+ struct gfs2_inode *ip;
+
+ sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
+ if (IS_ERR(sdp->sd_statfs_inode)) {
+ error = PTR_ERR(sdp->sd_statfs_inode);
+ fs_err(sdp, "can't read in statfs inode: %d\n", error);
+ goto fail;
+ }
+
+ pn = gfs2_lookup_simple(master, "per_node");
+ if (IS_ERR(pn)) {
+ error = PTR_ERR(pn);
+ fs_err(sdp, "can't find per_node directory: %d\n", error);
+ goto put_statfs;
+ }
+
+ /* For each jid, lookup the corresponding local statfs inode in the
+ * per_node metafs directory and save it in the sdp->sd_sc_inodes_list. */
+ list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+ struct local_statfs_inode *lsi =
+ kmalloc(sizeof(struct local_statfs_inode), GFP_NOFS);
+ if (!lsi) {
+ error = -ENOMEM;
+ goto free_local;
+ }
+ sprintf(buf, "statfs_change%u", jd->jd_jid);
+ lsi->si_sc_inode = gfs2_lookup_simple(pn, buf);
+ if (IS_ERR(lsi->si_sc_inode)) {
+ error = PTR_ERR(lsi->si_sc_inode);
+ fs_err(sdp, "can't find local \"sc\" file#%u: %d\n",
+ jd->jd_jid, error);
+ goto free_local;
+ }
+ lsi->si_jid = jd->jd_jid;
+ if (jd->jd_jid == sdp->sd_jdesc->jd_jid)
+ sdp->sd_sc_inode = lsi->si_sc_inode;
+
+ list_add_tail(&lsi->si_list, &sdp->sd_sc_inodes_list);
+ }
+
+ iput(pn);
+ ip = GFS2_I(sdp->sd_sc_inode);
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
+ &sdp->sd_sc_gh);
+ if (error) {
+ fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
+ goto free_local;
+ }
+ return 0;
+
+free_local:
+ free_local_statfs_inodes(sdp);
+ iput(pn);
+put_statfs:
+ iput(sdp->sd_statfs_inode);
+fail:
+ return error;
+}
+
+/* Uninitialize and free up memory used by the list of statfs inodes */
+static void uninit_statfs(struct gfs2_sbd *sdp)
+{
+ gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
+ free_local_statfs_inodes(sdp);
+ iput(sdp->sd_statfs_inode);
+}
+
static int init_journal(struct gfs2_sbd *sdp, int undo)
{
struct inode *master = d_inode(sdp->sd_master_dir);
@@ -690,6 +780,11 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
}
trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
+ /* Lookup statfs inodes here so journal recovery can use them. */
+ error = init_statfs(sdp);
+ if (error)
+ goto fail_jinode_gh;
+
if (sdp->sd_lockstruct.ls_first) {
unsigned int x;
for (x = 0; x < sdp->sd_journals; x++) {
@@ -698,14 +793,14 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
if (sdp->sd_args.ar_spectator) {
error = check_journal_clean(sdp, jd, true);
if (error)
- goto fail_jinode_gh;
+ goto fail_statfs;
continue;
}
error = gfs2_recover_journal(jd, true);
if (error) {
fs_err(sdp, "error recovering journal %u: %d\n",
x, error);
- goto fail_jinode_gh;
+ goto fail_statfs;
}
}
@@ -714,7 +809,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
error = gfs2_recover_journal(sdp->sd_jdesc, true);
if (error) {
fs_err(sdp, "error recovering my journal: %d\n", error);
- goto fail_jinode_gh;
+ goto fail_statfs;
}
}
@@ -725,6 +820,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
INIT_WORK(&sdp->sd_freeze_work, gfs2_freeze_func);
return 0;
+fail_statfs:
+ uninit_statfs(sdp);
fail_jinode_gh:
/* A withdraw may have done dq/uninit so now we need to check it */
if (!sdp->sd_args.ar_spectator &&
@@ -758,20 +855,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
if (error)
goto fail;
- /* Read in the master statfs inode */
- sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
- if (IS_ERR(sdp->sd_statfs_inode)) {
- error = PTR_ERR(sdp->sd_statfs_inode);
- fs_err(sdp, "can't read in statfs inode: %d\n", error);
- goto fail_journal;
- }
-
/* Read in the resource index inode */
sdp->sd_rindex = gfs2_lookup_simple(master, "rindex");
if (IS_ERR(sdp->sd_rindex)) {
error = PTR_ERR(sdp->sd_rindex);
fs_err(sdp, "can't get resource index inode: %d\n", error);
- goto fail_statfs;
+ goto fail_journal;
}
sdp->sd_rindex_uptodate = 0;
@@ -800,8 +889,6 @@ fail_qinode:
fail_rindex:
gfs2_clear_rgrpd(sdp);
iput(sdp->sd_rindex);
-fail_statfs:
- iput(sdp->sd_statfs_inode);
fail_journal:
init_journal(sdp, UNDO);
fail:
@@ -829,14 +916,6 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
return error;
}
- sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
- sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
- if (IS_ERR(sdp->sd_sc_inode)) {
- error = PTR_ERR(sdp->sd_sc_inode);
- fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
- goto fail;
- }
-
sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
if (IS_ERR(sdp->sd_qc_inode)) {
@@ -848,33 +927,21 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
iput(pn);
pn = NULL;
- ip = GFS2_I(sdp->sd_sc_inode);
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
- &sdp->sd_sc_gh);
- if (error) {
- fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
- goto fail_qc_i;
- }
-
ip = GFS2_I(sdp->sd_qc_inode);
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
&sdp->sd_qc_gh);
if (error) {
fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
- goto fail_ut_gh;
+ goto fail_qc_i;
}
return 0;
fail_qc_gh:
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
-fail_ut_gh:
- gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
fail_qc_i:
iput(sdp->sd_qc_inode);
fail_ut_i:
- iput(sdp->sd_sc_inode);
-fail:
iput(pn);
return error;
}
@@ -1062,26 +1129,14 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
}
error = init_names(sdp, silent);
- if (error) {
- /* In this case, we haven't initialized sysfs, so we have to
- manually free the sdp. */
- free_sbd(sdp);
- sb->s_fs_info = NULL;
- return error;
- }
+ if (error)
+ goto fail_free;
snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);
error = gfs2_sys_fs_add(sdp);
- /*
- * If we hit an error here, gfs2_sys_fs_add will have called function
- * kobject_put which causes the sysfs usage count to go to zero, which
- * causes sysfs to call function gfs2_sbd_release, which frees sdp.
- * Subsequent error paths here will call gfs2_sys_fs_del, which also
- * kobject_put to free sdp.
- */
if (error)
- return error;
+ goto fail_free;
gfs2_create_debugfs_file(sdp);
@@ -1179,9 +1234,9 @@ fail_lm:
gfs2_lm_unmount(sdp);
fail_debug:
gfs2_delete_debugfs_file(sdp);
- /* gfs2_sys_fs_del must be the last thing we do, since it causes
- * sysfs to call function gfs2_sbd_release, which frees sdp. */
gfs2_sys_fs_del(sdp);
+fail_free:
+ free_sbd(sdp);
sb->s_fs_info = NULL;
return error;
}
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 390ea79d682c..b5cbe21efdfb 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -144,6 +144,10 @@ int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
head->lh_tail = be32_to_cpu(lh->lh_tail);
head->lh_blkno = be32_to_cpu(lh->lh_blkno);
+ head->lh_local_total = be64_to_cpu(lh->lh_local_total);
+ head->lh_local_free = be64_to_cpu(lh->lh_local_free);
+ head->lh_local_dinodes = be64_to_cpu(lh->lh_local_dinodes);
+
return 0;
}
/**
@@ -292,6 +296,109 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
}
+/**
+ * update_statfs_inode - Update the master statfs inode or zero out the local
+ * statfs inode for a given journal.
+ * @jd: The journal
+ * @head: If NULL, @inode is the local statfs inode and we need to zero it out.
+ * Otherwise, @head contains the statfs change info that needs to be
+ * synced to the master statfs inode (pointed to by @inode).
+ * @inode: statfs inode to update.
+ */
+static int update_statfs_inode(struct gfs2_jdesc *jd,
+ struct gfs2_log_header_host *head,
+ struct inode *inode)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct gfs2_inode *ip;
+ struct buffer_head *bh;
+ struct gfs2_statfs_change_host sc;
+ int error = 0;
+
+ BUG_ON(!inode);
+ ip = GFS2_I(inode);
+
+ error = gfs2_meta_inode_buffer(ip, &bh);
+ if (error)
+ goto out;
+
+ spin_lock(&sdp->sd_statfs_spin);
+
+ if (head) { /* Update the master statfs inode */
+ gfs2_statfs_change_in(&sc, bh->b_data + sizeof(struct gfs2_dinode));
+ sc.sc_total += head->lh_local_total;
+ sc.sc_free += head->lh_local_free;
+ sc.sc_dinodes += head->lh_local_dinodes;
+ gfs2_statfs_change_out(&sc, bh->b_data + sizeof(struct gfs2_dinode));
+
+ fs_info(sdp, "jid=%u: Updated master statfs Total:%lld, "
+ "Free:%lld, Dinodes:%lld after change "
+ "[%+lld,%+lld,%+lld]\n", jd->jd_jid, sc.sc_total,
+ sc.sc_free, sc.sc_dinodes, head->lh_local_total,
+ head->lh_local_free, head->lh_local_dinodes);
+ } else { /* Zero out the local statfs inode */
+ memset(bh->b_data + sizeof(struct gfs2_dinode), 0,
+ sizeof(struct gfs2_statfs_change));
+ /* If it's our own journal, reset any in-memory changes too */
+ if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
+ memset(&sdp->sd_statfs_local, 0,
+ sizeof(struct gfs2_statfs_change_host));
+ }
+ }
+ spin_unlock(&sdp->sd_statfs_spin);
+
+ mark_buffer_dirty(bh);
+ brelse(bh);
+ gfs2_meta_sync(ip->i_gl);
+
+out:
+ return error;
+}
+
+/**
+ * recover_local_statfs - Update the master and local statfs changes for this
+ * journal.
+ *
+ * Previously, statfs updates would be read in from the local statfs inode and
+ * synced to the master statfs inode during recovery.
+ *
+ * We now use the statfs updates in the journal head to update the master statfs
+ * inode instead of reading in from the local statfs inode. To preserve backward
+ * compatibility with kernels that can't do this, we still need to keep the
+ * local statfs inode up to date by writing changes to it. At some point in the
+ * future, we can do away with the local statfs inodes altogether and keep the
+ * statfs changes solely in the journal.
+ *
+ * @jd: the journal
+ * @head: the journal head
+ */
+static void recover_local_statfs(struct gfs2_jdesc *jd,
+ struct gfs2_log_header_host *head)
+{
+ int error;
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+
+ if (!head->lh_local_total && !head->lh_local_free
+ && !head->lh_local_dinodes) /* No change */
+ goto zero_local;
+
+ /* First update the master statfs inode with the changes we
+ * found in the journal. */
+ error = update_statfs_inode(jd, head, sdp->sd_statfs_inode);
+ if (error)
+ goto out;
+
+zero_local:
+ /* Zero out the local statfs inode so any changes in there
+ * are not re-recovered. */
+ error = update_statfs_inode(jd, NULL,
+ find_local_statfs_inode(sdp, jd->jd_jid));
+out:
+ return;
+}
+
void gfs2_recover_func(struct work_struct *work)
{
struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
@@ -411,6 +518,7 @@ void gfs2_recover_func(struct work_struct *work)
goto fail_gunlock_thaw;
}
+ recover_local_statfs(jd, &head);
clean_journal(jd, &head);
up_read(&sdp->sd_log_flush_lock);
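The recovery-time statfs work reduces to folding the recovered journal's local deltas into the master counts and then zeroing the local record so the deltas are not applied twice; a compact sketch of that arithmetic (field names come from the hunks above, the wrapper itself is assumed):

/* Minimal sketch of the statfs roll-forward performed during recovery. */
static void fold_local_statfs(struct gfs2_statfs_change_host *master,
			      const struct gfs2_log_header_host *head)
{
	master->sc_total   += head->lh_local_total;
	master->sc_free    += head->lh_local_free;
	master->sc_dinodes += head->lh_local_dinodes;
	/* the on-disk local statfs inode is then zeroed out */
}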
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 074f228ea839..ee491bb9c1cc 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -878,7 +878,6 @@ static int rgd_insert(struct gfs2_rgrpd *rgd)
static int read_rindex_entry(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- const unsigned bsize = sdp->sd_sb.sb_bsize;
loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
struct gfs2_rindex buf;
int error;
@@ -924,9 +923,6 @@ static int read_rindex_entry(struct gfs2_inode *ip)
spin_unlock(&sdp->sd_rindex_spin);
if (!error) {
glock_set_object(rgd->rd_gl, rgd);
- rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
- rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr +
- rgd->rd_length) * bsize) - 1;
return 0;
}
@@ -2209,20 +2205,17 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
/**
* gfs2_rgrp_dump - print out an rgrp
* @seq: The iterator
- * @gl: The glock in question
+ * @rgd: The rgrp in question
* @fs_id_buf: pointer to file system id (if requested)
*
*/
-void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
+void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
const char *fs_id_buf)
{
- struct gfs2_rgrpd *rgd = gl->gl_object;
struct gfs2_blkreserv *trs;
const struct rb_node *n;
- if (rgd == NULL)
- return;
gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
fs_id_buf,
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
@@ -2253,7 +2246,7 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
(unsigned long long)rgd->rd_addr);
fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
- gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
+ gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
rgd->rd_flags |= GFS2_RDF_ERROR;
}
@@ -2445,8 +2438,8 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
/* Directories keep their data in the metadata address space */
- if (meta || ip->i_depth)
- gfs2_meta_wipe(ip, bstart, blen);
+ if (meta || ip->i_depth || gfs2_is_jdata(ip))
+ gfs2_journal_wipe(ip, bstart, blen);
}
/**
@@ -2502,7 +2495,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
gfs2_statfs_change(sdp, 0, +1, -1);
trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
- gfs2_meta_wipe(ip, ip->i_no_addr, 1);
+ gfs2_journal_wipe(ip, ip->i_no_addr, 1);
}
/**
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index a1d7e14fc55b..9a587ada51ed 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -67,7 +67,7 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist);
extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
-extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
+extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
const char *fs_id_buf);
extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
struct buffer_head *bh,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 9f4d9e7be839..b285192bd6b3 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -44,6 +44,12 @@
#include "xattr.h"
#include "lops.h"
+enum dinode_demise {
+ SHOULD_DELETE_DINODE,
+ SHOULD_NOT_DELETE_DINODE,
+ SHOULD_DEFER_EVICTION,
+};
+
/**
* gfs2_jindex_free - Clear all the journal index information
* @sdp: The GFS2 superblock
@@ -224,7 +230,7 @@ void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
}
-static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
+void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
{
struct gfs2_statfs_change *str = buf;
@@ -702,6 +708,8 @@ restart:
if (error)
gfs2_io_error(sdp);
}
+ WARN_ON(gfs2_withdrawing(sdp));
+
/* At this point, we're through modifying the disk */
/* Release stuff */
@@ -721,7 +729,7 @@ restart:
gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
- iput(sdp->sd_sc_inode);
+ free_local_statfs_inodes(sdp);
iput(sdp->sd_qc_inode);
}
@@ -736,6 +744,7 @@ restart:
/* At this point, we're through participating in the lockspace */
gfs2_sys_fs_del(sdp);
+ free_sbd(sdp);
}
/**
@@ -1309,109 +1318,98 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode)
}
/**
- * gfs2_evict_inode - Remove an inode from cache
+ * evict_should_delete - determine whether the inode is eligible for deletion
* @inode: The inode to evict
*
- * There are three cases to consider:
- * 1. i_nlink == 0, we are final opener (and must deallocate)
- * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
- * 3. i_nlink > 0
- *
- * If the fs is read only, then we have to treat all cases as per #3
- * since we are unable to do any deallocation. The inode will be
- * deallocated by the next read/write node to attempt an allocation
- * in the same resource group
+ * This function determines whether the evicted inode is eligible to be deleted
+ * and locks the inode glock.
*
- * We have to (at the moment) hold the inodes main lock to cover
- * the gap between unlocking the shared lock on the iopen lock and
- * taking the exclusive lock. I'd rather do a shared -> exclusive
- * conversion on the iopen lock, but we can change that later. This
- * is safe, just less efficient.
+ * Returns: the fate of the dinode
*/
-
-static void gfs2_evict_inode(struct inode *inode)
+static enum dinode_demise evict_should_delete(struct inode *inode,
+ struct gfs2_holder *gh)
{
+ struct gfs2_inode *ip = GFS2_I(inode);
struct super_block *sb = inode->i_sb;
struct gfs2_sbd *sdp = sb->s_fs_info;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- struct address_space *metamapping;
- int error;
-
- if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
- clear_inode(inode);
- return;
- }
-
- if (inode->i_nlink || sb_rdonly(sb))
- goto out;
+ int ret;
if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
- gfs2_holder_mark_uninitialized(&gh);
- goto out_delete;
+ goto should_delete;
}
if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
- goto out;
+ return SHOULD_DEFER_EVICTION;
/* Deletes should never happen under memory pressure anymore. */
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
- goto out;
+ return SHOULD_DEFER_EVICTION;
/* Must not read inode block until block type has been verified */
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
- if (unlikely(error)) {
+ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh);
+ if (unlikely(ret)) {
glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
- goto out;
+ return SHOULD_DEFER_EVICTION;
}
if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
- goto out_truncate;
- error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
- if (error)
- goto out_truncate;
+ return SHOULD_NOT_DELETE_DINODE;
+ ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
+ if (ret)
+ return SHOULD_NOT_DELETE_DINODE;
if (test_bit(GIF_INVALID, &ip->i_flags)) {
- error = gfs2_inode_refresh(ip);
- if (error)
- goto out_truncate;
+ ret = gfs2_inode_refresh(ip);
+ if (ret)
+ return SHOULD_NOT_DELETE_DINODE;
}
/*
* The inode may have been recreated in the meantime.
*/
if (inode->i_nlink)
- goto out_truncate;
+ return SHOULD_NOT_DELETE_DINODE;
-out_delete:
+should_delete:
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
if (!gfs2_upgrade_iopen_glock(inode)) {
gfs2_holder_uninit(&ip->i_iopen_gh);
- goto out_truncate;
+ return SHOULD_NOT_DELETE_DINODE;
}
}
+ return SHOULD_DELETE_DINODE;
+}
+
+/**
+ * evict_unlinked_inode - delete the pieces of an unlinked evicted inode
+ * @inode: The inode to evict
+ */
+static int evict_unlinked_inode(struct inode *inode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ int ret;
if (S_ISDIR(inode->i_mode) &&
(ip->i_diskflags & GFS2_DIF_EXHASH)) {
- error = gfs2_dir_exhash_dealloc(ip);
- if (error)
- goto out_unlock;
+ ret = gfs2_dir_exhash_dealloc(ip);
+ if (ret)
+ goto out;
}
if (ip->i_eattr) {
- error = gfs2_ea_dealloc(ip);
- if (error)
- goto out_unlock;
+ ret = gfs2_ea_dealloc(ip);
+ if (ret)
+ goto out;
}
if (!gfs2_is_stuffed(ip)) {
- error = gfs2_file_dealloc(ip);
- if (error)
- goto out_unlock;
+ ret = gfs2_file_dealloc(ip);
+ if (ret)
+ goto out;
}
/* We're about to clear the bitmap for the dinode, but as soon as we
@@ -1419,11 +1417,24 @@ out_delete:
location and try to set gl_object again. We clear gl_object here so
that subsequent inode creates don't see an old gl_object. */
glock_clear_object(ip->i_gl, ip);
- error = gfs2_dinode_dealloc(ip);
+ ret = gfs2_dinode_dealloc(ip);
gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
- goto out_unlock;
+out:
+ return ret;
+}
+
+/**
+ * evict_linked_inode - evict an inode whose dinode has not been unlinked
+ * @inode: The inode to evict
+ */
+static int evict_linked_inode(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct address_space *metamapping;
+ int ret;
-out_truncate:
gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_EVICT_INODE);
metamapping = gfs2_glock2aspace(ip->i_gl);
@@ -1434,15 +1445,63 @@ out_truncate:
write_inode_now(inode, 1);
gfs2_ail_flush(ip->i_gl, 0);
- error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
- if (error)
- goto out_unlock;
+ ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
+ if (ret)
+ return ret;
+
/* Needs to be done before glock release & also in a transaction */
truncate_inode_pages(&inode->i_data, 0);
truncate_inode_pages(metamapping, 0);
gfs2_trans_end(sdp);
+ return 0;
+}
+
+/**
+ * gfs2_evict_inode - Remove an inode from cache
+ * @inode: The inode to evict
+ *
+ * There are three cases to consider:
+ * 1. i_nlink == 0, we are final opener (and must deallocate)
+ * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
+ * 3. i_nlink > 0
+ *
+ * If the fs is read only, then we have to treat all cases as per #3
+ * since we are unable to do any deallocation. The inode will be
+ * deallocated by the next read/write node to attempt an allocation
+ * in the same resource group.
+ *
+ * We have to (at the moment) hold the inode's main lock to cover
+ * the gap between unlocking the shared lock on the iopen lock and
+ * taking the exclusive lock. I'd rather do a shared -> exclusive
+ * conversion on the iopen lock, but we can change that later. This
+ * is safe, just less efficient.
+ */
+
+static void gfs2_evict_inode(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder gh;
+ int ret;
+
+ if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
+ clear_inode(inode);
+ return;
+ }
+
+ if (inode->i_nlink || sb_rdonly(sb))
+ goto out;
+
+ gfs2_holder_mark_uninitialized(&gh);
+ ret = evict_should_delete(inode, &gh);
+ if (ret == SHOULD_DEFER_EVICTION)
+ goto out;
+ if (ret == SHOULD_DELETE_DINODE)
+ ret = evict_unlinked_inode(inode);
+ else
+ ret = evict_linked_inode(inode);
-out_unlock:
if (gfs2_rs_active(&ip->i_res))
gfs2_rs_deltree(&ip->i_res);
@@ -1450,8 +1509,8 @@ out_unlock:
glock_clear_object(ip->i_gl, ip);
gfs2_glock_dq_uninit(&gh);
}
- if (error && error != GLR_TRYFAILED && error != -EROFS)
- fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
+ if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
+ fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
out:
truncate_inode_pages_final(&inode->i_data);
if (ip->i_qadata)
@@ -1502,6 +1561,35 @@ static void gfs2_free_inode(struct inode *inode)
kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
}
+extern void free_local_statfs_inodes(struct gfs2_sbd *sdp)
+{
+ struct local_statfs_inode *lsi, *safe;
+
+ /* Run through the statfs inodes list to iput and free memory */
+ list_for_each_entry_safe(lsi, safe, &sdp->sd_sc_inodes_list, si_list) {
+ if (lsi->si_jid == sdp->sd_jdesc->jd_jid)
+ sdp->sd_sc_inode = NULL; /* belongs to this node */
+ if (lsi->si_sc_inode)
+ iput(lsi->si_sc_inode);
+ list_del(&lsi->si_list);
+ kfree(lsi);
+ }
+}
+
+extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
+ unsigned int index)
+{
+ struct local_statfs_inode *lsi;
+
+ /* Return the local (per node) statfs inode in the
+ * sdp->sd_sc_inodes_list corresponding to the 'index'. */
+ list_for_each_entry(lsi, &sdp->sd_sc_inodes_list, si_list) {
+ if (lsi->si_jid == index)
+ return lsi->si_sc_inode;
+ }
+ return NULL;
+}
+
const struct super_operations gfs2_super_ops = {
.alloc_inode = gfs2_alloc_inode,
.free_inode = gfs2_free_inode,
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 51900554ed81..c9fb2a654181 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -37,11 +37,16 @@ extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
s64 dinodes);
extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
const void *buf);
+extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc,
+ void *buf);
extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
struct buffer_head *l_bh);
extern int gfs2_statfs_sync(struct super_block *sb, int type);
extern void gfs2_freeze_func(struct work_struct *work);
+extern void free_local_statfs_inodes(struct gfs2_sbd *sdp);
+extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
+ unsigned int index);
extern void free_sbd(struct gfs2_sbd *sdp);
extern struct file_system_type gfs2_fs_type;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index d28c41bd69b0..c3e72dba7418 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -303,7 +303,7 @@ static void gfs2_sbd_release(struct kobject *kobj)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
- free_sbd(sdp);
+ complete(&sdp->sd_kobj_unregister);
}
static struct kobj_type gfs2_ktype = {
@@ -655,6 +655,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
sprintf(ro, "RDONLY=%d", sb_rdonly(sb));
sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
+ init_completion(&sdp->sd_kobj_unregister);
sdp->sd_kobj.kset = gfs2_kset;
error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
"%s", sdp->sd_table_name);
@@ -685,6 +686,7 @@ fail_tune:
fail_reg:
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
+ wait_for_completion(&sdp->sd_kobj_unregister);
sb->s_fs_info = NULL;
return error;
}
@@ -695,6 +697,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
kobject_put(&sdp->sd_kobj);
+ wait_for_completion(&sdp->sd_kobj_unregister);
}
static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
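The sys.c change above stops freeing the superblock data from the kobject release callback; the release now only completes sd_kobj_unregister, and both the registration error path and gfs2_sys_fs_del() wait for that completion, so sdp is not torn down while sysfs still holds the kobject. The same ordering can be sketched in portable C with a condition variable standing in for struct completion; every name below is hypothetical and this is only an analogue, not kernel code.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	pthread_mutex_t lock;
	pthread_cond_t  cond;
	int             released;
};

/* Plays the role of gfs2_sbd_release(): signal, do not free. */
static void obj_release(struct obj *o)
{
	pthread_mutex_lock(&o->lock);
	o->released = 1;
	pthread_cond_signal(&o->cond);
	pthread_mutex_unlock(&o->lock);
}

/* Plays the role of wait_for_completion() before the final free. */
static void obj_wait_released(struct obj *o)
{
	pthread_mutex_lock(&o->lock);
	while (!o->released)
		pthread_cond_wait(&o->cond, &o->lock);
	pthread_mutex_unlock(&o->lock);
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	pthread_mutex_init(&o->lock, NULL);
	pthread_cond_init(&o->cond, NULL);
	obj_release(o);       /* would run from the sysfs release path */
	obj_wait_released(o); /* teardown blocks until release has run */
	free(o);              /* only now is it safe to free the object */
	printf("freed after release\n");
	return 0;
}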
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index e0025258107a..0b2f858d9a8c 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -56,7 +56,6 @@
{(1UL << GLF_REPLY_PENDING), "r" }, \
{(1UL << GLF_INITIAL), "I" }, \
{(1UL << GLF_FROZEN), "F" }, \
- {(1UL << GLF_QUEUED), "q" }, \
{(1UL << GLF_LRU), "L" }, \
{(1UL << GLF_OBJECT), "o" }, \
{(1UL << GLF_BLOCKING), "b" })
@@ -388,15 +387,17 @@ TRACE_EVENT(gfs2_log_blocks,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, blocks )
+ __field( int, blks_free )
),
TP_fast_assign(
__entry->dev = sdp->sd_vfs->s_dev;
__entry->blocks = blocks;
+ __entry->blks_free = atomic_read(&sdp->sd_log_blks_free);
),
- TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev),
- MINOR(__entry->dev), __entry->blocks)
+ TP_printk("%u,%u log reserve %d %d", MAJOR(__entry->dev),
+ MINOR(__entry->dev), __entry->blocks, __entry->blks_free)
);
/* Writing back the AIL */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 1cd0328cae20..0fba3bf64189 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -419,7 +419,7 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
- gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
+ gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
gfs2_lm(sdp,
"fatal: filesystem consistency error\n"
" RG = %llu\n"
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 6d9157efe16c..d7562981b3a0 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -205,6 +205,16 @@ static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp)
test_bit(SDF_WITHDRAWING, &sdp->sd_flags);
}
+/**
+ * gfs2_withdrawing - check if a withdraw is pending
+ * @sdp: the superblock
+ */
+static inline bool gfs2_withdrawing(struct gfs2_sbd *sdp)
+{
+ return test_bit(SDF_WITHDRAWING, &sdp->sd_flags) &&
+ !test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
+}
+
#define gfs2_tune_get(sdp, field) \
gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
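The new gfs2_withdrawing() helper above answers a narrower question than the existing gfs2_withdrawn(): a withdraw has been requested but has not completed yet. A tiny illustration of the difference, using made-up flag values (the real SDF_* constants live in incore.h and are outside this excerpt):

#include <stdbool.h>

#define SDF_WITHDRAWN   (1u << 0)  /* illustrative values only */
#define SDF_WITHDRAWING (1u << 1)

/* Mirrors gfs2_withdrawn(): withdrawn, or in the middle of withdrawing. */
static bool withdrawn_or_withdrawing(unsigned int flags)
{
	return flags & (SDF_WITHDRAWN | SDF_WITHDRAWING);
}

/* Mirrors the new gfs2_withdrawing(): requested but not yet complete. */
static bool withdraw_pending(unsigned int flags)
{
	return (flags & SDF_WITHDRAWING) && !(flags & SDF_WITHDRAWN);
}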
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index c33324686d89..44d07c9e3a7f 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -104,8 +104,7 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = buf->f_bfree;
buf->f_files = HFS_SB(sb)->fs_ablocks;
buf->f_ffree = HFS_SB(sb)->free_ablocks;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_namelen = HFS_NAMELEN;
return 0;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 129dca3f4b78..807119ae5adf 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -320,8 +320,7 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = buf->f_bfree;
buf->f_files = 0xFFFFFFFF;
buf->f_ffree = 0xFFFFFFFF - sbi->next_cnid;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_namelen = HFSPLUS_MAX_STRLEN;
return 0;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 0a677a9aaf34..a7dbfc892022 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -192,8 +192,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = sbi->sb_n_free;
buf->f_files = sbi->sb_dirband_size / 4;
buf->f_ffree = hpfs_get_free_dnodes(s);
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_namelen = 254;
hpfs_unlock(s);
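The statfs hunks for hfs, hfsplus and hpfs (like the other filesystems in this series) replace the open-coded split of a 64-bit id into the two 32-bit f_fsid words with a u64_to_fsid() helper. The helper is defined in a header outside this excerpt; assuming it keeps the layout the removed lines used (low word in val[0], high word in val[1]), it reduces to something like the sketch below.

#include <stdint.h>

struct fsid_words {       /* stand-in for __kernel_fsid_t */
	uint32_t val[2];
};

/* Sketch of the conversion the callers above now delegate to. */
static struct fsid_words u64_to_fsid_sketch(uint64_t id)
{
	return (struct fsid_words){
		.val = { (uint32_t)id, (uint32_t)(id >> 32) },
	};
}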
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 7cb3b4cb9b11..02894df7656d 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -19,7 +19,9 @@
#include <linux/task_work.h>
#include <linux/blk-cgroup.h>
#include <linux/audit.h>
+#include <linux/cpu.h>
+#include "../kernel/sched/sched.h"
#include "io-wq.h"
#define WORKER_IDLE_TIMEOUT (5 * HZ)
@@ -123,9 +125,13 @@ struct io_wq {
refcount_t refs;
struct completion done;
+ struct hlist_node cpuhp_node;
+
refcount_t use_refs;
};
+static enum cpuhp_state io_wq_online;
+
static bool io_worker_get(struct io_worker *worker)
{
return refcount_inc_not_zero(&worker->ref);
@@ -187,7 +193,8 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
worker->blkcg_css = NULL;
}
#endif
-
+ if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
return dropped_lock;
}
@@ -483,7 +490,10 @@ static void io_impersonate_work(struct io_worker *worker,
if ((work->flags & IO_WQ_WORK_CREDS) &&
worker->cur_creds != work->identity->creds)
io_wq_switch_creds(worker, work);
- current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
+ if (work->flags & IO_WQ_WORK_FSIZE)
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
+ else if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
io_wq_switch_blkcg(worker, work);
#ifdef CONFIG_AUDIT
current->loginuid = work->identity->loginuid;
@@ -1087,10 +1097,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
return ERR_PTR(-ENOMEM);
wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
- if (!wq->wqes) {
- kfree(wq);
- return ERR_PTR(-ENOMEM);
- }
+ if (!wq->wqes)
+ goto err_wq;
+
+ ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+ if (ret)
+ goto err_wqes;
wq->free_work = data->free_work;
wq->do_work = data->do_work;
@@ -1098,6 +1110,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
/* caller must already hold a reference to this */
wq->user = data->user;
+ ret = -ENOMEM;
for_each_node(node) {
struct io_wqe *wqe;
int alloc_node = node;
@@ -1141,9 +1154,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
ret = PTR_ERR(wq->manager);
complete(&wq->done);
err:
+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
for_each_node(node)
kfree(wq->wqes[node]);
+err_wqes:
kfree(wq->wqes);
+err_wq:
kfree(wq);
return ERR_PTR(ret);
}
@@ -1160,6 +1176,8 @@ static void __io_wq_destroy(struct io_wq *wq)
{
int node;
+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+
set_bit(IO_WQ_BIT_EXIT, &wq->state);
if (wq->manager)
kthread_stop(wq->manager);
@@ -1187,3 +1205,41 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
{
return wq->manager;
}
+
+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
+{
+ struct task_struct *task = worker->task;
+ struct rq_flags rf;
+ struct rq *rq;
+
+ rq = task_rq_lock(task, &rf);
+ do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
+ task->flags |= PF_NO_SETAFFINITY;
+ task_rq_unlock(rq, task, &rf);
+ return false;
+}
+
+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+ int i;
+
+ rcu_read_lock();
+ for_each_node(i)
+ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
+ rcu_read_unlock();
+ return 0;
+}
+
+static __init int io_wq_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
+ io_wq_cpu_online, NULL);
+ if (ret < 0)
+ return ret;
+ io_wq_online = ret;
+ return 0;
+}
+subsys_initcall(io_wq_init);
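The io-wq change above registers a dynamic multi-instance CPU hotplug state: io_wq_create() adds each workqueue's cpuhp_node to the "io-wq/online" state, and io_wq_cpu_online() walks the workers of that instance to reapply their NUMA affinity when a CPU comes back online. Stripped of the io-wq specifics, the registration pattern looks like the sketch below, built only from the calls visible in the hunk, with error handling trimmed and all names hypothetical.

#include <linux/cpuhotplug.h>
#include <linux/init.h>

static enum cpuhp_state example_online_state;

/* Multi-instance callback: @node is the hlist_node embedded in one
 * registered instance; container_of() recovers the owning object. */
static int example_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	/* refresh whatever per-instance state depends on CPU topology */
	return 0;
}

static int __init example_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "example/online",
				      example_cpu_online, NULL);
	if (ret < 0)
		return ret;
	example_online_state = ret;  /* instances are added against this */
	return 0;
}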
diff --git a/fs/io-wq.h b/fs/io-wq.h
index be21c500c925..cba36f03c355 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -17,6 +17,7 @@ enum {
IO_WQ_WORK_MM = 128,
IO_WQ_WORK_CREDS = 256,
IO_WQ_WORK_BLKCG = 512,
+ IO_WQ_WORK_FSIZE = 1024,
IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
};
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 02dc81622081..b42dfa0243bf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -277,7 +277,7 @@ struct io_ring_ctx {
unsigned sq_mask;
unsigned sq_thread_idle;
unsigned cached_sq_dropped;
- atomic_t cached_cq_overflow;
+ unsigned cached_cq_overflow;
unsigned long sq_check_overflow;
struct list_head defer_list;
@@ -585,6 +585,7 @@ enum {
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT,
REQ_F_WORK_INITIALIZED_BIT,
+ REQ_F_LTIMEOUT_ACTIVE_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -614,7 +615,7 @@ enum {
REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
/* must not punt to workers */
REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
- /* has linked timeout */
+ /* has or had linked timeout */
REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
/* regular file */
REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
@@ -628,6 +629,8 @@ enum {
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
/* io_wq_work is initialized */
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
+ /* linked timeout is active, i.e. prepared by link's head */
+ REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
};
struct async_poll {
@@ -750,8 +753,6 @@ struct io_op_def {
unsigned pollout : 1;
/* op supports buffer selection */
unsigned buffer_select : 1;
- /* needs rlimit(RLIMIT_FSIZE) assigned */
- unsigned needs_fsize : 1;
/* must always have async data allocated */
unsigned needs_async_data : 1;
/* size of async data needed, if any */
@@ -775,10 +776,10 @@ static const struct io_op_def io_op_defs[] = {
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
- .needs_fsize = 1,
.needs_async_data = 1,
.async_size = sizeof(struct io_async_rw),
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
+ IO_WQ_WORK_FSIZE,
},
[IORING_OP_FSYNC] = {
.needs_file = 1,
@@ -789,16 +790,16 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.pollin = 1,
.async_size = sizeof(struct io_async_rw),
- .work_flags = IO_WQ_WORK_BLKCG,
+ .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
},
[IORING_OP_WRITE_FIXED] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
- .needs_fsize = 1,
.async_size = sizeof(struct io_async_rw),
- .work_flags = IO_WQ_WORK_BLKCG,
+ .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE |
+ IO_WQ_WORK_MM,
},
[IORING_OP_POLL_ADD] = {
.needs_file = 1,
@@ -856,8 +857,7 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_FALLOCATE] = {
.needs_file = 1,
- .needs_fsize = 1,
- .work_flags = IO_WQ_WORK_BLKCG,
+ .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE,
},
[IORING_OP_OPENAT] = {
.work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG |
@@ -887,9 +887,9 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
- .needs_fsize = 1,
.async_size = sizeof(struct io_async_rw),
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
+ IO_WQ_WORK_FSIZE,
},
[IORING_OP_FADVISE] = {
.needs_file = 1,
@@ -1070,6 +1070,12 @@ static void io_init_identity(struct io_identity *id)
refcount_set(&id->count, 1);
}
+static inline void __io_req_init_async(struct io_kiocb *req)
+{
+ memset(&req->work, 0, sizeof(req->work));
+ req->flags |= REQ_F_WORK_INITIALIZED;
+}
+
/*
* Note: must call io_req_init_async() for the first time you
* touch any members of io_wq_work.
@@ -1081,8 +1087,7 @@ static inline void io_req_init_async(struct io_kiocb *req)
if (req->flags & REQ_F_WORK_INITIALIZED)
return;
- memset(&req->work, 0, sizeof(req->work));
- req->flags |= REQ_F_WORK_INITIALIZED;
+ __io_req_init_async(req);
/* Grab a ref if this isn't our static identity */
req->work.identity = tctx->identity;
@@ -1174,7 +1179,7 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
struct io_ring_ctx *ctx = req->ctx;
return seq != ctx->cached_cq_tail
- + atomic_read(&ctx->cached_cq_overflow);
+ + READ_ONCE(ctx->cached_cq_overflow);
}
return false;
@@ -1285,8 +1290,11 @@ static bool io_grab_identity(struct io_kiocb *req)
struct io_identity *id = req->work.identity;
struct io_ring_ctx *ctx = req->ctx;
- if (def->needs_fsize && id->fsize != rlimit(RLIMIT_FSIZE))
- return false;
+ if (def->work_flags & IO_WQ_WORK_FSIZE) {
+ if (id->fsize != rlimit(RLIMIT_FSIZE))
+ return false;
+ req->work.flags |= IO_WQ_WORK_FSIZE;
+ }
if (!(req->work.flags & IO_WQ_WORK_FILES) &&
(def->work_flags & IO_WQ_WORK_FILES) &&
@@ -1619,8 +1627,9 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
WRITE_ONCE(cqe->res, req->result);
WRITE_ONCE(cqe->flags, req->compl.cflags);
} else {
+ ctx->cached_cq_overflow++;
WRITE_ONCE(ctx->rings->cq_overflow,
- atomic_inc_return(&ctx->cached_cq_overflow));
+ ctx->cached_cq_overflow);
}
}
@@ -1662,8 +1671,8 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
* then we cannot store the request for later flushing, we need
* to drop it on the floor.
*/
- WRITE_ONCE(ctx->rings->cq_overflow,
- atomic_inc_return(&ctx->cached_cq_overflow));
+ ctx->cached_cq_overflow++;
+ WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
} else {
if (list_empty(&ctx->cq_overflow_list)) {
set_bit(0, &ctx->sq_check_overflow);
@@ -1865,6 +1874,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req)
link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
if (link->opcode != IORING_OP_LINK_TIMEOUT)
return false;
+ /*
+ * Can happen if a linked timeout fired and link had been like
+ * req -> link t-out -> link t-out [-> ...]
+ */
+ if (!(link->flags & REQ_F_LTIMEOUT_ACTIVE))
+ return false;
list_del_init(&link->link_list);
wake_ev = io_link_cancel_timeout(link);
@@ -1908,10 +1923,12 @@ static struct io_kiocb *io_req_link_next(struct io_kiocb *req)
/*
* Called if REQ_F_LINK_HEAD is set, and we fail the head request
*/
-static void __io_fail_links(struct io_kiocb *req)
+static void io_fail_links(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
+ unsigned long flags;
+ spin_lock_irqsave(&ctx->completion_lock, flags);
while (!list_empty(&req->link_list)) {
struct io_kiocb *link = list_first_entry(&req->link_list,
struct io_kiocb, link_list);
@@ -1933,15 +1950,6 @@ static void __io_fail_links(struct io_kiocb *req)
}
io_commit_cqring(ctx);
-}
-
-static void io_fail_links(struct io_kiocb *req)
-{
- struct io_ring_ctx *ctx = req->ctx;
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->completion_lock, flags);
- __io_fail_links(req);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
@@ -1976,7 +1984,8 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
{
struct task_struct *tsk = req->task;
struct io_ring_ctx *ctx = req->ctx;
- int ret, notify;
+ enum task_work_notify_mode notify;
+ int ret;
if (tsk->flags & PF_EXITING)
return -ESRCH;
@@ -1987,7 +1996,7 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
* processing task_work. There's no reliable way to tell if TWA_RESUME
* will do the job.
*/
- notify = 0;
+ notify = TWA_NONE;
if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
notify = TWA_SIGNAL;
@@ -2056,7 +2065,7 @@ static void io_req_task_queue(struct io_kiocb *req)
init_task_work(&req->task_work, io_req_task_cancel);
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, 0);
+ task_work_add(tsk, &req->task_work, TWA_NONE);
wake_up_process(tsk);
}
}
@@ -2177,7 +2186,7 @@ static void io_free_req_deferred(struct io_kiocb *req)
struct task_struct *tsk;
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, 0);
+ task_work_add(tsk, &req->task_work, TWA_NONE);
wake_up_process(tsk);
}
}
@@ -3108,9 +3117,10 @@ static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
* For files that don't have ->read_iter() and ->write_iter(), handle them
* by looping over ->read() or ->write() manually.
*/
-static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
- struct iov_iter *iter)
+static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
{
+ struct kiocb *kiocb = &req->rw.kiocb;
+ struct file *file = req->file;
ssize_t ret = 0;
/*
@@ -3130,11 +3140,8 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
if (!iov_iter_is_bvec(iter)) {
iovec = iov_iter_iovec(iter);
} else {
- /* fixed buffers import bvec */
- iovec.iov_base = kmap(iter->bvec->bv_page)
- + iter->iov_offset;
- iovec.iov_len = min(iter->count,
- iter->bvec->bv_len - iter->iov_offset);
+ iovec.iov_base = u64_to_user_ptr(req->rw.addr);
+ iovec.iov_len = req->rw.len;
}
if (rw == READ) {
@@ -3145,9 +3152,6 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
iovec.iov_len, io_kiocb_ppos(kiocb));
}
- if (iov_iter_is_bvec(iter))
- kunmap(iter->bvec->bv_page);
-
if (nr < 0) {
if (!ret)
ret = nr;
@@ -3156,6 +3160,8 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
ret += nr;
if (nr != iovec.iov_len)
break;
+ req->rw.len -= nr;
+ req->rw.addr += nr;
iov_iter_advance(iter, nr);
}
@@ -3291,7 +3297,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
/* queue just for cancelation */
init_task_work(&req->task_work, io_req_task_cancel);
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, 0);
+ task_work_add(tsk, &req->task_work, TWA_NONE);
wake_up_process(tsk);
}
return 1;
@@ -3345,7 +3351,7 @@ static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
if (req->file->f_op->read_iter)
return call_read_iter(req->file, &req->rw.kiocb, iter);
else if (req->file->f_op->read)
- return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter);
+ return loop_rw_iter(READ, req, iter);
else
return -EINVAL;
}
@@ -3536,7 +3542,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
if (req->file->f_op->write_iter)
ret2 = call_write_iter(req->file, kiocb, iter);
else if (req->file->f_op->write)
- ret2 = loop_rw_iter(WRITE, req->file, kiocb, iter);
+ ret2 = loop_rw_iter(WRITE, req, iter);
else
ret2 = -EINVAL;
@@ -4857,7 +4863,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
WRITE_ONCE(poll->canceled, true);
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, 0);
+ task_work_add(tsk, &req->task_work, TWA_NONE);
wake_up_process(tsk);
}
return 1;
@@ -4926,32 +4932,25 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
io_commit_cqring(ctx);
}
-static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
+static void io_poll_task_func(struct callback_head *cb)
{
+ struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
struct io_ring_ctx *ctx = req->ctx;
+ struct io_kiocb *nxt;
if (io_poll_rewait(req, &req->poll)) {
spin_unlock_irq(&ctx->completion_lock);
- return;
- }
-
- hash_del(&req->hash_node);
- io_poll_complete(req, req->result, 0);
- spin_unlock_irq(&ctx->completion_lock);
-
- *nxt = io_put_req_find_next(req);
- io_cqring_ev_posted(ctx);
-}
+ } else {
+ hash_del(&req->hash_node);
+ io_poll_complete(req, req->result, 0);
+ spin_unlock_irq(&ctx->completion_lock);
-static void io_poll_task_func(struct callback_head *cb)
-{
- struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
- struct io_ring_ctx *ctx = req->ctx;
- struct io_kiocb *nxt = NULL;
+ nxt = io_put_req_find_next(req);
+ io_cqring_ev_posted(ctx);
+ if (nxt)
+ __io_req_task_submit(nxt);
+ }
- io_poll_task_handler(req, &nxt);
- if (nxt)
- __io_req_task_submit(nxt);
percpu_ref_put(&ctx->refs);
}
@@ -5105,6 +5104,7 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
struct io_ring_ctx *ctx = req->ctx;
bool cancel = false;
+ INIT_HLIST_NODE(&req->hash_node);
io_init_poll_iocb(poll, mask, wake_func);
poll->file = req->file;
poll->wait.private = req;
@@ -5166,7 +5166,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
req->flags |= REQ_F_POLLED;
req->apoll = apoll;
- INIT_HLIST_NODE(&req->hash_node);
mask = 0;
if (def->pollin)
@@ -5348,8 +5347,6 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
return -EINVAL;
if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
return -EINVAL;
- if (!poll->file)
- return -EBADF;
events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
@@ -5367,7 +5364,6 @@ static int io_poll_add(struct io_kiocb *req)
struct io_poll_table ipt;
__poll_t mask;
- INIT_HLIST_NODE(&req->hash_node);
ipt.pt._qproc = io_poll_queue_proc;
mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
@@ -6117,10 +6113,9 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
if (!list_empty(&req->link_list)) {
prev = list_entry(req->link_list.prev, struct io_kiocb,
link_list);
- if (refcount_inc_not_zero(&prev->refs)) {
+ if (refcount_inc_not_zero(&prev->refs))
list_del_init(&req->link_list);
- prev->flags &= ~REQ_F_LINK_TIMEOUT;
- } else
+ else
prev = NULL;
}
@@ -6177,6 +6172,7 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT)
return NULL;
+ nxt->flags |= REQ_F_LTIMEOUT_ACTIVE;
req->flags |= REQ_F_LINK_TIMEOUT;
return nxt;
}
@@ -6191,7 +6187,8 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
again:
linked_timeout = io_prep_linked_timeout(req);
- if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.identity->creds &&
+ if ((req->flags & REQ_F_WORK_INITIALIZED) &&
+ (req->work.flags & IO_WQ_WORK_CREDS) &&
req->work.identity->creds != current_cred()) {
if (old_creds)
revert_creds(old_creds);
@@ -6199,7 +6196,6 @@ again:
old_creds = NULL; /* restored original creds */
else
old_creds = override_creds(req->work.identity->creds);
- req->work.flags |= IO_WQ_WORK_CREDS;
}
ret = io_issue_sqe(req, true, cs);
@@ -6240,8 +6236,10 @@ punt:
if (nxt) {
req = nxt;
- if (req->flags & REQ_F_FORCE_ASYNC)
+ if (req->flags & REQ_F_FORCE_ASYNC) {
+ linked_timeout = NULL;
goto punt;
+ }
goto again;
}
exit:
@@ -6504,12 +6502,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
if (id) {
struct io_identity *iod;
- io_req_init_async(req);
iod = idr_find(&ctx->personality_idr, id);
if (unlikely(!iod))
return -EINVAL;
refcount_inc(&iod->count);
- io_put_identity(current->io_uring, req);
+
+ __io_req_init_async(req);
get_cred(iod->creds);
req->work.identity = iod;
req->work.flags |= IO_WQ_WORK_CREDS;
@@ -8685,19 +8683,11 @@ static void io_uring_del_task_file(struct file *file)
fput(file);
}
-static void __io_uring_attempt_task_drop(struct file *file)
-{
- struct file *old = xa_load(&current->io_uring->xa, (unsigned long)file);
-
- if (old == file)
- io_uring_del_task_file(file);
-}
-
/*
* Drop task note for this file if we're the only ones that hold it after
* pending fput()
*/
-static void io_uring_attempt_task_drop(struct file *file, bool exiting)
+static void io_uring_attempt_task_drop(struct file *file)
{
if (!current->io_uring)
return;
@@ -8705,10 +8695,9 @@ static void io_uring_attempt_task_drop(struct file *file, bool exiting)
* fput() is pending, will be 2 if the only other ref is our potential
* task file note. If the task is exiting, drop regardless of count.
*/
- if (!exiting && atomic_long_read(&file->f_count) != 2)
- return;
-
- __io_uring_attempt_task_drop(file);
+ if (fatal_signal_pending(current) || (current->flags & PF_EXITING) ||
+ atomic_long_read(&file->f_count) == 2)
+ io_uring_del_task_file(file);
}
void __io_uring_files_cancel(struct files_struct *files)
@@ -8766,16 +8755,7 @@ void __io_uring_task_cancel(void)
static int io_uring_flush(struct file *file, void *data)
{
- struct io_ring_ctx *ctx = file->private_data;
-
- /*
- * If the task is going away, cancel work it may have pending
- */
- if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
- data = NULL;
-
- io_uring_cancel_task_requests(ctx, data);
- io_uring_attempt_task_drop(file, !data);
+ io_uring_attempt_task_drop(file);
return 0;
}
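One detail worth calling out in the io_uring hunk: ctx->cached_cq_overflow drops from atomic_t to a plain unsigned because both increments visible above run with ctx->completion_lock held, and the reader in req_need_defer() goes through READ_ONCE(), so a full atomic read-modify-write was unnecessary. A userspace analogue of that "plain counter under a lock, relaxed publication to readers" shape, with all names hypothetical:

#include <pthread.h>
#include <stdatomic.h>

struct cq_state {
	pthread_mutex_t lock;
	unsigned int cached_overflow;   /* written only under the lock */
	atomic_uint published_overflow; /* what lockless readers see */
};

static void cq_note_overflow(struct cq_state *s)
{
	pthread_mutex_lock(&s->lock);
	s->cached_overflow++;
	/* relaxed store stands in for WRITE_ONCE(): no tearing, and no
	 * atomic increment needed since the lock serializes writers */
	atomic_store_explicit(&s->published_overflow, s->cached_overflow,
			      memory_order_relaxed);
	pthread_mutex_unlock(&s->lock);
}

static unsigned int cq_read_overflow(struct cq_state *s)
{
	/* relaxed load stands in for READ_ONCE() */
	return atomic_load_explicit(&s->published_overflow,
				    memory_order_relaxed);
}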
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 78f5c96c76f3..ec90773527ee 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1038,8 +1038,7 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = 0;
buf->f_files = ISOFS_SB(sb)->s_ninodes;
buf->f_ffree = 0;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_namelen = NAME_MAX;
return 0;
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 7b09a9158e40..34f546404aa1 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -383,8 +383,7 @@ static int minix_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = sbi->s_ninodes;
buf->f_ffree = minix_count_free_inodes(sb);
buf->f_namelen = sbi->s_namelen;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
diff --git a/fs/namei.c b/fs/namei.c
index f1eb8ccd2be9..d4a6dd772303 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1626,7 +1626,8 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
return ERR_PTR(error);
}
- if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS))
+ if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) ||
+ unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
return ERR_PTR(-ELOOP);
if (!(nd->flags & LOOKUP_RCU)) {
diff --git a/fs/namespace.c b/fs/namespace.c
index 294e05a13d17..cebaa3e81794 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1191,7 +1191,7 @@ static void mntput_no_expire(struct mount *mnt)
struct task_struct *task = current;
if (likely(!(task->flags & PF_KTHREAD))) {
init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
- if (!task_work_add(task, &mnt->mnt_rcu, true))
+ if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
return;
}
if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
@@ -3171,6 +3171,8 @@ int path_mount(const char *dev_name, struct path *path,
mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
+ if (flags & MS_NOSYMFOLLOW)
+ mnt_flags |= MNT_NOSYMFOLLOW;
/* The default atime for remount is preservation */
if ((flags & MS_REMOUNT) &&
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 2eee5fb1a882..4abd928b0bc8 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -651,8 +651,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = nmaxinodes;
buf->f_ffree = nfreeinodes;
buf->f_namelen = NILFS_NAME_LEN;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 7dc3bc604f78..0d7e948cb29c 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2643,8 +2643,7 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
* the least significant 32-bits in f_fsid[0] and the most significant
* 32-bits in f_fsid[1].
*/
- sfs->f_fsid.val[0] = vol->serial_no & 0xffffffff;
- sfs->f_fsid.val[1] = (vol->serial_no >> 32) & 0xffffffff;
+ sfs->f_fsid = u64_to_fsid(vol->serial_no);
/* Maximum length of filenames. */
sfs->f_namelen = NTFS_MAX_NAME_LEN;
return 0;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index b76ec6b88ded..ce93ccca8639 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -282,8 +282,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = sbi->s_num_blocks;
buf->f_files = sbi->s_num_blocks;
buf->f_namelen = OMFS_NAMELEN;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_bfree = buf->f_bavail = buf->f_ffree =
omfs_count_free(s);
@@ -363,12 +362,11 @@ static int omfs_get_imap(struct super_block *sb)
bh = sb_bread(sb, block++);
if (!bh)
goto nomem_free;
- *ptr = kmalloc(sb->s_blocksize, GFP_KERNEL);
+ *ptr = kmemdup(bh->b_data, sb->s_blocksize, GFP_KERNEL);
if (!*ptr) {
brelse(bh);
goto nomem_free;
}
- memcpy(*ptr, bh->b_data, sb->s_blocksize);
if (count < sb->s_blocksize)
memset((void *)*ptr + count, 0xff,
sb->s_blocksize - count);
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 3059a9394c2d..e59d4bb3a89e 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -70,6 +70,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
{ MNT_NOATIME, ",noatime" },
{ MNT_NODIRATIME, ",nodiratime" },
{ MNT_RELATIME, ",relatime" },
+ { MNT_NOSYMFOLLOW, ",nosymfollow" },
{ 0, NULL }
};
const struct proc_fs_opts *fs_infop;
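The namei.c, namespace.c and proc_namespace.c hunks together wire up nosymfollow: MS_NOSYMFOLLOW maps to MNT_NOSYMFOLLOW at mount time, pick_link() then refuses to follow any symlink on such a mount with -ELOOP, and the option shows up in /proc mounts. Below is a minimal userspace sketch of enabling it on an existing mount point; the path, the use of a bind remount, and the fallback flag value are assumptions of this example (the constant comes from newer uapi headers).

#include <stdio.h>
#include <sys/mount.h>

#ifndef MS_NOSYMFOLLOW
#define MS_NOSYMFOLLOW 256   /* assumed value if libc headers lack it */
#endif

int main(void)
{
	/* Remount an existing bind mount so lookups beneath it refuse to
	 * follow symlinks; reading the link itself still works. */
	if (mount(NULL, "/mnt/untrusted", NULL,
		  MS_REMOUNT | MS_BIND | MS_NOSYMFOLLOW, NULL) != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}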
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index e8da1cde87b9..3fb7fc819b4f 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -137,8 +137,7 @@ static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bfree = qnx4_count_free_blocks(sb);
buf->f_bavail = buf->f_bfree;
buf->f_namelen = QNX4_NAME_MAX;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 755293c8c71a..61191f7bdf62 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -166,8 +166,7 @@ static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_ffree = fs32_to_cpu(sbi, sbi->sb->sb_free_inodes);
buf->f_bavail = buf->f_bfree;
buf->f_namelen = QNX6_LONG_NAME_MAX;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index a669fb049b84..75f764b43418 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1411,6 +1411,59 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
}
/*
+ * Performs necessary checks before doing a file copy
+ *
+ * Can adjust amount of bytes to copy via @req_count argument.
+ * Returns appropriate error code that caller should return or
+ * zero in case the copy should be allowed.
+ */
+static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t *req_count, unsigned int flags)
+{
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+ uint64_t count = *req_count;
+ loff_t size_in;
+ int ret;
+
+ ret = generic_file_rw_checks(file_in, file_out);
+ if (ret)
+ return ret;
+
+ /* Don't touch certain kinds of inodes */
+ if (IS_IMMUTABLE(inode_out))
+ return -EPERM;
+
+ if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+ return -ETXTBSY;
+
+ /* Ensure offsets don't wrap. */
+ if (pos_in + count < pos_in || pos_out + count < pos_out)
+ return -EOVERFLOW;
+
+ /* Shorten the copy to EOF */
+ size_in = i_size_read(inode_in);
+ if (pos_in >= size_in)
+ count = 0;
+ else
+ count = min(count, size_in - (uint64_t)pos_in);
+
+ ret = generic_write_check_limits(file_out, pos_out, &count);
+ if (ret)
+ return ret;
+
+ /* Don't allow overlapped copying within the same file. */
+ if (inode_in == inode_out &&
+ pos_out + count > pos_in &&
+ pos_out < pos_in + count)
+ return -EINVAL;
+
+ *req_count = count;
+ return 0;
+}
+
+/*
* copy_file_range() differs from regular file read and write in that it
* specifically allows return partial success. When it does so is up to
* the copy_file_range method.
@@ -1542,475 +1595,92 @@ out2:
return ret;
}
-static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
- bool write)
-{
- struct inode *inode = file_inode(file);
-
- if (unlikely(pos < 0 || len < 0))
- return -EINVAL;
-
- if (unlikely((loff_t) (pos + len) < 0))
- return -EINVAL;
-
- if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
- loff_t end = len ? pos + len - 1 : OFFSET_MAX;
- int retval;
-
- retval = locks_mandatory_area(inode, file, pos, end,
- write ? F_WRLCK : F_RDLCK);
- if (retval < 0)
- return retval;
- }
-
- return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
-}
-/*
- * Ensure that we don't remap a partial EOF block in the middle of something
- * else. Assume that the offsets have already been checked for block
- * alignment.
- *
- * For clone we only link a partial EOF block above or at the destination file's
- * EOF. For deduplication we accept a partial EOF block only if it ends at the
- * destination file's EOF (can not link it into the middle of a file).
- *
- * Shorten the request if possible.
- */
-static int generic_remap_check_len(struct inode *inode_in,
- struct inode *inode_out,
- loff_t pos_out,
- loff_t *len,
- unsigned int remap_flags)
-{
- u64 blkmask = i_blocksize(inode_in) - 1;
- loff_t new_len = *len;
-
- if ((*len & blkmask) == 0)
- return 0;
-
- if (pos_out + *len < i_size_read(inode_out))
- new_len &= ~blkmask;
-
- if (new_len == *len)
- return 0;
-
- if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
- *len = new_len;
- return 0;
- }
-
- return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
-}
-
-/* Read a page's worth of file data into the page cache. */
-static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
-{
- struct page *page;
-
- page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
- if (IS_ERR(page))
- return page;
- if (!PageUptodate(page)) {
- put_page(page);
- return ERR_PTR(-EIO);
- }
- return page;
-}
-
/*
- * Lock two pages, ensuring that we lock in offset order if the pages are from
- * the same file.
+ * Don't operate on ranges the page cache doesn't support, and don't exceed the
+ * LFS limits. If pos is under the limit it becomes a short access. If it
+ * exceeds the limit we return -EFBIG.
*/
-static void vfs_lock_two_pages(struct page *page1, struct page *page2)
-{
- /* Always lock in order of increasing index. */
- if (page1->index > page2->index)
- swap(page1, page2);
-
- lock_page(page1);
- if (page1 != page2)
- lock_page(page2);
-}
-
-/* Unlock two pages, being careful not to unlock the same page twice. */
-static void vfs_unlock_two_pages(struct page *page1, struct page *page2)
+int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count)
{
- unlock_page(page1);
- if (page1 != page2)
- unlock_page(page2);
-}
-
-/*
- * Compare extents of two files to see if they are the same.
- * Caller must have locked both inodes to prevent write races.
- */
-static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
- struct inode *dest, loff_t destoff,
- loff_t len, bool *is_same)
-{
- loff_t src_poff;
- loff_t dest_poff;
- void *src_addr;
- void *dest_addr;
- struct page *src_page;
- struct page *dest_page;
- loff_t cmp_len;
- bool same;
- int error;
-
- error = -EINVAL;
- same = true;
- while (len) {
- src_poff = srcoff & (PAGE_SIZE - 1);
- dest_poff = destoff & (PAGE_SIZE - 1);
- cmp_len = min(PAGE_SIZE - src_poff,
- PAGE_SIZE - dest_poff);
- cmp_len = min(cmp_len, len);
- if (cmp_len <= 0)
- goto out_error;
-
- src_page = vfs_dedupe_get_page(src, srcoff);
- if (IS_ERR(src_page)) {
- error = PTR_ERR(src_page);
- goto out_error;
- }
- dest_page = vfs_dedupe_get_page(dest, destoff);
- if (IS_ERR(dest_page)) {
- error = PTR_ERR(dest_page);
- put_page(src_page);
- goto out_error;
- }
-
- vfs_lock_two_pages(src_page, dest_page);
+ struct inode *inode = file->f_mapping->host;
+ loff_t max_size = inode->i_sb->s_maxbytes;
+ loff_t limit = rlimit(RLIMIT_FSIZE);
- /*
- * Now that we've locked both pages, make sure they're still
- * mapped to the file data we're interested in. If not,
- * someone is invalidating pages on us and we lose.
- */
- if (!PageUptodate(src_page) || !PageUptodate(dest_page) ||
- src_page->mapping != src->i_mapping ||
- dest_page->mapping != dest->i_mapping) {
- same = false;
- goto unlock;
+ if (limit != RLIM_INFINITY) {
+ if (pos >= limit) {
+ send_sig(SIGXFSZ, current, 0);
+ return -EFBIG;
}
+ *count = min(*count, limit - pos);
+ }
- src_addr = kmap_atomic(src_page);
- dest_addr = kmap_atomic(dest_page);
-
- flush_dcache_page(src_page);
- flush_dcache_page(dest_page);
-
- if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
- same = false;
+ if (!(file->f_flags & O_LARGEFILE))
+ max_size = MAX_NON_LFS;
- kunmap_atomic(dest_addr);
- kunmap_atomic(src_addr);
-unlock:
- vfs_unlock_two_pages(src_page, dest_page);
- put_page(dest_page);
- put_page(src_page);
+ if (unlikely(pos >= max_size))
+ return -EFBIG;
- if (!same)
- break;
+ *count = min(*count, max_size - pos);
- srcoff += cmp_len;
- destoff += cmp_len;
- len -= cmp_len;
- }
-
- *is_same = same;
return 0;
-
-out_error:
- return error;
}
/*
- * Check that the two inodes are eligible for cloning, the ranges make
- * sense, and then flush all dirty data. Caller must ensure that the
- * inodes have been locked against any other modifications.
+ * Performs necessary checks before doing a write
*
- * If there's an error, then the usual negative error code is returned.
- * Otherwise returns 0 with *len set to the request length.
+ * Can adjust writing position or amount of bytes to write.
+ * Returns appropriate error code that caller should return or
+ * zero in case that write should be allowed.
*/
-int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t *len, unsigned int remap_flags)
+ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
- bool same_inode = (inode_in == inode_out);
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ loff_t count;
int ret;
- /* Don't touch certain kinds of inodes */
- if (IS_IMMUTABLE(inode_out))
- return -EPERM;
-
- if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+ if (IS_SWAPFILE(inode))
return -ETXTBSY;
- /* Don't reflink dirs, pipes, sockets... */
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
-
- /* Zero length dedupe exits immediately; reflink goes to EOF. */
- if (*len == 0) {
- loff_t isize = i_size_read(inode_in);
-
- if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
- return 0;
- if (pos_in > isize)
- return -EINVAL;
- *len = isize - pos_in;
- if (*len == 0)
- return 0;
- }
-
- /* Check that we don't violate system file offset limits. */
- ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
- remap_flags);
- if (ret)
- return ret;
-
- /* Wait for the completion of any pending IOs on both files */
- inode_dio_wait(inode_in);
- if (!same_inode)
- inode_dio_wait(inode_out);
-
- ret = filemap_write_and_wait_range(inode_in->i_mapping,
- pos_in, pos_in + *len - 1);
- if (ret)
- return ret;
-
- ret = filemap_write_and_wait_range(inode_out->i_mapping,
- pos_out, pos_out + *len - 1);
- if (ret)
- return ret;
-
- /*
- * Check that the extents are the same.
- */
- if (remap_flags & REMAP_FILE_DEDUP) {
- bool is_same = false;
-
- ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
- inode_out, pos_out, *len, &is_same);
- if (ret)
- return ret;
- if (!is_same)
- return -EBADE;
- }
-
- ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
- remap_flags);
- if (ret)
- return ret;
-
- /* If can't alter the file contents, we're done. */
- if (!(remap_flags & REMAP_FILE_DEDUP))
- ret = file_modified(file_out);
-
- return ret;
-}
-EXPORT_SYMBOL(generic_remap_file_range_prep);
-
-loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t len, unsigned int remap_flags)
-{
- loff_t ret;
-
- WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
-
- /*
- * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
- * the same mount. Practically, they only need to be on the same file
- * system.
- */
- if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
- return -EXDEV;
-
- ret = generic_file_rw_checks(file_in, file_out);
- if (ret < 0)
- return ret;
-
- if (!file_in->f_op->remap_file_range)
- return -EOPNOTSUPP;
-
- ret = remap_verify_area(file_in, pos_in, len, false);
- if (ret)
- return ret;
-
- ret = remap_verify_area(file_out, pos_out, len, true);
- if (ret)
- return ret;
-
- ret = file_in->f_op->remap_file_range(file_in, pos_in,
- file_out, pos_out, len, remap_flags);
- if (ret < 0)
- return ret;
-
- fsnotify_access(file_in);
- fsnotify_modify(file_out);
- return ret;
-}
-EXPORT_SYMBOL(do_clone_file_range);
-
-loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t len, unsigned int remap_flags)
-{
- loff_t ret;
-
- file_start_write(file_out);
- ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
- remap_flags);
- file_end_write(file_out);
-
- return ret;
-}
-EXPORT_SYMBOL(vfs_clone_file_range);
-
-/* Check whether we are allowed to dedupe the destination file */
-static bool allow_file_dedupe(struct file *file)
-{
- if (capable(CAP_SYS_ADMIN))
- return true;
- if (file->f_mode & FMODE_WRITE)
- return true;
- if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
- return true;
- if (!inode_permission(file_inode(file), MAY_WRITE))
- return true;
- return false;
-}
+ if (!iov_iter_count(from))
+ return 0;
-loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
- struct file *dst_file, loff_t dst_pos,
- loff_t len, unsigned int remap_flags)
-{
- loff_t ret;
+ /* FIXME: this is for backwards compatibility with 2.4 */
+ if (iocb->ki_flags & IOCB_APPEND)
+ iocb->ki_pos = i_size_read(inode);
- WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
- REMAP_FILE_CAN_SHORTEN));
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+ return -EINVAL;
- ret = mnt_want_write_file(dst_file);
+ count = iov_iter_count(from);
+ ret = generic_write_check_limits(file, iocb->ki_pos, &count);
if (ret)
return ret;
- ret = remap_verify_area(dst_file, dst_pos, len, true);
- if (ret < 0)
- goto out_drop_write;
-
- ret = -EPERM;
- if (!allow_file_dedupe(dst_file))
- goto out_drop_write;
-
- ret = -EXDEV;
- if (src_file->f_path.mnt != dst_file->f_path.mnt)
- goto out_drop_write;
-
- ret = -EISDIR;
- if (S_ISDIR(file_inode(dst_file)->i_mode))
- goto out_drop_write;
-
- ret = -EINVAL;
- if (!dst_file->f_op->remap_file_range)
- goto out_drop_write;
-
- if (len == 0) {
- ret = 0;
- goto out_drop_write;
- }
-
- ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
- dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
-out_drop_write:
- mnt_drop_write_file(dst_file);
-
- return ret;
+ iov_iter_truncate(from, count);
+ return iov_iter_count(from);
}
-EXPORT_SYMBOL(vfs_dedupe_file_range_one);
+EXPORT_SYMBOL(generic_write_checks);
-int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
+/*
+ * Performs common checks before doing a file copy/clone
+ * from @file_in to @file_out.
+ */
+int generic_file_rw_checks(struct file *file_in, struct file *file_out)
{
- struct file_dedupe_range_info *info;
- struct inode *src = file_inode(file);
- u64 off;
- u64 len;
- int i;
- int ret;
- u16 count = same->dest_count;
- loff_t deduped;
-
- if (!(file->f_mode & FMODE_READ))
- return -EINVAL;
-
- if (same->reserved1 || same->reserved2)
- return -EINVAL;
-
- off = same->src_offset;
- len = same->src_length;
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
- if (S_ISDIR(src->i_mode))
+ /* Don't copy dirs, pipes, sockets... */
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
return -EISDIR;
-
- if (!S_ISREG(src->i_mode))
- return -EINVAL;
-
- if (!file->f_op->remap_file_range)
- return -EOPNOTSUPP;
-
- ret = remap_verify_area(file, off, len, false);
- if (ret < 0)
- return ret;
- ret = 0;
-
- if (off + len > i_size_read(src))
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
return -EINVAL;
- /* Arbitrary 1G limit on a single dedupe request, can be raised. */
- len = min_t(u64, len, 1 << 30);
-
- /* pre-format output fields to sane values */
- for (i = 0; i < count; i++) {
- same->info[i].bytes_deduped = 0ULL;
- same->info[i].status = FILE_DEDUPE_RANGE_SAME;
- }
-
- for (i = 0, info = same->info; i < count; i++, info++) {
- struct fd dst_fd = fdget(info->dest_fd);
- struct file *dst_file = dst_fd.file;
-
- if (!dst_file) {
- info->status = -EBADF;
- goto next_loop;
- }
-
- if (info->reserved) {
- info->status = -EINVAL;
- goto next_fdput;
- }
-
- deduped = vfs_dedupe_file_range_one(file, off, dst_file,
- info->dest_offset, len,
- REMAP_FILE_CAN_SHORTEN);
- if (deduped == -EBADE)
- info->status = FILE_DEDUPE_RANGE_DIFFERS;
- else if (deduped < 0)
- info->status = deduped;
- else
- info->bytes_deduped = len;
+ if (!(file_in->f_mode & FMODE_READ) ||
+ !(file_out->f_mode & FMODE_WRITE) ||
+ (file_out->f_flags & O_APPEND))
+ return -EBADF;
-next_fdput:
- fdput(dst_fd);
-next_loop:
- if (fatal_signal_pending(current))
- break;
- }
- return ret;
+ return 0;
}
-EXPORT_SYMBOL(vfs_dedupe_file_range);
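Most of the read_write.c diff is code motion into the new fs/remap_range.c below, but generic_write_check_limits() deserves a second look: it clamps the byte count against both RLIMIT_FSIZE and the superblock's s_maxbytes, and a write that starts at or past the rlimit draws SIGXFSZ plus -EFBIG, while one that merely crosses the limit is shortened. That behaviour is easy to observe from userspace; the file path below is an assumption of the example.

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/resource.h>
#include <unistd.h>

int main(void)
{
	struct rlimit rl = { .rlim_cur = 4096, .rlim_max = 4096 };
	char buf[8192];
	int fd;

	signal(SIGXFSZ, SIG_IGN);      /* default disposition kills us */
	setrlimit(RLIMIT_FSIZE, &rl);

	fd = open("/tmp/fsize-demo", O_CREAT | O_TRUNC | O_WRONLY, 0600);
	memset(buf, 'x', sizeof(buf));

	/* Starts below the limit: the kernel shortens it to 4096 bytes. */
	printf("short write: %zd\n", pwrite(fd, buf, sizeof(buf), 0));

	/* Starts at the limit: no room at all, so -1 and EFBIG. */
	if (pwrite(fd, buf, 1, 4096) < 0)
		printf("at limit: %s\n", strerror(errno));

	close(fd);
	return 0;
}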
diff --git a/fs/remap_range.c b/fs/remap_range.c
new file mode 100644
index 000000000000..e6099beefa97
--- /dev/null
+++ b/fs/remap_range.c
@@ -0,0 +1,571 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/sched/xacct.h>
+#include <linux/fcntl.h>
+#include <linux/file.h>
+#include <linux/uio.h>
+#include <linux/fsnotify.h>
+#include <linux/security.h>
+#include <linux/export.h>
+#include <linux/syscalls.h>
+#include <linux/pagemap.h>
+#include <linux/splice.h>
+#include <linux/compat.h>
+#include <linux/mount.h>
+#include <linux/fs.h>
+#include "internal.h"
+
+#include <linux/uaccess.h>
+#include <asm/unistd.h>
+
+/*
+ * Performs necessary checks before doing a clone.
+ *
+ * Can adjust amount of bytes to clone via @req_count argument.
+ * Returns appropriate error code that caller should return or
+ * zero in case the clone should be allowed.
+ */
+static int generic_remap_checks(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *req_count, unsigned int remap_flags)
+{
+ struct inode *inode_in = file_in->f_mapping->host;
+ struct inode *inode_out = file_out->f_mapping->host;
+ uint64_t count = *req_count;
+ uint64_t bcount;
+ loff_t size_in, size_out;
+ loff_t bs = inode_out->i_sb->s_blocksize;
+ int ret;
+
+ /* The start of both ranges must be aligned to an fs block. */
+ if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
+ return -EINVAL;
+
+ /* Ensure offsets don't wrap. */
+ if (pos_in + count < pos_in || pos_out + count < pos_out)
+ return -EINVAL;
+
+ size_in = i_size_read(inode_in);
+ size_out = i_size_read(inode_out);
+
+ /* Dedupe requires both ranges to be within EOF. */
+ if ((remap_flags & REMAP_FILE_DEDUP) &&
+ (pos_in >= size_in || pos_in + count > size_in ||
+ pos_out >= size_out || pos_out + count > size_out))
+ return -EINVAL;
+
+ /* Ensure the infile range is within the infile. */
+ if (pos_in >= size_in)
+ return -EINVAL;
+ count = min(count, size_in - (uint64_t)pos_in);
+
+ ret = generic_write_check_limits(file_out, pos_out, &count);
+ if (ret)
+ return ret;
+
+ /*
+ * If the user wanted us to link to the infile's EOF, round up to the
+ * next block boundary for this check.
+ *
+ * Otherwise, make sure the count is also block-aligned, having
+ * already confirmed the starting offsets' block alignment.
+ */
+ if (pos_in + count == size_in) {
+ bcount = ALIGN(size_in, bs) - pos_in;
+ } else {
+ if (!IS_ALIGNED(count, bs))
+ count = ALIGN_DOWN(count, bs);
+ bcount = count;
+ }
+
+ /* Don't allow overlapped cloning within the same file. */
+ if (inode_in == inode_out &&
+ pos_out + bcount > pos_in &&
+ pos_out < pos_in + bcount)
+ return -EINVAL;
+
+ /*
+ * We shortened the request but the caller can't deal with that, so
+ * bounce the request back to userspace.
+ */
+ if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
+ return -EINVAL;
+
+ *req_count = count;
+ return 0;
+}
+
+static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
+ bool write)
+{
+ struct inode *inode = file_inode(file);
+
+ if (unlikely(pos < 0 || len < 0))
+ return -EINVAL;
+
+ if (unlikely((loff_t) (pos + len) < 0))
+ return -EINVAL;
+
+ if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
+ loff_t end = len ? pos + len - 1 : OFFSET_MAX;
+ int retval;
+
+ retval = locks_mandatory_area(inode, file, pos, end,
+ write ? F_WRLCK : F_RDLCK);
+ if (retval < 0)
+ return retval;
+ }
+
+ return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
+}
+
+/*
+ * Ensure that we don't remap a partial EOF block in the middle of something
+ * else. Assume that the offsets have already been checked for block
+ * alignment.
+ *
+ * For clone we only link a partial EOF block above or at the destination file's
+ * EOF. For deduplication we accept a partial EOF block only if it ends at the
+ * destination file's EOF (can not link it into the middle of a file).
+ *
+ * Shorten the request if possible.
+ */
+static int generic_remap_check_len(struct inode *inode_in,
+ struct inode *inode_out,
+ loff_t pos_out,
+ loff_t *len,
+ unsigned int remap_flags)
+{
+ u64 blkmask = i_blocksize(inode_in) - 1;
+ loff_t new_len = *len;
+
+ if ((*len & blkmask) == 0)
+ return 0;
+
+ if (pos_out + *len < i_size_read(inode_out))
+ new_len &= ~blkmask;
+
+ if (new_len == *len)
+ return 0;
+
+ if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
+ *len = new_len;
+ return 0;
+ }
+
+ return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
+}
+
+/* Read a page's worth of file data into the page cache. */
+static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
+{
+ struct page *page;
+
+ page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
+ if (IS_ERR(page))
+ return page;
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+ return page;
+}
+
+/*
+ * Lock two pages, ensuring that we lock in offset order if the pages are from
+ * the same file.
+ */
+static void vfs_lock_two_pages(struct page *page1, struct page *page2)
+{
+ /* Always lock in order of increasing index. */
+ if (page1->index > page2->index)
+ swap(page1, page2);
+
+ lock_page(page1);
+ if (page1 != page2)
+ lock_page(page2);
+}
+
+/* Unlock two pages, being careful not to unlock the same page twice. */
+static void vfs_unlock_two_pages(struct page *page1, struct page *page2)
+{
+ unlock_page(page1);
+ if (page1 != page2)
+ unlock_page(page2);
+}
+
+/*
+ * Compare extents of two files to see if they are the same.
+ * Caller must have locked both inodes to prevent write races.
+ */
+static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+ struct inode *dest, loff_t destoff,
+ loff_t len, bool *is_same)
+{
+ loff_t src_poff;
+ loff_t dest_poff;
+ void *src_addr;
+ void *dest_addr;
+ struct page *src_page;
+ struct page *dest_page;
+ loff_t cmp_len;
+ bool same;
+ int error;
+
+ error = -EINVAL;
+ same = true;
+ while (len) {
+ src_poff = srcoff & (PAGE_SIZE - 1);
+ dest_poff = destoff & (PAGE_SIZE - 1);
+ cmp_len = min(PAGE_SIZE - src_poff,
+ PAGE_SIZE - dest_poff);
+ cmp_len = min(cmp_len, len);
+ if (cmp_len <= 0)
+ goto out_error;
+
+ src_page = vfs_dedupe_get_page(src, srcoff);
+ if (IS_ERR(src_page)) {
+ error = PTR_ERR(src_page);
+ goto out_error;
+ }
+ dest_page = vfs_dedupe_get_page(dest, destoff);
+ if (IS_ERR(dest_page)) {
+ error = PTR_ERR(dest_page);
+ put_page(src_page);
+ goto out_error;
+ }
+
+ vfs_lock_two_pages(src_page, dest_page);
+
+ /*
+ * Now that we've locked both pages, make sure they're still
+ * mapped to the file data we're interested in. If not,
+ * someone is invalidating pages on us and we lose.
+ */
+ if (!PageUptodate(src_page) || !PageUptodate(dest_page) ||
+ src_page->mapping != src->i_mapping ||
+ dest_page->mapping != dest->i_mapping) {
+ same = false;
+ goto unlock;
+ }
+
+ src_addr = kmap_atomic(src_page);
+ dest_addr = kmap_atomic(dest_page);
+
+ flush_dcache_page(src_page);
+ flush_dcache_page(dest_page);
+
+ if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+ same = false;
+
+ kunmap_atomic(dest_addr);
+ kunmap_atomic(src_addr);
+unlock:
+ vfs_unlock_two_pages(src_page, dest_page);
+ put_page(dest_page);
+ put_page(src_page);
+
+ if (!same)
+ break;
+
+ srcoff += cmp_len;
+ destoff += cmp_len;
+ len -= cmp_len;
+ }
+
+ *is_same = same;
+ return 0;
+
+out_error:
+ return error;
+}
+
+/*
+ * Check that the two inodes are eligible for cloning, the ranges make
+ * sense, and then flush all dirty data. Caller must ensure that the
+ * inodes have been locked against any other modifications.
+ *
+ * If there's an error, then the usual negative error code is returned.
+ * Otherwise returns 0 with *len set to the request length.
+ */
+int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *len, unsigned int remap_flags)
+{
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+ bool same_inode = (inode_in == inode_out);
+ int ret;
+
+ /* Don't touch certain kinds of inodes */
+ if (IS_IMMUTABLE(inode_out))
+ return -EPERM;
+
+ if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+ return -ETXTBSY;
+
+ /* Don't reflink dirs, pipes, sockets... */
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+ return -EISDIR;
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+ return -EINVAL;
+
+ /* Zero length dedupe exits immediately; reflink goes to EOF. */
+ if (*len == 0) {
+ loff_t isize = i_size_read(inode_in);
+
+ if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
+ return 0;
+ if (pos_in > isize)
+ return -EINVAL;
+ *len = isize - pos_in;
+ if (*len == 0)
+ return 0;
+ }
+
+ /* Check that we don't violate system file offset limits. */
+ ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
+ remap_flags);
+ if (ret)
+ return ret;
+
+ /* Wait for the completion of any pending IOs on both files */
+ inode_dio_wait(inode_in);
+ if (!same_inode)
+ inode_dio_wait(inode_out);
+
+ ret = filemap_write_and_wait_range(inode_in->i_mapping,
+ pos_in, pos_in + *len - 1);
+ if (ret)
+ return ret;
+
+ ret = filemap_write_and_wait_range(inode_out->i_mapping,
+ pos_out, pos_out + *len - 1);
+ if (ret)
+ return ret;
+
+ /*
+ * Check that the extents are the same.
+ */
+ if (remap_flags & REMAP_FILE_DEDUP) {
+ bool is_same = false;
+
+ ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
+ inode_out, pos_out, *len, &is_same);
+ if (ret)
+ return ret;
+ if (!is_same)
+ return -EBADE;
+ }
+
+ ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
+ remap_flags);
+ if (ret)
+ return ret;
+
+ /* If we can't alter the file contents, we're done. */
+ if (!(remap_flags & REMAP_FILE_DEDUP))
+ ret = file_modified(file_out);
+
+ return ret;
+}
+EXPORT_SYMBOL(generic_remap_file_range_prep);
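
For context, a filesystem's ->remap_file_range implementation is expected to call this prep helper after locking both inodes. The sketch below is hypothetical (the myfs_* names and myfs_do_share_blocks() are placeholders), but the flag-check / lock / prep / fs-specific-work pattern loosely follows the in-tree users:

	/* Hedged sketch of a ->remap_file_range implementation; only the
	 * generic VFS helpers used here are real, myfs_* is made up. */
	static loff_t myfs_remap_file_range(struct file *file_in, loff_t pos_in,
					    struct file *file_out, loff_t pos_out,
					    loff_t len, unsigned int remap_flags)
	{
		struct inode *inode_in = file_inode(file_in);
		struct inode *inode_out = file_inode(file_out);
		loff_t ret;

		if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
			return -EINVAL;

		lock_two_nondirectories(inode_in, inode_out);

		ret = generic_remap_file_range_prep(file_in, pos_in, file_out,
						    pos_out, &len, remap_flags);
		if (ret < 0 || len == 0)
			goto out_unlock;

		/* Hypothetical fs-specific block-sharing step. */
		ret = myfs_do_share_blocks(inode_in, pos_in, inode_out, pos_out, len);
		if (!ret)
			ret = len;

	out_unlock:
		unlock_two_nondirectories(inode_in, inode_out);
		return ret;
	}
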
+
+loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags)
+{
+ loff_t ret;
+
+ WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
+
+ /*
+ * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
+ * the same mount. Practically, they only need to be on the same file
+ * system.
+ */
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+ return -EXDEV;
+
+ ret = generic_file_rw_checks(file_in, file_out);
+ if (ret < 0)
+ return ret;
+
+ if (!file_in->f_op->remap_file_range)
+ return -EOPNOTSUPP;
+
+ ret = remap_verify_area(file_in, pos_in, len, false);
+ if (ret)
+ return ret;
+
+ ret = remap_verify_area(file_out, pos_out, len, true);
+ if (ret)
+ return ret;
+
+ ret = file_in->f_op->remap_file_range(file_in, pos_in,
+ file_out, pos_out, len, remap_flags);
+ if (ret < 0)
+ return ret;
+
+ fsnotify_access(file_in);
+ fsnotify_modify(file_out);
+ return ret;
+}
+EXPORT_SYMBOL(do_clone_file_range);
+
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags)
+{
+ loff_t ret;
+
+ file_start_write(file_out);
+ ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
+ remap_flags);
+ file_end_write(file_out);
+
+ return ret;
+}
+EXPORT_SYMBOL(vfs_clone_file_range);
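
From userspace this path is reached through the FICLONE/FICLONERANGE ioctls. A minimal, error-handling-light example that shares blocks from one file into another on the same filesystem (src_length == 0 means "clone through EOF"):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>	/* FICLONERANGE, struct file_clone_range */

	int main(int argc, char **argv)
	{
		if (argc < 3)
			return 1;

		int src = open(argv[1], O_RDONLY);
		int dst = open(argv[2], O_WRONLY | O_CREAT, 0644);
		struct file_clone_range fcr = {
			.src_fd = src,
			.src_offset = 0,
			.src_length = 0,	/* 0: reflink everything up to EOF */
			.dest_offset = 0,
		};

		/* Issued on the destination fd; fails with EXDEV across filesystems. */
		if (ioctl(dst, FICLONERANGE, &fcr) < 0)
			perror("FICLONERANGE");
		return 0;
	}
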
+
+/* Check whether we are allowed to dedupe the destination file */
+static bool allow_file_dedupe(struct file *file)
+{
+ if (capable(CAP_SYS_ADMIN))
+ return true;
+ if (file->f_mode & FMODE_WRITE)
+ return true;
+ if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
+ return true;
+ if (!inode_permission(file_inode(file), MAY_WRITE))
+ return true;
+ return false;
+}
+
+loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+ struct file *dst_file, loff_t dst_pos,
+ loff_t len, unsigned int remap_flags)
+{
+ loff_t ret;
+
+ WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
+ REMAP_FILE_CAN_SHORTEN));
+
+ ret = mnt_want_write_file(dst_file);
+ if (ret)
+ return ret;
+
+ ret = remap_verify_area(dst_file, dst_pos, len, true);
+ if (ret < 0)
+ goto out_drop_write;
+
+ ret = -EPERM;
+ if (!allow_file_dedupe(dst_file))
+ goto out_drop_write;
+
+ ret = -EXDEV;
+ if (src_file->f_path.mnt != dst_file->f_path.mnt)
+ goto out_drop_write;
+
+ ret = -EISDIR;
+ if (S_ISDIR(file_inode(dst_file)->i_mode))
+ goto out_drop_write;
+
+ ret = -EINVAL;
+ if (!dst_file->f_op->remap_file_range)
+ goto out_drop_write;
+
+ if (len == 0) {
+ ret = 0;
+ goto out_drop_write;
+ }
+
+ ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
+ dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
+out_drop_write:
+ mnt_drop_write_file(dst_file);
+
+ return ret;
+}
+EXPORT_SYMBOL(vfs_dedupe_file_range_one);
+
+int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
+{
+ struct file_dedupe_range_info *info;
+ struct inode *src = file_inode(file);
+ u64 off;
+ u64 len;
+ int i;
+ int ret;
+ u16 count = same->dest_count;
+ loff_t deduped;
+
+ if (!(file->f_mode & FMODE_READ))
+ return -EINVAL;
+
+ if (same->reserved1 || same->reserved2)
+ return -EINVAL;
+
+ off = same->src_offset;
+ len = same->src_length;
+
+ if (S_ISDIR(src->i_mode))
+ return -EISDIR;
+
+ if (!S_ISREG(src->i_mode))
+ return -EINVAL;
+
+ if (!file->f_op->remap_file_range)
+ return -EOPNOTSUPP;
+
+ ret = remap_verify_area(file, off, len, false);
+ if (ret < 0)
+ return ret;
+ ret = 0;
+
+ if (off + len > i_size_read(src))
+ return -EINVAL;
+
+ /* Arbitrary 1G limit on a single dedupe request, can be raised. */
+ len = min_t(u64, len, 1 << 30);
+
+ /* pre-format output fields to sane values */
+ for (i = 0; i < count; i++) {
+ same->info[i].bytes_deduped = 0ULL;
+ same->info[i].status = FILE_DEDUPE_RANGE_SAME;
+ }
+
+ for (i = 0, info = same->info; i < count; i++, info++) {
+ struct fd dst_fd = fdget(info->dest_fd);
+ struct file *dst_file = dst_fd.file;
+
+ if (!dst_file) {
+ info->status = -EBADF;
+ goto next_loop;
+ }
+
+ if (info->reserved) {
+ info->status = -EINVAL;
+ goto next_fdput;
+ }
+
+ deduped = vfs_dedupe_file_range_one(file, off, dst_file,
+ info->dest_offset, len,
+ REMAP_FILE_CAN_SHORTEN);
+ if (deduped == -EBADE)
+ info->status = FILE_DEDUPE_RANGE_DIFFERS;
+ else if (deduped < 0)
+ info->status = deduped;
+ else
+ info->bytes_deduped = len;
+
+next_fdput:
+ fdput(dst_fd);
+next_loop:
+ if (fatal_signal_pending(current))
+ break;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(vfs_dedupe_file_range);
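
The corresponding userspace entry point is the FIDEDUPERANGE ioctl, issued on the file holding the source data. A minimal sketch with a single destination (dest_count == 1); the 64 KiB length is arbitrary:

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>	/* FIDEDUPERANGE, struct file_dedupe_range */

	int main(int argc, char **argv)
	{
		struct file_dedupe_range *range;
		int src, dst;

		if (argc < 3)
			return 1;
		src = open(argv[1], O_RDONLY);
		dst = open(argv[2], O_RDWR);

		/* One trailing file_dedupe_range_info entry for one destination. */
		range = calloc(1, sizeof(*range) + sizeof(range->info[0]));
		range->src_offset = 0;
		range->src_length = 65536;
		range->dest_count = 1;
		range->info[0].dest_fd = dst;
		range->info[0].dest_offset = 0;

		if (ioctl(src, FIDEDUPERANGE, range) < 0)
			perror("FIDEDUPERANGE");
		else if (range->info[0].status == FILE_DEDUPE_RANGE_DIFFERS)
			printf("ranges differ, nothing deduped\n");
		else if (range->info[0].status < 0)
			printf("dedupe failed: %d\n", range->info[0].status);
		else
			printf("deduped %llu bytes\n",
			       (unsigned long long)range->info[0].bytes_deduped);

		free(range);
		return 0;
	}
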
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index b1b7d3f5752f..259f684d9236 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -416,8 +416,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bfree = buf->f_bavail = buf->f_ffree;
buf->f_blocks =
(romfs_maxsize(dentry->d_sb) + ROMBSIZE - 1) >> ROMBSBITS;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
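
The repeated .val[0]/.val[1] open coding across these statfs implementations is replaced by the u64_to_fsid() helper in include/linux/statfs.h, presumably along these lines (shown here as the obvious equivalent of the pattern it replaces, not quoted from the header):

	static inline __kernel_fsid_t u64_to_fsid(u64 v)
	{
		return (__kernel_fsid_t){.val = {(u32)v, (u32)(v >> 32)}};
	}
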
diff --git a/fs/splice.c b/fs/splice.c
index 599b740f1098..866d5c2367b2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1005,9 +1005,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
/*
* Determine where to splice to/from.
*/
-long do_splice(struct file *in, loff_t __user *off_in,
- struct file *out, loff_t __user *off_out,
- size_t len, unsigned int flags)
+long do_splice(struct file *in, loff_t *off_in, struct file *out,
+ loff_t *off_out, size_t len, unsigned int flags)
{
struct pipe_inode_info *ipipe;
struct pipe_inode_info *opipe;
@@ -1041,8 +1040,7 @@ long do_splice(struct file *in, loff_t __user *off_in,
if (off_out) {
if (!(out->f_mode & FMODE_PWRITE))
return -EINVAL;
- if (copy_from_user(&offset, off_out, sizeof(loff_t)))
- return -EFAULT;
+ offset = *off_out;
} else {
offset = out->f_pos;
}
@@ -1063,8 +1061,8 @@ long do_splice(struct file *in, loff_t __user *off_in,
if (!off_out)
out->f_pos = offset;
- else if (copy_to_user(off_out, &offset, sizeof(loff_t)))
- ret = -EFAULT;
+ else
+ *off_out = offset;
return ret;
}
@@ -1075,8 +1073,7 @@ long do_splice(struct file *in, loff_t __user *off_in,
if (off_in) {
if (!(in->f_mode & FMODE_PREAD))
return -EINVAL;
- if (copy_from_user(&offset, off_in, sizeof(loff_t)))
- return -EFAULT;
+ offset = *off_in;
} else {
offset = in->f_pos;
}
@@ -1100,8 +1097,8 @@ long do_splice(struct file *in, loff_t __user *off_in,
wakeup_pipe_readers(opipe);
if (!off_in)
in->f_pos = offset;
- else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
- ret = -EFAULT;
+ else
+ *off_in = offset;
return ret;
}
@@ -1109,6 +1106,46 @@ long do_splice(struct file *in, loff_t __user *off_in,
return -EINVAL;
}
+static long __do_splice(struct file *in, loff_t __user *off_in,
+ struct file *out, loff_t __user *off_out,
+ size_t len, unsigned int flags)
+{
+ struct pipe_inode_info *ipipe;
+ struct pipe_inode_info *opipe;
+ loff_t offset, *__off_in = NULL, *__off_out = NULL;
+ long ret;
+
+ ipipe = get_pipe_info(in, true);
+ opipe = get_pipe_info(out, true);
+
+ if (ipipe && off_in)
+ return -ESPIPE;
+ if (opipe && off_out)
+ return -ESPIPE;
+
+ if (off_out) {
+ if (copy_from_user(&offset, off_out, sizeof(loff_t)))
+ return -EFAULT;
+ __off_out = &offset;
+ }
+ if (off_in) {
+ if (copy_from_user(&offset, off_in, sizeof(loff_t)))
+ return -EFAULT;
+ __off_in = &offset;
+ }
+
+ ret = do_splice(in, __off_in, out, __off_out, len, flags);
+ if (ret < 0)
+ return ret;
+
+ if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t)))
+ return -EFAULT;
+ if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t)))
+ return -EFAULT;
+
+ return ret;
+}
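
Splitting the __user copies out into __do_splice() leaves do_splice() taking plain kernel pointers, which appears aimed at in-kernel callers such as io_uring (also touched in this diffstat). The userspace semantics are unchanged; the round trip __do_splice() now performs corresponds to the familiar offset behaviour of splice(2):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/types.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		int pipefd[2];
		loff_t off = 0;
		int fd;

		if (argc < 2 || pipe(pipefd) < 0)
			return 1;
		fd = open(argv[1], O_RDONLY);

		/* Pull up to 4096 bytes from offset 0 of the file into the pipe;
		 * on return 'off' holds the advanced offset while the file's
		 * f_pos is left untouched. */
		ssize_t n = splice(fd, &off, pipefd[1], NULL, 4096, 0);
		printf("spliced %zd bytes, offset now %lld\n", n, (long long)off);
		return 0;
	}
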
+
static int iter_to_pipe(struct iov_iter *from,
struct pipe_inode_info *pipe,
unsigned flags)
@@ -1303,8 +1340,8 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
if (in.file) {
out = fdget(fd_out);
if (out.file) {
- error = do_splice(in.file, off_in, out.file, off_out,
- len, flags);
+ error = __do_splice(in.file, off_in, out.file, off_out,
+ len, flags);
fdput(out);
}
fdput(in);
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 0cc4ceec0562..d6c6593ec169 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -380,8 +380,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = msblk->inodes;
buf->f_ffree = 0;
buf->f_namelen = SQUASHFS_NAME_LEN;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
diff --git a/fs/stat.c b/fs/stat.c
index 44f8ad346db4..dacecdda2e79 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -56,7 +56,7 @@ EXPORT_SYMBOL(generic_fillattr);
* @path: file to get attributes from
* @stat: structure to return attributes in
* @request_mask: STATX_xxx flags indicating what the caller wants
- * @query_flags: Query mode (KSTAT_QUERY_FLAGS)
+ * @query_flags: Query mode (AT_STATX_SYNC_TYPE)
*
* Get attributes without calling security_inode_getattr.
*
@@ -71,7 +71,7 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
memset(stat, 0, sizeof(*stat));
stat->result_mask |= STATX_BASIC_STATS;
- query_flags &= KSTAT_QUERY_FLAGS;
+ query_flags &= AT_STATX_SYNC_TYPE;
/* allow the fs to override these if it really wants to */
/* SB_NOATIME means filesystem supplies dummy atime value */
@@ -97,7 +97,7 @@ EXPORT_SYMBOL(vfs_getattr_nosec);
* @path: The file of interest
* @stat: Where to return the statistics
* @request_mask: STATX_xxx flags indicating what the caller wants
- * @query_flags: Query mode (KSTAT_QUERY_FLAGS)
+ * @query_flags: Query mode (AT_STATX_SYNC_TYPE)
*
 * Ask the filesystem for a file's attributes. The caller must use
 * request_mask and query_flags to indicate what they want.
@@ -126,53 +126,27 @@ int vfs_getattr(const struct path *path, struct kstat *stat,
EXPORT_SYMBOL(vfs_getattr);
/**
- * vfs_statx_fd - Get the enhanced basic attributes by file descriptor
+ * vfs_fstat - Get the basic attributes by file descriptor
* @fd: The file descriptor referring to the file of interest
* @stat: The result structure to fill in.
- * @request_mask: STATX_xxx flags indicating what the caller wants
- * @query_flags: Query mode (KSTAT_QUERY_FLAGS)
*
* This function is a wrapper around vfs_getattr(). The main difference is
* that it uses a file descriptor to determine the file location.
*
* 0 will be returned on success, and a -ve error code if unsuccessful.
*/
-int vfs_statx_fd(unsigned int fd, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+int vfs_fstat(int fd, struct kstat *stat)
{
struct fd f;
- int error = -EBADF;
-
- if (query_flags & ~KSTAT_QUERY_FLAGS)
- return -EINVAL;
+ int error;
f = fdget_raw(fd);
- if (f.file) {
- error = vfs_getattr(&f.file->f_path, stat,
- request_mask, query_flags);
- fdput(f);
- }
+ if (!f.file)
+ return -EBADF;
+ error = vfs_getattr(&f.file->f_path, stat, STATX_BASIC_STATS, 0);
+ fdput(f);
return error;
}
-EXPORT_SYMBOL(vfs_statx_fd);
-
-static inline unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags,
- int flags)
-{
- if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
- AT_EMPTY_PATH | KSTAT_QUERY_FLAGS)) != 0)
- return -EINVAL;
-
- *lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
- if (flags & AT_SYMLINK_NOFOLLOW)
- *lookup_flags &= ~LOOKUP_FOLLOW;
- if (flags & AT_NO_AUTOMOUNT)
- *lookup_flags &= ~LOOKUP_AUTOMOUNT;
- if (flags & AT_EMPTY_PATH)
- *lookup_flags |= LOOKUP_EMPTY;
-
- return 0;
-}
/**
* vfs_statx - Get basic and extra attributes by filename
@@ -189,15 +163,24 @@ static inline unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags,
*
* 0 will be returned on success, and a -ve error code if unsuccessful.
*/
-int vfs_statx(int dfd, const char __user *filename, int flags,
+static int vfs_statx(int dfd, const char __user *filename, int flags,
struct kstat *stat, u32 request_mask)
{
struct path path;
- int error = -EINVAL;
- unsigned lookup_flags;
+ unsigned lookup_flags = 0;
+ int error;
- if (vfs_stat_set_lookup_flags(&lookup_flags, flags))
+ if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | AT_EMPTY_PATH |
+ AT_STATX_SYNC_TYPE))
return -EINVAL;
+
+ if (!(flags & AT_SYMLINK_NOFOLLOW))
+ lookup_flags |= LOOKUP_FOLLOW;
+ if (!(flags & AT_NO_AUTOMOUNT))
+ lookup_flags |= LOOKUP_AUTOMOUNT;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
retry:
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
@@ -217,8 +200,13 @@ retry:
out:
return error;
}
-EXPORT_SYMBOL(vfs_statx);
+int vfs_fstatat(int dfd, const char __user *filename,
+ struct kstat *stat, int flags)
+{
+ return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
+ stat, STATX_BASIC_STATS);
+}
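
The AT_STATX_SYNC_TYPE flags that now appear in the kerneldoc are the statx(2) query-mode bits. A small userspace example (assumes glibc 2.28+ for the statx() wrapper) asking for possibly cached attributes:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/stat.h>

	int main(int argc, char **argv)
	{
		struct statx stx;

		if (argc < 2)
			return 1;

		/* AT_STATX_DONT_SYNC: accept possibly stale cached attributes
		 * (interesting on network filesystems); AT_STATX_FORCE_SYNC
		 * would demand a round trip to the server instead. */
		if (statx(AT_FDCWD, argv[1], AT_STATX_DONT_SYNC,
			  STATX_BASIC_STATS, &stx) < 0) {
			perror("statx");
			return 1;
		}
		printf("size=%llu blocks=%llu\n",
		       (unsigned long long)stx.stx_size,
		       (unsigned long long)stx.stx_blocks);
		return 0;
	}
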
#ifdef __ARCH_WANT_OLD_STAT
diff --git a/fs/statfs.c b/fs/statfs.c
index 2616424012ea..59f33752c131 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -29,6 +29,8 @@ static int flags_by_mnt(int mnt_flags)
flags |= ST_NODIRATIME;
if (mnt_flags & MNT_RELATIME)
flags |= ST_RELATIME;
+ if (mnt_flags & MNT_NOSYMFOLLOW)
+ flags |= ST_NOSYMFOLLOW;
return flags;
}
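
ST_NOSYMFOLLOW surfaces the MNT_NOSYMFOLLOW mount flag to userspace via f_flags in statfs(2)/statvfs(3). A hedged sketch; the 0x2000 fallback value is copied from the kernel's statfs flag block and may not yet be present in libc headers:

	#include <stdio.h>
	#include <sys/vfs.h>

	#ifndef ST_NOSYMFOLLOW
	#define ST_NOSYMFOLLOW 0x2000	/* assumed to match include/linux/statfs.h */
	#endif

	int main(int argc, char **argv)
	{
		struct statfs sfs;

		if (argc < 2 || statfs(argv[1], &sfs) < 0)
			return 1;
		printf("nosymfollow: %s\n",
		       (sfs.f_flags & ST_NOSYMFOLLOW) ? "yes" : "no");
		return 0;
	}
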
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 02b1d9d0c182..be47263b8605 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -98,8 +98,7 @@ static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = sbi->s_ninodes;
buf->f_ffree = sysv_count_free_inodes(sb);
buf->f_namelen = SYSV_NAMELEN;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index faf2017ada11..5bef3a68395d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -2414,8 +2414,7 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
+ buf->f_bfree;
buf->f_ffree = buf->f_bfree;
buf->f_namelen = UDF_NAME_LEN;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
return 0;
}
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index e3b69fb280e8..983558b572c7 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1431,8 +1431,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
? (buf->f_bfree - uspi->s_root_blocks) : 0;
buf->f_files = uspi->s_ncg * uspi->s_ipg;
buf->f_namelen = UFS_MAXNAMLEN;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
mutex_unlock(&UFS_SB(sb)->s_lock);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f2a8a0e75e1f..7371a7f7c652 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -947,11 +947,11 @@ xfs_free_file_space(
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
/* We can only free complete realtime extents. */
- if (XFS_IS_REALTIME_INODE(ip)) {
- xfs_extlen_t extsz = xfs_get_extsz_hint(ip);
-
- if ((startoffset_fsb | endoffset_fsb) & (extsz - 1))
- return -EINVAL;
+ if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) {
+ startoffset_fsb = roundup_64(startoffset_fsb,
+ mp->m_sb.sb_rextsize);
+ endoffset_fsb = rounddown_64(endoffset_fsb,
+ mp->m_sb.sb_rextsize);
}
/*
@@ -1147,14 +1147,6 @@ xfs_insert_file_space(
trace_xfs_insert_file_space(ip);
- /* We can only insert complete realtime extents. */
- if (XFS_IS_REALTIME_INODE(ip)) {
- xfs_extlen_t extsz = xfs_get_extsz_hint(ip);
-
- if ((stop_fsb | shift_fsb) & (extsz - 1))
- return -EINVAL;
- }
-
error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
if (error)
return error;
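
The behavioural change here: an unaligned hole punch on a realtime inode is no longer rejected; instead the range is rounded inward so only whole realtime extents are freed. A standalone illustration with made-up numbers (a 4-block rt extent size):

	#include <stdio.h>
	#include <stdint.h>

	static uint64_t roundup_u64(uint64_t x, uint32_t y)
	{
		return ((x + y - 1) / y) * y;
	}

	static uint64_t rounddown_u64(uint64_t x, uint32_t y)
	{
		return (x / y) * y;
	}

	int main(void)
	{
		uint32_t rextsize = 4;			/* rt extent = 4 fs blocks */
		uint64_t start_fsb = 6, end_fsb = 21;	/* unaligned punch request */

		/* Round the start up and the end down: blocks [8, 20) are freed,
		 * the partial extents at either edge are left alone. */
		printf("freeing blocks [%llu, %llu)\n",
		       (unsigned long long)roundup_u64(start_fsb, rextsize),
		       (unsigned long long)rounddown_u64(end_fsb, rextsize));
		return 0;
	}
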
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 3d1b95124744..5b0f93f73837 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -32,6 +32,39 @@
static const struct vm_operations_struct xfs_file_vm_ops;
+/*
+ * Decide if the given file range is aligned to the size of the fundamental
+ * allocation unit for the file.
+ */
+static bool
+xfs_is_falloc_aligned(
+ struct xfs_inode *ip,
+ loff_t pos,
+ long long int len)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ uint64_t mask;
+
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ if (!is_power_of_2(mp->m_sb.sb_rextsize)) {
+ u64 rextbytes;
+ u32 mod;
+
+ rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
+ div_u64_rem(pos, rextbytes, &mod);
+ if (mod)
+ return false;
+ div_u64_rem(len, rextbytes, &mod);
+ return mod == 0;
+ }
+ mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;
+ } else {
+ mask = mp->m_sb.sb_blocksize - 1;
+ }
+
+ return !((pos | len) & mask);
+}
+
int
xfs_update_prealloc_flags(
struct xfs_inode *ip,
@@ -850,9 +883,7 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
- unsigned int blksize_mask = i_blocksize(inode) - 1;
-
- if (offset & blksize_mask || len & blksize_mask) {
+ if (!xfs_is_falloc_aligned(ip, offset, len)) {
error = -EINVAL;
goto out_unlock;
}
@@ -872,10 +903,9 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
- unsigned int blksize_mask = i_blocksize(inode) - 1;
loff_t isize = i_size_read(inode);
- if (offset & blksize_mask || len & blksize_mask) {
+ if (!xfs_is_falloc_aligned(ip, offset, len)) {
error = -EINVAL;
goto out_unlock;
}
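
xfs_is_falloc_aligned() falls back to division when the realtime extent size is not a power of two, since the mask trick only works for power-of-two units. A userspace rehearsal of that rule with illustrative numbers (12 KiB extents):

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdint.h>

	static bool falloc_aligned(uint64_t pos, uint64_t len, uint64_t alloc_unit)
	{
		if (alloc_unit & (alloc_unit - 1))	/* not a power of two */
			return (pos % alloc_unit) == 0 && (len % alloc_unit) == 0;
		return ((pos | len) & (alloc_unit - 1)) == 0;	/* fast mask path */
	}

	int main(void)
	{
		uint64_t rextbytes = 3 * 4096;	/* 3-block realtime extent: 12 KiB */

		printf("%d\n", falloc_aligned(24576, 49152, rextbytes)); /* 1: aligned */
		printf("%d\n", falloc_aligned(24576, 8192, rextbytes));  /* 0: bad len */
		return 0;
	}
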
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index ad1009778d33..5b7a1e201559 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -175,6 +175,12 @@ static inline xfs_dev_t linux_to_xfs_dev_t(dev_t dev)
#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
#define xfs_stack_trace() dump_stack()
+static inline uint64_t rounddown_64(uint64_t x, uint32_t y)
+{
+ do_div(x, y);
+ return x * y;
+}
+
static inline uint64_t roundup_64(uint64_t x, uint32_t y)
{
x += y - 1;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a8289adc1b29..87886b7f77da 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3446,6 +3446,14 @@ xlog_recover_finish(
int error;
error = xlog_recover_process_intents(log);
if (error) {
+ /*
+ * Cancel all the unprocessed intent items now so that
+ * we don't leave them pinned in the AIL. This can
+ * cause the AIL to livelock on the pinned item if
+ * anyone tries to push the AIL (inode reclaim does
+ * this) before we get around to xfs_log_mount_cancel.
+ */
+ xlog_recover_cancel_intents(log);
xfs_alert(log->l_mp, "Failed to recover intents");
return error;
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d1b5f2d2a245..e3e229e52512 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -794,8 +794,7 @@ xfs_fs_statfs(
statp->f_namelen = MAXNAMELEN - 1;
id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
- statp->f_fsid.val[0] = (u32)id;
- statp->f_fsid.val[1] = (u32)(id >> 32);
+ statp->f_fsid = u64_to_fsid(id);
icount = percpu_counter_sum(&mp->m_icount);
ifree = percpu_counter_sum(&mp->m_ifree);
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 64cc2a9c38c8..ff5930be096c 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1116,8 +1116,7 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
fsid = le64_to_cpup((void *)sbi->s_uuid.b) ^
le64_to_cpup((void *)sbi->s_uuid.b + sizeof(u64));
- buf->f_fsid.val[0] = (u32)fsid;
- buf->f_fsid.val[1] = (u32)(fsid >> 32);
+ buf->f_fsid = u64_to_fsid(fsid);
return 0;
}