diff options
Diffstat (limited to 'fs')
173 files changed, 4482 insertions, 3466 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index a020a8f00a1a..057487efaaeb 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -13,7 +13,8 @@ #include <linux/fs.h> #include <linux/sched.h> #include <linux/cred.h> -#include <linux/parser.h> +#include <linux/fs_parser.h> +#include <linux/fs_context.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <net/9p/9p.h> @@ -33,6 +34,10 @@ struct kmem_cache *v9fs_inode_cache; */ enum { + /* Mount-point source, we need to handle this explicitly because + * the code below accepts unknown args and the vfs layer only handles + * source if we rejected it as EINVAL */ + Opt_source, /* Options that take integer arguments */ Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid, /* String options */ @@ -43,27 +48,71 @@ enum { Opt_access, Opt_posixacl, /* Lock timeout option */ Opt_locktimeout, - /* Error token */ - Opt_err + + /* Client options */ + Opt_msize, Opt_trans, Opt_legacy, Opt_version, + + /* fd transport options */ + /* Options that take integer arguments */ + Opt_rfdno, Opt_wfdno, + /* Options that take no arguments */ + + /* rdma transport options */ + /* Options that take integer arguments */ + Opt_rq_depth, Opt_sq_depth, Opt_timeout, + + /* Options for both fd and rdma transports */ + Opt_port, Opt_privport, +}; + +static const struct constant_table p9_versions[] = { + { "9p2000", p9_proto_legacy }, + { "9p2000.u", p9_proto_2000u }, + { "9p2000.L", p9_proto_2000L }, + {} }; -static const match_table_t tokens = { - {Opt_debug, "debug=%x"}, - {Opt_dfltuid, "dfltuid=%u"}, - {Opt_dfltgid, "dfltgid=%u"}, - {Opt_afid, "afid=%u"}, - {Opt_uname, "uname=%s"}, - {Opt_remotename, "aname=%s"}, - {Opt_nodevmap, "nodevmap"}, - {Opt_noxattr, "noxattr"}, - {Opt_directio, "directio"}, - {Opt_ignoreqv, "ignoreqv"}, - {Opt_cache, "cache=%s"}, - {Opt_cachetag, "cachetag=%s"}, - {Opt_access, "access=%s"}, - {Opt_posixacl, "posixacl"}, - {Opt_locktimeout, "locktimeout=%u"}, - {Opt_err, NULL} +/* + * This structure contains all parameters used for the core code, + * the client, and all the transports. + */ +const struct fs_parameter_spec v9fs_param_spec[] = { + fsparam_string ("source", Opt_source), + fsparam_u32hex ("debug", Opt_debug), + fsparam_uid ("dfltuid", Opt_dfltuid), + fsparam_gid ("dfltgid", Opt_dfltgid), + fsparam_u32 ("afid", Opt_afid), + fsparam_string ("uname", Opt_uname), + fsparam_string ("aname", Opt_remotename), + fsparam_flag ("nodevmap", Opt_nodevmap), + fsparam_flag ("noxattr", Opt_noxattr), + fsparam_flag ("directio", Opt_directio), + fsparam_flag ("ignoreqv", Opt_ignoreqv), + fsparam_string ("cache", Opt_cache), + fsparam_string ("cachetag", Opt_cachetag), + fsparam_string ("access", Opt_access), + fsparam_flag ("posixacl", Opt_posixacl), + fsparam_u32 ("locktimeout", Opt_locktimeout), + + /* client options */ + fsparam_u32 ("msize", Opt_msize), + fsparam_flag ("noextend", Opt_legacy), + fsparam_string ("trans", Opt_trans), + fsparam_enum ("version", Opt_version, p9_versions), + + /* fd transport options */ + fsparam_u32 ("rfdno", Opt_rfdno), + fsparam_u32 ("wfdno", Opt_wfdno), + + /* rdma transport options */ + fsparam_u32 ("sq", Opt_sq_depth), + fsparam_u32 ("rq", Opt_rq_depth), + fsparam_u32 ("timeout", Opt_timeout), + + /* fd and rdma transprt options */ + fsparam_u32 ("port", Opt_port), + fsparam_flag ("privport", Opt_privport), + {} }; /* Interpret mount options for cache mode */ @@ -101,7 +150,7 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root) struct v9fs_session_info *v9ses = root->d_sb->s_fs_info; if (v9ses->debug) - seq_printf(m, ",debug=%x", v9ses->debug); + seq_printf(m, ",debug=%#x", v9ses->debug); if (!uid_eq(v9ses->dfltuid, V9FS_DEFUID)) seq_printf(m, ",dfltuid=%u", from_kuid_munged(&init_user_ns, v9ses->dfltuid)); @@ -117,7 +166,7 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root) if (v9ses->nodev) seq_puts(m, ",nodevmap"); if (v9ses->cache) - seq_printf(m, ",cache=%x", v9ses->cache); + seq_printf(m, ",cache=%#x", v9ses->cache); #ifdef CONFIG_9P_FSCACHE if (v9ses->cachetag && (v9ses->cache & CACHE_FSCACHE)) seq_printf(m, ",cachetag=%s", v9ses->cachetag); @@ -153,267 +202,254 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root) } /** - * v9fs_parse_options - parse mount options into session structure - * @v9ses: existing v9fs session information - * @opts: The mount option string + * v9fs_parse_param - parse a mount option into the filesystem context + * @fc: the filesystem context + * @param: the parameter to parse * * Return 0 upon success, -ERRNO upon failure. */ - -static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) +int v9fs_parse_param(struct fs_context *fc, struct fs_parameter *param) { - char *options, *tmp_options; - substring_t args[MAX_OPT_ARGS]; - char *p; - int option = 0; + struct v9fs_context *ctx = fc->fs_private; + struct fs_parse_result result; char *s; - int ret = 0; - - /* setup defaults */ - v9ses->afid = ~0; - v9ses->debug = 0; - v9ses->cache = CACHE_NONE; -#ifdef CONFIG_9P_FSCACHE - v9ses->cachetag = NULL; -#endif - v9ses->session_lock_timeout = P9_LOCK_TIMEOUT; - - if (!opts) - return 0; + int r; + int opt; + struct p9_client_opts *clnt = &ctx->client_opts; + struct p9_fd_opts *fd_opts = &ctx->fd_opts; + struct p9_rdma_opts *rdma_opts = &ctx->rdma_opts; + struct p9_session_opts *session_opts = &ctx->session_opts; + + opt = fs_parse(fc, v9fs_param_spec, param, &result); + if (opt < 0) { + /* + * We might like to report bad mount options here, but + * traditionally 9p has ignored unknown mount options + */ + if (opt == -ENOPARAM) + return 0; - tmp_options = kstrdup(opts, GFP_KERNEL); - if (!tmp_options) { - ret = -ENOMEM; - goto fail_option_alloc; + return opt; } - options = tmp_options; - - while ((p = strsep(&options, ",")) != NULL) { - int token, r; - - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_debug: - r = match_int(&args[0], &option); - if (r < 0) { - p9_debug(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - ret = r; - } else { - v9ses->debug = option; + + switch (opt) { + case Opt_source: + if (fc->source) { + pr_info("p9: multiple sources not supported\n"); + return -EINVAL; + } + fc->source = param->string; + param->string = NULL; + break; + case Opt_debug: + session_opts->debug = result.uint_32; #ifdef CONFIG_NET_9P_DEBUG - p9_debug_level = option; + p9_debug_level = result.uint_32; #endif - } - break; - - case Opt_dfltuid: - r = match_int(&args[0], &option); - if (r < 0) { - p9_debug(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - ret = r; - continue; - } - v9ses->dfltuid = make_kuid(current_user_ns(), option); - if (!uid_valid(v9ses->dfltuid)) { - p9_debug(P9_DEBUG_ERROR, - "uid field, but not a uid?\n"); - ret = -EINVAL; - } - break; - case Opt_dfltgid: - r = match_int(&args[0], &option); - if (r < 0) { - p9_debug(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - ret = r; - continue; - } - v9ses->dfltgid = make_kgid(current_user_ns(), option); - if (!gid_valid(v9ses->dfltgid)) { - p9_debug(P9_DEBUG_ERROR, - "gid field, but not a gid?\n"); - ret = -EINVAL; - } - break; - case Opt_afid: - r = match_int(&args[0], &option); - if (r < 0) { - p9_debug(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - ret = r; - } else { - v9ses->afid = option; - } - break; - case Opt_uname: - kfree(v9ses->uname); - v9ses->uname = match_strdup(&args[0]); - if (!v9ses->uname) { - ret = -ENOMEM; - goto free_and_return; - } - break; - case Opt_remotename: - kfree(v9ses->aname); - v9ses->aname = match_strdup(&args[0]); - if (!v9ses->aname) { - ret = -ENOMEM; - goto free_and_return; - } - break; - case Opt_nodevmap: - v9ses->nodev = 1; - break; - case Opt_noxattr: - v9ses->flags |= V9FS_NO_XATTR; - break; - case Opt_directio: - v9ses->flags |= V9FS_DIRECT_IO; - break; - case Opt_ignoreqv: - v9ses->flags |= V9FS_IGNORE_QV; - break; - case Opt_cachetag: + break; + + case Opt_dfltuid: + session_opts->dfltuid = result.uid; + break; + case Opt_dfltgid: + session_opts->dfltgid = result.gid; + break; + case Opt_afid: + session_opts->afid = result.uint_32; + break; + case Opt_uname: + kfree(session_opts->uname); + session_opts->uname = param->string; + param->string = NULL; + break; + case Opt_remotename: + kfree(session_opts->aname); + session_opts->aname = param->string; + param->string = NULL; + break; + case Opt_nodevmap: + session_opts->nodev = 1; + break; + case Opt_noxattr: + session_opts->flags |= V9FS_NO_XATTR; + break; + case Opt_directio: + session_opts->flags |= V9FS_DIRECT_IO; + break; + case Opt_ignoreqv: + session_opts->flags |= V9FS_IGNORE_QV; + break; + case Opt_cachetag: #ifdef CONFIG_9P_FSCACHE - kfree(v9ses->cachetag); - v9ses->cachetag = match_strdup(&args[0]); - if (!v9ses->cachetag) { - ret = -ENOMEM; - goto free_and_return; - } + kfree(session_opts->cachetag); + session_opts->cachetag = param->string; + param->string = NULL; #endif - break; - case Opt_cache: - s = match_strdup(&args[0]); - if (!s) { - ret = -ENOMEM; - p9_debug(P9_DEBUG_ERROR, - "problem allocating copy of cache arg\n"); - goto free_and_return; - } - r = get_cache_mode(s); - if (r < 0) - ret = r; - else - v9ses->cache = r; - - kfree(s); - break; - - case Opt_access: - s = match_strdup(&args[0]); - if (!s) { - ret = -ENOMEM; - p9_debug(P9_DEBUG_ERROR, - "problem allocating copy of access arg\n"); - goto free_and_return; + break; + case Opt_cache: + r = get_cache_mode(param->string); + if (r < 0) + return r; + session_opts->cache = r; + break; + case Opt_access: + s = param->string; + session_opts->flags &= ~V9FS_ACCESS_MASK; + if (strcmp(s, "user") == 0) { + session_opts->flags |= V9FS_ACCESS_USER; + } else if (strcmp(s, "any") == 0) { + session_opts->flags |= V9FS_ACCESS_ANY; + } else if (strcmp(s, "client") == 0) { + session_opts->flags |= V9FS_ACCESS_CLIENT; + } else { + uid_t uid; + + session_opts->flags |= V9FS_ACCESS_SINGLE; + r = kstrtouint(s, 10, &uid); + if (r) { + pr_info("Unknown access argument %s: %d\n", + param->string, r); + return r; } - - v9ses->flags &= ~V9FS_ACCESS_MASK; - if (strcmp(s, "user") == 0) - v9ses->flags |= V9FS_ACCESS_USER; - else if (strcmp(s, "any") == 0) - v9ses->flags |= V9FS_ACCESS_ANY; - else if (strcmp(s, "client") == 0) { - v9ses->flags |= V9FS_ACCESS_CLIENT; - } else { - uid_t uid; - - v9ses->flags |= V9FS_ACCESS_SINGLE; - r = kstrtouint(s, 10, &uid); - if (r) { - ret = r; - pr_info("Unknown access argument %s: %d\n", - s, r); - kfree(s); - continue; - } - v9ses->uid = make_kuid(current_user_ns(), uid); - if (!uid_valid(v9ses->uid)) { - ret = -EINVAL; - pr_info("Unknown uid %s\n", s); - } + session_opts->uid = make_kuid(current_user_ns(), uid); + if (!uid_valid(session_opts->uid)) { + pr_info("Unknown uid %s\n", s); + return -EINVAL; } + } + break; - kfree(s); - break; - - case Opt_posixacl: + case Opt_posixacl: #ifdef CONFIG_9P_FS_POSIX_ACL - v9ses->flags |= V9FS_POSIX_ACL; + session_opts->flags |= V9FS_POSIX_ACL; #else - p9_debug(P9_DEBUG_ERROR, - "Not defined CONFIG_9P_FS_POSIX_ACL. Ignoring posixacl option\n"); + p9_debug(P9_DEBUG_ERROR, + "Not defined CONFIG_9P_FS_POSIX_ACL. Ignoring posixacl option\n"); #endif - break; - - case Opt_locktimeout: - r = match_int(&args[0], &option); - if (r < 0) { - p9_debug(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - ret = r; - continue; - } - if (option < 1) { - p9_debug(P9_DEBUG_ERROR, - "locktimeout must be a greater than zero integer.\n"); - ret = -EINVAL; - continue; - } - v9ses->session_lock_timeout = (long)option * HZ; - break; + break; - default: - continue; + case Opt_locktimeout: + if (result.uint_32 < 1) { + p9_debug(P9_DEBUG_ERROR, + "locktimeout must be a greater than zero integer.\n"); + return -EINVAL; } + session_opts->session_lock_timeout = (long)result.uint_32 * HZ; + break; + + /* Options for client */ + case Opt_msize: + if (result.uint_32 < 4096) { + p9_debug(P9_DEBUG_ERROR, "msize should be at least 4k\n"); + return -EINVAL; + } + if (result.uint_32 > INT_MAX) { + p9_debug(P9_DEBUG_ERROR, "msize too big\n"); + return -EINVAL; + } + clnt->msize = result.uint_32; + break; + case Opt_trans: + v9fs_put_trans(clnt->trans_mod); + clnt->trans_mod = v9fs_get_trans_by_name(param->string); + if (!clnt->trans_mod) { + pr_info("Could not find request transport: %s\n", + param->string); + return -EINVAL; + } + break; + case Opt_legacy: + clnt->proto_version = p9_proto_legacy; + break; + case Opt_version: + clnt->proto_version = result.uint_32; + p9_debug(P9_DEBUG_9P, "Protocol version: %s\n", param->string); + break; + /* Options for fd transport */ + case Opt_rfdno: + fd_opts->rfd = result.uint_32; + break; + case Opt_wfdno: + fd_opts->wfd = result.uint_32; + break; + /* Options for rdma transport */ + case Opt_sq_depth: + rdma_opts->sq_depth = result.uint_32; + break; + case Opt_rq_depth: + rdma_opts->rq_depth = result.uint_32; + break; + case Opt_timeout: + rdma_opts->timeout = result.uint_32; + break; + /* Options for both fd and rdma transports */ + case Opt_port: + fd_opts->port = result.uint_32; + rdma_opts->port = result.uint_32; + break; + case Opt_privport: + fd_opts->privport = true; + rdma_opts->port = true; + break; } -free_and_return: - kfree(tmp_options); -fail_option_alloc: - return ret; + return 0; +} + +static void v9fs_apply_options(struct v9fs_session_info *v9ses, + struct fs_context *fc) +{ + struct v9fs_context *ctx = fc->fs_private; + + v9ses->debug = ctx->session_opts.debug; + v9ses->dfltuid = ctx->session_opts.dfltuid; + v9ses->dfltgid = ctx->session_opts.dfltgid; + v9ses->afid = ctx->session_opts.afid; + v9ses->uname = ctx->session_opts.uname; + ctx->session_opts.uname = NULL; + v9ses->aname = ctx->session_opts.aname; + ctx->session_opts.aname = NULL; + v9ses->nodev = ctx->session_opts.nodev; + /* + * Note that we must |= flags here as session_init already + * set basic flags. This adds in flags from parsed options. + */ + v9ses->flags |= ctx->session_opts.flags; +#ifdef CONFIG_9P_FSCACHE + v9ses->cachetag = ctx->session_opts.cachetag; + ctx->session_opts.cachetag = NULL; +#endif + v9ses->cache = ctx->session_opts.cache; + v9ses->uid = ctx->session_opts.uid; + v9ses->session_lock_timeout = ctx->session_opts.session_lock_timeout; } /** * v9fs_session_init - initialize session * @v9ses: session information structure - * @dev_name: device being mounted - * @data: options + * @fc: the filesystem mount context * */ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, - const char *dev_name, char *data) + struct fs_context *fc) { struct p9_fid *fid; int rc = -ENOMEM; - v9ses->uname = kstrdup(V9FS_DEFUSER, GFP_KERNEL); - if (!v9ses->uname) - goto err_names; - - v9ses->aname = kstrdup(V9FS_DEFANAME, GFP_KERNEL); - if (!v9ses->aname) - goto err_names; init_rwsem(&v9ses->rename_sem); - v9ses->uid = INVALID_UID; - v9ses->dfltuid = V9FS_DEFUID; - v9ses->dfltgid = V9FS_DEFGID; - - v9ses->clnt = p9_client_create(dev_name, data); + v9ses->clnt = p9_client_create(fc); if (IS_ERR(v9ses->clnt)) { rc = PTR_ERR(v9ses->clnt); p9_debug(P9_DEBUG_ERROR, "problem initializing 9p client\n"); goto err_names; } + /* + * Initialize flags on the real v9ses. v9fs_apply_options below + * will |= the additional flags from parsed options. + */ v9ses->flags = V9FS_ACCESS_USER; if (p9_is_proto_dotl(v9ses->clnt)) { @@ -423,9 +459,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->flags |= V9FS_PROTO_2000U; } - rc = v9fs_parse_options(v9ses, data); - if (rc < 0) - goto err_clnt; + v9fs_apply_options(v9ses, fc); v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; @@ -471,7 +505,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, #ifdef CONFIG_9P_FSCACHE /* register the session for caching */ if (v9ses->cache & CACHE_FSCACHE) { - rc = v9fs_cache_session_get_cookie(v9ses, dev_name); + rc = v9fs_cache_session_get_cookie(v9ses, fc->source); if (rc < 0) goto err_clnt; } diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index f28bc763847a..6a12445d3858 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -10,6 +10,9 @@ #include <linux/backing-dev.h> #include <linux/netfs.h> +#include <linux/fs_parser.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> /** * enum p9_session_flags - option flags for each 9P session @@ -163,11 +166,13 @@ static inline struct fscache_volume *v9fs_session_cache(struct v9fs_session_info #endif } +extern const struct fs_parameter_spec v9fs_param_spec[]; +extern int v9fs_parse_param(struct fs_context *fc, struct fs_parameter *param); extern int v9fs_show_options(struct seq_file *m, struct dentry *root); struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, - const char *dev_name, char *data); + struct fs_context *fc); extern void v9fs_session_close(struct v9fs_session_info *v9ses); extern void v9fs_session_cancel(struct v9fs_session_info *v9ses); extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index c1acbc98465d..c5bf74d547e8 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -109,7 +109,6 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) p9_debug(P9_DEBUG_VFS, "refresh inode: dentry = %pd (%p), got error %pe\n", dentry, dentry, ERR_PTR(retval)); - if (retval < 0) return retval; } } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 612a230bc012..6f3880208587 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -43,14 +43,18 @@ int v9fs_file_open(struct inode *inode, struct file *file) struct v9fs_session_info *v9ses; struct p9_fid *fid; int omode; + int o_append; p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); v9ses = v9fs_inode2v9ses(inode); - if (v9fs_proto_dotl(v9ses)) + if (v9fs_proto_dotl(v9ses)) { omode = v9fs_open_to_dotl_flags(file->f_flags); - else + o_append = P9_DOTL_APPEND; + } else { omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses)); + o_append = P9_OAPPEND; + } fid = file->private_data; if (!fid) { fid = v9fs_fid_clone(file_dentry(file)); @@ -58,9 +62,10 @@ int v9fs_file_open(struct inode *inode, struct file *file) return PTR_ERR(fid); if ((v9ses->cache & CACHE_WRITEBACK) && (omode & P9_OWRITE)) { - int writeback_omode = (omode & ~P9_OWRITE) | P9_ORDWR; + int writeback_omode = (omode & ~(P9_OWRITE | o_append)) | P9_ORDWR; p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, try opening O_RDWR\n"); + err = p9_client_open(fid, writeback_omode); if (err < 0) { p9_debug(P9_DEBUG_CACHE, "could not open O_RDWR, disabling caches\n"); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 8666c9c62258..97abe65bf7c1 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -786,7 +786,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses)); if ((v9ses->cache & CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) { - p9_omode = (p9_omode & ~P9_OWRITE) | P9_ORDWR; + p9_omode = (p9_omode & ~(P9_OWRITE | P9_OAPPEND)) | P9_ORDWR; p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, creating w/ O_RDWR\n"); } @@ -1393,4 +1393,3 @@ static const struct inode_operations v9fs_symlink_inode_operations = { .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, }; - diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 1661a25f2772..643e759eacb2 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -282,7 +282,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } if ((v9ses->cache & CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) { - p9_omode = (p9_omode & ~P9_OWRITE) | P9_ORDWR; + p9_omode = (p9_omode & ~(P9_OWRITE | P9_DOTL_APPEND)) | P9_ORDWR; p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, creating w/ O_RDWR\n"); } diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 1581ebac5bb4..315336de6f02 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -19,6 +19,7 @@ #include <linux/statfs.h> #include <linux/magic.h> #include <linux/fscache.h> +#include <linux/fs_context.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -30,32 +31,10 @@ static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl; -/** - * v9fs_set_super - set the superblock - * @s: super block - * @data: file system specific data - * - */ - -static int v9fs_set_super(struct super_block *s, void *data) -{ - s->s_fs_info = data; - return set_anon_super(s, data); -} - -/** - * v9fs_fill_super - populate superblock with info - * @sb: superblock - * @v9ses: session information - * @flags: flags propagated from v9fs_mount() - * - */ - -static int -v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, - int flags) +static int v9fs_fill_super(struct super_block *sb) { int ret; + struct v9fs_session_info *v9ses = v9ses = sb->s_fs_info; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize_bits = fls(v9ses->maxdata - 1); @@ -95,16 +74,12 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, } /** - * v9fs_mount - mount a superblock - * @fs_type: file system type - * @flags: mount flags - * @dev_name: device name that was mounted - * @data: mount options + * v9fs_get_tree - create the mountable root and superblock + * @fc: the filesystem context * */ -static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int v9fs_get_tree(struct fs_context *fc) { struct super_block *sb = NULL; struct inode *inode = NULL; @@ -117,20 +92,21 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); if (!v9ses) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - fid = v9fs_session_init(v9ses, dev_name, data); + fid = v9fs_session_init(v9ses, fc); if (IS_ERR(fid)) { retval = PTR_ERR(fid); goto free_session; } - sb = sget(fs_type, NULL, v9fs_set_super, flags, v9ses); + fc->s_fs_info = v9ses; + sb = sget_fc(fc, NULL, set_anon_super_fc); if (IS_ERR(sb)) { retval = PTR_ERR(sb); goto clunk_fid; } - retval = v9fs_fill_super(sb, v9ses, flags); + retval = v9fs_fill_super(sb); if (retval) goto release_sb; @@ -159,14 +135,15 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, v9fs_fid_add(root, &fid); p9_debug(P9_DEBUG_VFS, " simple set mount, return 0\n"); - return dget(sb->s_root); + fc->root = dget(sb->s_root); + return 0; clunk_fid: p9_fid_put(fid); v9fs_session_close(v9ses); free_session: kfree(v9ses); - return ERR_PTR(retval); + return retval; release_sb: /* @@ -177,7 +154,7 @@ release_sb: */ p9_fid_put(fid); deactivate_locked_super(sb); - return ERR_PTR(retval); + return retval; } /** @@ -303,11 +280,86 @@ static const struct super_operations v9fs_super_ops_dotl = { .write_inode = v9fs_write_inode_dotl, }; +static void v9fs_free_fc(struct fs_context *fc) +{ + struct v9fs_context *ctx = fc->fs_private; + + if (!ctx) + return; + + /* These should be NULL by now but guard against leaks */ + kfree(ctx->session_opts.uname); + kfree(ctx->session_opts.aname); +#ifdef CONFIG_9P_FSCACHE + kfree(ctx->session_opts.cachetag); +#endif + if (ctx->client_opts.trans_mod) + v9fs_put_trans(ctx->client_opts.trans_mod); + kfree(ctx); +} + +static const struct fs_context_operations v9fs_context_ops = { + .parse_param = v9fs_parse_param, + .get_tree = v9fs_get_tree, + .free = v9fs_free_fc, +}; + +static int v9fs_init_fs_context(struct fs_context *fc) +{ + struct v9fs_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + /* initialize core options */ + ctx->session_opts.afid = ~0; + ctx->session_opts.cache = CACHE_NONE; + ctx->session_opts.session_lock_timeout = P9_LOCK_TIMEOUT; + ctx->session_opts.uname = kstrdup(V9FS_DEFUSER, GFP_KERNEL); + if (!ctx->session_opts.uname) + goto error; + + ctx->session_opts.aname = kstrdup(V9FS_DEFANAME, GFP_KERNEL); + if (!ctx->session_opts.aname) + goto error; + + ctx->session_opts.uid = INVALID_UID; + ctx->session_opts.dfltuid = V9FS_DEFUID; + ctx->session_opts.dfltgid = V9FS_DEFGID; + + /* initialize client options */ + ctx->client_opts.proto_version = p9_proto_2000L; + ctx->client_opts.msize = DEFAULT_MSIZE; + + /* initialize fd transport options */ + ctx->fd_opts.port = P9_FD_PORT; + ctx->fd_opts.rfd = ~0; + ctx->fd_opts.wfd = ~0; + ctx->fd_opts.privport = false; + + /* initialize rdma transport options */ + ctx->rdma_opts.port = P9_RDMA_PORT; + ctx->rdma_opts.sq_depth = P9_RDMA_SQ_DEPTH; + ctx->rdma_opts.rq_depth = P9_RDMA_RQ_DEPTH; + ctx->rdma_opts.timeout = P9_RDMA_TIMEOUT; + ctx->rdma_opts.privport = false; + + fc->ops = &v9fs_context_ops; + fc->fs_private = ctx; + + return 0; +error: + fc->need_free = 1; + return -ENOMEM; +} + struct file_system_type v9fs_fs_type = { .name = "9p", - .mount = v9fs_mount, .kill_sb = v9fs_kill_super, .owner = THIS_MODULE, .fs_flags = FS_RENAME_DOES_D_MOVE, + .init_fs_context = v9fs_init_fs_context, + .parameters = v9fs_param_spec, }; MODULE_ALIAS_FS("9p"); diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index a58f9248b0f5..6743b3b64217 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -432,16 +432,6 @@ static int autofs_dev_ioctl_timeout(struct file *fp, if (!autofs_type_indirect(sbi->type)) return -EINVAL; - /* An expire timeout greater than the superblock timeout - * could be a problem at shutdown but the super block - * timeout itself can change so all we can really do is - * warn the user. - */ - if (timeout >= sbi->exp_timeout) - pr_warn("per-mount expire timeout is greater than " - "the parent autofs mount timeout which could " - "prevent shutdown\n"); - dentry = try_lookup_noperm(&QSTR_LEN(param->path, path_len), base); if (IS_ERR_OR_NULL(dentry)) @@ -470,6 +460,18 @@ static int autofs_dev_ioctl_timeout(struct file *fp, ino->flags |= AUTOFS_INF_EXPIRE_SET; ino->exp_timeout = timeout * HZ; } + + /* An expire timeout greater than the superblock timeout + * could be a problem at shutdown but the super block + * timeout itself can change so all we can really do is + * warn the user. + */ + if (ino->flags & AUTOFS_INF_EXPIRE_SET && + ino->exp_timeout > sbi->exp_timeout) + pr_warn("per-mount expire timeout is greater than " + "the parent autofs mount timeout which could " + "prevent shutdown\n"); + dput(dentry); } diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 732aee76a24c..b932b1719dfc 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -55,7 +55,7 @@ void autofs_kill_sb(struct super_block *sb) } pr_debug("shutting down\n"); - kill_litter_super(sb); + kill_anon_super(sb); if (sbi) kfree_rcu(sbi, rcu); } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index d10df9d89d1c..2c31002b314a 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -602,9 +602,8 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap, } inode->i_private = cp; inode->i_size = size; - d_add(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count++; @@ -631,12 +630,11 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap, static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry) { struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb); - struct autofs_info *ino = autofs_dentry_ino(dentry); struct autofs_info *p_ino; p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count--; - dput(ino->dentry); + d_make_discardable(dentry); d_inode(dentry)->i_size = 0; clear_nlink(d_inode(dentry)); @@ -718,7 +716,7 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry) p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count--; - dput(ino->dentry); + d_make_discardable(dentry); d_inode(dentry)->i_size = 0; clear_nlink(d_inode(dentry)); @@ -748,12 +746,11 @@ static struct dentry *autofs_dir_mkdir(struct mnt_idmap *idmap, inode = autofs_get_inode(dir->i_sb, S_IFDIR | mode); if (!inode) return ERR_PTR(-ENOMEM); - d_add(dentry, inode); if (sbi->version < 5) autofs_set_leaf_automount_flags(dentry); - dget(dentry); + d_make_persistent(dentry, inode); p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count++; inc_nlink(dir); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index d7aec5b87c2b..8cb1a94339b8 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -765,14 +765,41 @@ static const struct file_operations bm_entry_operations = { /* /register */ +/* add to filesystem */ +static int add_entry(Node *e, struct super_block *sb) +{ + struct dentry *dentry = simple_start_creating(sb->s_root, e->name); + struct inode *inode; + struct binfmt_misc *misc; + + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + inode = bm_get_inode(sb, S_IFREG | 0644); + if (unlikely(!inode)) { + simple_done_creating(dentry); + return -ENOMEM; + } + + refcount_set(&e->users, 1); + e->dentry = dentry; + inode->i_private = e; + inode->i_fop = &bm_entry_operations; + + d_make_persistent(dentry, inode); + misc = i_binfmt_misc(inode); + write_lock(&misc->entries_lock); + list_add(&e->list, &misc->entries); + write_unlock(&misc->entries_lock); + simple_done_creating(dentry); + return 0; +} + static ssize_t bm_register_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { Node *e; - struct inode *inode; struct super_block *sb = file_inode(file)->i_sb; - struct dentry *root = sb->s_root, *dentry; - struct binfmt_misc *misc; int err = 0; struct file *f = NULL; @@ -800,39 +827,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, e->interp_file = f; } - inode_lock(d_inode(root)); - dentry = lookup_noperm(&QSTR(e->name), root); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out; - - err = -EEXIST; - if (d_really_is_positive(dentry)) - goto out2; - - inode = bm_get_inode(sb, S_IFREG | 0644); - - err = -ENOMEM; - if (!inode) - goto out2; - - refcount_set(&e->users, 1); - e->dentry = dget(dentry); - inode->i_private = e; - inode->i_fop = &bm_entry_operations; - - d_instantiate(dentry, inode); - misc = i_binfmt_misc(inode); - write_lock(&misc->entries_lock); - list_add(&e->list, &misc->entries); - write_unlock(&misc->entries_lock); - - err = 0; -out2: - dput(dentry); -out: - inode_unlock(d_inode(root)); - + err = add_entry(e, sb); if (err) { if (f) { exe_file_allow_write_access(f); @@ -1027,7 +1022,7 @@ static struct file_system_type bm_fs_type = { .name = "binfmt_misc", .init_fs_context = bm_init_fs_context, .fs_flags = FS_USERNS_MOUNT, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("binfmt_misc"); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7dda6cc68379..6b3357287b42 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -475,8 +475,8 @@ static noinline int add_ra_bio_pages(struct inode *inode, continue; } - folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, - ~__GFP_FS), 0); + folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, ~__GFP_FS), + 0, NULL); if (!folio) break; diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index 06dfcb461f53..a2ac3fb68bc8 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -736,7 +736,7 @@ again: } folio = filemap_alloc_folio(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS), - 0); + 0, NULL); if (!folio) return ERR_PTR(-ENOMEM); diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index 928746b92512..0ea4db650f85 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -15,59 +15,6 @@ #include "mds_client.h" #include "crypto.h" -/* - * The base64url encoding used by fscrypt includes the '_' character, which may - * cause problems in snapshot names (which can not start with '_'). Thus, we - * used the base64 encoding defined for IMAP mailbox names (RFC 3501) instead, - * which replaces '-' and '_' by '+' and ','. - */ -static const char base64_table[65] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; - -int ceph_base64_encode(const u8 *src, int srclen, char *dst) -{ - u32 ac = 0; - int bits = 0; - int i; - char *cp = dst; - - for (i = 0; i < srclen; i++) { - ac = (ac << 8) | src[i]; - bits += 8; - do { - bits -= 6; - *cp++ = base64_table[(ac >> bits) & 0x3f]; - } while (bits >= 6); - } - if (bits) - *cp++ = base64_table[(ac << (6 - bits)) & 0x3f]; - return cp - dst; -} - -int ceph_base64_decode(const char *src, int srclen, u8 *dst) -{ - u32 ac = 0; - int bits = 0; - int i; - u8 *bp = dst; - - for (i = 0; i < srclen; i++) { - const char *p = strchr(base64_table, src[i]); - - if (p == NULL || src[i] == 0) - return -1; - ac = (ac << 6) | (p - base64_table); - bits += 6; - if (bits >= 8) { - bits -= 8; - *bp++ = (u8)(ac >> bits); - } - } - if (ac & ((1 << bits) - 1)) - return -1; - return bp - dst; -} - static int ceph_crypt_get_context(struct inode *inode, void *ctx, size_t len) { struct ceph_inode_info *ci = ceph_inode(inode); @@ -318,7 +265,7 @@ int ceph_encode_encrypted_dname(struct inode *parent, char *buf, int elen) } /* base64 encode the encrypted name */ - elen = ceph_base64_encode(cryptbuf, len, p); + elen = base64_encode(cryptbuf, len, p, false, BASE64_IMAP); doutc(cl, "base64-encoded ciphertext name = %.*s\n", elen, p); /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */ @@ -412,7 +359,8 @@ int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname, tname = &_tname; } - declen = ceph_base64_decode(name, name_len, tname->name); + declen = base64_decode(name, name_len, + tname->name, false, BASE64_IMAP); if (declen <= 0) { ret = -EIO; goto out; @@ -426,7 +374,7 @@ int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname, ret = fscrypt_fname_disk_to_usr(dir, 0, 0, &iname, oname); if (!ret && (dir != fname->dir)) { - char tmp_buf[CEPH_BASE64_CHARS(NAME_MAX)]; + char tmp_buf[BASE64_CHARS(NAME_MAX)]; name_len = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%ld", oname->len, oname->name, dir->i_ino); diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h index 23612b2e9837..b748e2060bc9 100644 --- a/fs/ceph/crypto.h +++ b/fs/ceph/crypto.h @@ -8,6 +8,7 @@ #include <crypto/sha2.h> #include <linux/fscrypt.h> +#include <linux/base64.h> #define CEPH_FSCRYPT_BLOCK_SHIFT 12 #define CEPH_FSCRYPT_BLOCK_SIZE (_AC(1, UL) << CEPH_FSCRYPT_BLOCK_SHIFT) @@ -89,11 +90,6 @@ static inline u32 ceph_fscrypt_auth_len(struct ceph_fscrypt_auth *fa) */ #define CEPH_NOHASH_NAME_MAX (180 - SHA256_DIGEST_SIZE) -#define CEPH_BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3) - -int ceph_base64_encode(const u8 *src, int srclen, char *dst); -int ceph_base64_decode(const char *src, int srclen, u8 *dst); - void ceph_fscrypt_set_ops(struct super_block *sb); void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index bf50c6e7a029..86d7aa594ea9 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -998,13 +998,14 @@ static int prep_encrypted_symlink_target(struct ceph_mds_request *req, if (err) goto out; - req->r_path2 = kmalloc(CEPH_BASE64_CHARS(osd_link.len) + 1, GFP_KERNEL); + req->r_path2 = kmalloc(BASE64_CHARS(osd_link.len) + 1, GFP_KERNEL); if (!req->r_path2) { err = -ENOMEM; goto out; } - len = ceph_base64_encode(osd_link.name, osd_link.len, req->r_path2); + len = base64_encode(osd_link.name, osd_link.len, + req->r_path2, false, BASE64_IMAP); req->r_path2[len] = '\0'; out: fscrypt_fname_free_buffer(&osd_link); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index a596cb53f1ac..2966f88310e3 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -947,7 +947,7 @@ static int decode_encrypted_symlink(struct ceph_mds_client *mdsc, if (!sym) return -ENOMEM; - declen = ceph_base64_decode(encsym, enclen, sym); + declen = base64_decode(encsym, enclen, sym, false, BASE64_IMAP); if (declen < 0) { pr_err_client(cl, "can't decode symlink (%d). Content: %.*s\n", diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 81f4f06bc87e..ba95f636a5ab 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -400,8 +400,14 @@ static void remove_dir(struct dentry * d) configfs_remove_dirent(d); - if (d_really_is_positive(d)) - simple_rmdir(d_inode(parent),d); + if (d_really_is_positive(d)) { + if (likely(simple_empty(d))) { + __simple_rmdir(d_inode(parent),d); + dput(d); + } else { + pr_warn("remove_dir (%pd): attributes remain", d); + } + } pr_debug(" o %pd removing done (%d)\n", d, d_count(d)); @@ -598,7 +604,7 @@ static void detach_attrs(struct config_item * item) static int populate_attrs(struct config_item *item) { const struct config_item_type *t = item->ci_type; - struct configfs_group_operations *ops; + const struct configfs_group_operations *ops; struct configfs_attribute *attr; struct configfs_bin_attribute *bin_attr; int error = 0; diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 0ad32150611e..affe4742bbb5 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -30,7 +30,7 @@ struct configfs_buffer { size_t count; loff_t pos; char * page; - struct configfs_item_operations * ops; + const struct configfs_item_operations *ops; struct mutex mutex; int needs_read_fill; bool read_in_progress; diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 1d2e3a5738d1..bcda3372e141 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -211,7 +211,8 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - simple_unlink(d_inode(parent), dentry); + __simple_unlink(d_inode(parent), dentry); + dput(dentry); } else spin_unlock(&dentry->d_lock); } diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 456c4a2efb53..4929f3431189 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -116,7 +116,7 @@ static struct file_system_type configfs_fs_type = { .owner = THIS_MODULE, .name = "configfs", .init_fs_context = configfs_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("configfs"); diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 8e4c213d418b..a9a4432d12ba 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -16,6 +16,7 @@ #include <linux/export.h> #include <linux/namei.h> #include <linux/scatterlist.h> +#include <linux/base64.h> #include "fscrypt_private.h" @@ -71,7 +72,7 @@ struct fscrypt_nokey_name { /* Encoded size of max-size no-key name */ #define FSCRYPT_NOKEY_NAME_MAX_ENCODED \ - FSCRYPT_BASE64URL_CHARS(FSCRYPT_NOKEY_NAME_MAX) + BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX) static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { @@ -162,84 +163,6 @@ static int fname_decrypt(const struct inode *inode, return 0; } -static const char base64url_table[65] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; - -#define FSCRYPT_BASE64URL_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3) - -/** - * fscrypt_base64url_encode() - base64url-encode some binary data - * @src: the binary data to encode - * @srclen: the length of @src in bytes - * @dst: (output) the base64url-encoded string. Not NUL-terminated. - * - * Encodes data using base64url encoding, i.e. the "Base 64 Encoding with URL - * and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't used, - * as it's unneeded and not required by the RFC. base64url is used instead of - * base64 to avoid the '/' character, which isn't allowed in filenames. - * - * Return: the length of the resulting base64url-encoded string in bytes. - * This will be equal to FSCRYPT_BASE64URL_CHARS(srclen). - */ -static int fscrypt_base64url_encode(const u8 *src, int srclen, char *dst) -{ - u32 ac = 0; - int bits = 0; - int i; - char *cp = dst; - - for (i = 0; i < srclen; i++) { - ac = (ac << 8) | src[i]; - bits += 8; - do { - bits -= 6; - *cp++ = base64url_table[(ac >> bits) & 0x3f]; - } while (bits >= 6); - } - if (bits) - *cp++ = base64url_table[(ac << (6 - bits)) & 0x3f]; - return cp - dst; -} - -/** - * fscrypt_base64url_decode() - base64url-decode a string - * @src: the string to decode. Doesn't need to be NUL-terminated. - * @srclen: the length of @src in bytes - * @dst: (output) the decoded binary data - * - * Decodes a string using base64url encoding, i.e. the "Base 64 Encoding with - * URL and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't - * accepted, nor are non-encoding characters such as whitespace. - * - * This implementation hasn't been optimized for performance. - * - * Return: the length of the resulting decoded binary data in bytes, - * or -1 if the string isn't a valid base64url string. - */ -static int fscrypt_base64url_decode(const char *src, int srclen, u8 *dst) -{ - u32 ac = 0; - int bits = 0; - int i; - u8 *bp = dst; - - for (i = 0; i < srclen; i++) { - const char *p = strchr(base64url_table, src[i]); - - if (p == NULL || src[i] == 0) - return -1; - ac = (ac << 6) | (p - base64url_table); - bits += 6; - if (bits >= 8) { - bits -= 8; - *bp++ = (u8)(ac >> bits); - } - } - if (ac & ((1 << bits) - 1)) - return -1; - return bp - dst; -} - bool __fscrypt_fname_encrypted_size(const union fscrypt_policy *policy, u32 orig_len, u32 max_len, u32 *encrypted_len_ret) @@ -387,8 +310,8 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, nokey_name.sha256); size = FSCRYPT_NOKEY_NAME_MAX; } - oname->len = fscrypt_base64url_encode((const u8 *)&nokey_name, size, - oname->name); + oname->len = base64_encode((const u8 *)&nokey_name, size, + oname->name, false, BASE64_URLSAFE); return 0; } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); @@ -467,8 +390,8 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, if (fname->crypto_buf.name == NULL) return -ENOMEM; - ret = fscrypt_base64url_decode(iname->name, iname->len, - fname->crypto_buf.name); + ret = base64_decode(iname->name, iname->len, + fname->crypto_buf.name, false, BASE64_URLSAFE); if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) || (ret > offsetof(struct fscrypt_nokey_name, sha256) && ret != FSCRYPT_NOKEY_NAME_MAX)) { @@ -24,7 +24,7 @@ #include <linux/mmu_notifier.h> #include <linux/iomap.h> #include <linux/rmap.h> -#include <asm/pgalloc.h> +#include <linux/pgalloc.h> #define CREATE_TRACE_POINTS #include <trace/events/fs_dax.h> diff --git a/fs/dcache.c b/fs/dcache.c index 9143fd502def..dc2fff4811d1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -870,6 +870,24 @@ locked: return false; } +static void finish_dput(struct dentry *dentry) + __releases(dentry->d_lock) + __releases(RCU) +{ + while (lock_for_kill(dentry)) { + rcu_read_unlock(); + dentry = __dentry_kill(dentry); + if (!dentry) + return; + if (retain_dentry(dentry, true)) { + spin_unlock(&dentry->d_lock); + return; + } + rcu_read_lock(); + } + rcu_read_unlock(); + spin_unlock(&dentry->d_lock); +} /* * This is dput @@ -907,22 +925,21 @@ void dput(struct dentry *dentry) rcu_read_unlock(); return; } - while (lock_for_kill(dentry)) { - rcu_read_unlock(); - dentry = __dentry_kill(dentry); - if (!dentry) - return; - if (retain_dentry(dentry, true)) { - spin_unlock(&dentry->d_lock); - return; - } - rcu_read_lock(); - } - rcu_read_unlock(); - spin_unlock(&dentry->d_lock); + finish_dput(dentry); } EXPORT_SYMBOL(dput); +void d_make_discardable(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + WARN_ON(!(dentry->d_flags & DCACHE_PERSISTENT)); + dentry->d_flags &= ~DCACHE_PERSISTENT; + dentry->d_lockref.count--; + rcu_read_lock(); + finish_dput(dentry); +} +EXPORT_SYMBOL(d_make_discardable); + static void to_shrink_list(struct dentry *dentry, struct list_head *list) __must_hold(&dentry->d_lock) { @@ -1087,6 +1104,15 @@ struct dentry *d_find_alias_rcu(struct inode *inode) return de; } +void d_dispose_if_unused(struct dentry *dentry, struct list_head *dispose) +{ + spin_lock(&dentry->d_lock); + if (!dentry->d_lockref.count) + to_shrink_list(dentry, dispose); + spin_unlock(&dentry->d_lock); +} +EXPORT_SYMBOL(d_dispose_if_unused); + /* * Try to kill dentries associated with this inode. * WARNING: you must own a reference to inode. @@ -1097,12 +1123,8 @@ void d_prune_aliases(struct inode *inode) struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { - spin_lock(&dentry->d_lock); - if (!dentry->d_lockref.count) - to_shrink_list(dentry, &dispose); - spin_unlock(&dentry->d_lock); - } + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) + d_dispose_if_unused(dentry, &dispose); spin_unlock(&inode->i_lock); shrink_dentry_list(&dispose); } @@ -1142,6 +1164,7 @@ void shrink_dentry_list(struct list_head *list) shrink_kill(dentry); } } +EXPORT_SYMBOL(shrink_dentry_list); static enum lru_status dentry_lru_isolate(struct list_head *item, struct list_lru_one *lru, void *arg) @@ -1512,6 +1535,15 @@ out: return ret; } +static enum d_walk_ret select_collect_umount(void *_data, struct dentry *dentry) +{ + if (dentry->d_flags & DCACHE_PERSISTENT) { + dentry->d_flags &= ~DCACHE_PERSISTENT; + dentry->d_lockref.count--; + } + return select_collect(_data, dentry); +} + static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry) { struct select_data *data = _data; @@ -1540,18 +1572,20 @@ out: } /** - * shrink_dcache_parent - prune dcache + * shrink_dcache_tree - prune dcache * @parent: parent of entries to prune + * @for_umount: true if we want to unpin the persistent ones * * Prune the dcache to remove unused children of the parent dentry. */ -void shrink_dcache_parent(struct dentry *parent) +static void shrink_dcache_tree(struct dentry *parent, bool for_umount) { for (;;) { struct select_data data = {.start = parent}; INIT_LIST_HEAD(&data.dispose); - d_walk(parent, &data, select_collect); + d_walk(parent, &data, + for_umount ? select_collect_umount : select_collect); if (!list_empty(&data.dispose)) { shrink_dentry_list(&data.dispose); @@ -1576,6 +1610,11 @@ void shrink_dcache_parent(struct dentry *parent) shrink_dentry_list(&data.dispose); } } + +void shrink_dcache_parent(struct dentry *parent) +{ + shrink_dcache_tree(parent, false); +} EXPORT_SYMBOL(shrink_dcache_parent); static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) @@ -1602,7 +1641,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) static void do_one_tree(struct dentry *dentry) { - shrink_dcache_parent(dentry); + shrink_dcache_tree(dentry, true); d_walk(dentry, dentry, umount_check); d_drop(dentry); dput(dentry); @@ -1924,7 +1963,6 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) unsigned add_flags = d_flags_for_inode(inode); WARN_ON(d_in_lookup(dentry)); - spin_lock(&dentry->d_lock); /* * The negative counter only tracks dentries on the LRU. Don't dec if * d_lru is on another list. @@ -1937,7 +1975,6 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); fsnotify_update_flags(dentry); - spin_unlock(&dentry->d_lock); } /** @@ -1961,7 +1998,9 @@ void d_instantiate(struct dentry *entry, struct inode * inode) if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); + spin_lock(&entry->d_lock); __d_instantiate(entry, inode); + spin_unlock(&entry->d_lock); spin_unlock(&inode->i_lock); } } @@ -1980,7 +2019,9 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode) lockdep_annotate_inode_mutex_key(inode); security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); + spin_lock(&entry->d_lock); __d_instantiate(entry, inode); + spin_unlock(&entry->d_lock); WARN_ON(!(inode_state_read(inode) & I_NEW)); inode_state_clear(inode, I_NEW | I_CREATING); inode_wake_up_bit(inode, __I_NEW); @@ -2742,6 +2783,24 @@ void d_add(struct dentry *entry, struct inode *inode) } EXPORT_SYMBOL(d_add); +struct dentry *d_make_persistent(struct dentry *dentry, struct inode *inode) +{ + WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); + WARN_ON(!inode); + security_d_instantiate(dentry, inode); + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); + __d_instantiate(dentry, inode); + dentry->d_flags |= DCACHE_PERSISTENT; + dget_dlock(dentry); + if (d_unhashed(dentry)) + __d_rehash(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&inode->i_lock); + return dentry; +} +EXPORT_SYMBOL(d_make_persistent); + static void swap_names(struct dentry *dentry, struct dentry *target) { if (unlikely(dname_external(target))) { @@ -3111,26 +3170,6 @@ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) } EXPORT_SYMBOL(is_subdir); -static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) -{ - struct dentry *root = data; - if (dentry != root) { - if (d_unhashed(dentry) || !dentry->d_inode) - return D_WALK_SKIP; - - if (!(dentry->d_flags & DCACHE_GENOCIDE)) { - dentry->d_flags |= DCACHE_GENOCIDE; - dentry->d_lockref.count--; - } - } - return D_WALK_CONTINUE; -} - -void d_genocide(struct dentry *parent) -{ - d_walk(parent, parent, d_genocide_kill); -} - void d_mark_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 532bd7c46baf..4b263c328ed2 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -35,7 +35,7 @@ static struct vfsmount *debugfs_mount; static int debugfs_mount_count; static bool debugfs_registered; -static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS; +static bool debugfs_enabled __ro_after_init = IS_ENABLED(CONFIG_DEBUG_FS_ALLOW_ALL); /* * Don't allow access attributes to be changed whilst the kernel is locked down @@ -287,9 +287,6 @@ static int debugfs_get_tree(struct fs_context *fc) { int err; - if (!(debugfs_allow & DEBUGFS_ALLOW_API)) - return -EPERM; - err = get_tree_single(fc, debugfs_fill_super); if (err) return err; @@ -329,7 +326,7 @@ static struct file_system_type debug_fs_type = { .name = "debugfs", .init_fs_context = debugfs_init_fs_context, .parameters = debugfs_param_specs, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("debugfs"); @@ -368,7 +365,7 @@ static struct dentry *debugfs_start_creating(const char *name, struct dentry *dentry; int error; - if (!(debugfs_allow & DEBUGFS_ALLOW_API)) + if (!debugfs_enabled) return ERR_PTR(-EPERM); if (!debugfs_initialized()) @@ -405,16 +402,15 @@ static struct dentry *debugfs_start_creating(const char *name, static struct dentry *debugfs_failed_creating(struct dentry *dentry) { - inode_unlock(d_inode(dentry->d_parent)); - dput(dentry); + simple_done_creating(dentry); simple_release_fs(&debugfs_mount, &debugfs_mount_count); return ERR_PTR(-ENOMEM); } static struct dentry *debugfs_end_creating(struct dentry *dentry) { - inode_unlock(d_inode(dentry->d_parent)); - return dentry; + simple_done_creating(dentry); + return dentry; // borrowed } static struct dentry *__debugfs_create_file(const char *name, umode_t mode, @@ -434,11 +430,6 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, if (IS_ERR(dentry)) return dentry; - if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { - debugfs_failed_creating(dentry); - return ERR_PTR(-EPERM); - } - inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create file '%s'\n", @@ -456,7 +447,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, DEBUGFS_I(inode)->raw = real_fops; DEBUGFS_I(inode)->aux = (void *)aux; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); return debugfs_end_creating(dentry); } @@ -584,11 +575,6 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) if (IS_ERR(dentry)) return dentry; - if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { - debugfs_failed_creating(dentry); - return ERR_PTR(-EPERM); - } - inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create directory '%s'\n", @@ -602,7 +588,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return debugfs_end_creating(dentry); @@ -631,11 +617,6 @@ struct dentry *debugfs_create_automount(const char *name, if (IS_ERR(dentry)) return dentry; - if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { - debugfs_failed_creating(dentry); - return ERR_PTR(-EPERM); - } - inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create automount '%s'\n", @@ -649,7 +630,7 @@ struct dentry *debugfs_create_automount(const char *name, DEBUGFS_I(inode)->automount = f; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return debugfs_end_creating(dentry); @@ -704,7 +685,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_op = &debugfs_symlink_inode_operations; inode->i_link = link; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); return debugfs_end_creating(dentry); } EXPORT_SYMBOL_GPL(debugfs_create_symlink); @@ -899,21 +880,25 @@ static int __init debugfs_kernel(char *str) { if (str) { if (!strcmp(str, "on")) - debugfs_allow = DEBUGFS_ALLOW_API | DEBUGFS_ALLOW_MOUNT; - else if (!strcmp(str, "no-mount")) - debugfs_allow = DEBUGFS_ALLOW_API; + debugfs_enabled = true; else if (!strcmp(str, "off")) - debugfs_allow = 0; + debugfs_enabled = false; + else if (!strcmp(str, "no-mount")) { + pr_notice("debugfs=no-mount is a deprecated alias " + "for debugfs=off\n"); + debugfs_enabled = false; + } } return 0; } early_param("debugfs", debugfs_kernel); + static int __init debugfs_init(void) { int retval; - if (!(debugfs_allow & DEBUGFS_ALLOW_MOUNT)) + if (!debugfs_enabled) return -EPERM; retval = sysfs_create_mount_point(kernel_kobj, "debug"); diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h index 427987f81571..c95699b27a56 100644 --- a/fs/debugfs/internal.h +++ b/fs/debugfs/internal.h @@ -55,17 +55,4 @@ enum { HAS_IOCTL = 16 }; -#define DEBUGFS_ALLOW_API BIT(0) -#define DEBUGFS_ALLOW_MOUNT BIT(1) - -#ifdef CONFIG_DEBUG_FS_ALLOW_ALL -#define DEFAULT_DEBUGFS_ALLOW_BITS (DEBUGFS_ALLOW_MOUNT | DEBUGFS_ALLOW_API) -#endif -#ifdef CONFIG_DEBUG_FS_DISALLOW_MOUNT -#define DEFAULT_DEBUGFS_ALLOW_BITS (DEBUGFS_ALLOW_API) -#endif -#ifdef CONFIG_DEBUG_FS_ALLOW_NONE -#define DEFAULT_DEBUGFS_ALLOW_BITS (0) -#endif - #endif /* _DEBUGFS_INTERNAL_H_ */ diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index fdf22264a8e9..9f3de528c358 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -102,7 +102,7 @@ struct pts_fs_info { struct ida allocated_ptys; struct pts_mount_opts mount_opts; struct super_block *sb; - struct dentry *ptmx_dentry; + struct inode *ptmx_inode; // borrowed }; static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) @@ -259,7 +259,6 @@ static int devpts_parse_param(struct fs_context *fc, struct fs_parameter *param) static int mknod_ptmx(struct super_block *sb, struct fs_context *fc) { int mode; - int rc = -ENOMEM; struct dentry *dentry; struct inode *inode; struct dentry *root = sb->s_root; @@ -268,18 +267,10 @@ static int mknod_ptmx(struct super_block *sb, struct fs_context *fc) kuid_t ptmx_uid = current_fsuid(); kgid_t ptmx_gid = current_fsgid(); - inode_lock(d_inode(root)); - - /* If we have already created ptmx node, return */ - if (fsi->ptmx_dentry) { - rc = 0; - goto out; - } - - dentry = d_alloc_name(root, "ptmx"); - if (!dentry) { + dentry = simple_start_creating(root, "ptmx"); + if (IS_ERR(dentry)) { pr_err("Unable to alloc dentry for ptmx node\n"); - goto out; + return PTR_ERR(dentry); } /* @@ -287,9 +278,9 @@ static int mknod_ptmx(struct super_block *sb, struct fs_context *fc) */ inode = new_inode(sb); if (!inode) { + simple_done_creating(dentry); pr_err("Unable to alloc inode for ptmx node\n"); - dput(dentry); - goto out; + return -ENOMEM; } inode->i_ino = 2; @@ -299,23 +290,18 @@ static int mknod_ptmx(struct super_block *sb, struct fs_context *fc) init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2)); inode->i_uid = ptmx_uid; inode->i_gid = ptmx_gid; + fsi->ptmx_inode = inode; - d_add(dentry, inode); + d_make_persistent(dentry, inode); - fsi->ptmx_dentry = dentry; - rc = 0; -out: - inode_unlock(d_inode(root)); - return rc; + simple_done_creating(dentry); + + return 0; } static void update_ptmx_mode(struct pts_fs_info *fsi) { - struct inode *inode; - if (fsi->ptmx_dentry) { - inode = d_inode(fsi->ptmx_dentry); - inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; - } + fsi->ptmx_inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; } static int devpts_reconfigure(struct fs_context *fc) @@ -461,7 +447,7 @@ static void devpts_kill_sb(struct super_block *sb) if (fsi) ida_destroy(&fsi->allocated_ptys); kfree(fsi); - kill_litter_super(sb); + kill_anon_super(sb); } static struct file_system_type devpts_fs_type = { @@ -534,16 +520,15 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) sprintf(s, "%d", index); dentry = d_alloc_name(root, s); - if (dentry) { - dentry->d_fsdata = priv; - d_add(dentry, inode); - fsnotify_create(d_inode(root), dentry); - } else { + if (!dentry) { iput(inode); - dentry = ERR_PTR(-ENOMEM); + return ERR_PTR(-ENOMEM); } - - return dentry; + dentry->d_fsdata = priv; + d_make_persistent(dentry, inode); + fsnotify_create(d_inode(root), dentry); + dput(dentry); + return dentry; // borrowed } /** @@ -573,7 +558,7 @@ void devpts_pty_kill(struct dentry *dentry) drop_nlink(dentry->d_inode); d_drop(dentry); fsnotify_unlink(d_inode(dentry->d_parent), dentry); - dput(dentry); /* d_alloc_name() in devpts_pty_new() */ + d_make_discardable(dentry); } static int __init init_devpts_fs(void) diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 2891614abf8d..95dcad83da11 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -113,8 +113,7 @@ static int efivarfs_create(struct mnt_idmap *idmap, struct inode *dir, inode->i_private = var; - d_instantiate(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); return 0; } @@ -126,9 +125,7 @@ static int efivarfs_unlink(struct inode *dir, struct dentry *dentry) if (efivar_entry_delete(var)) return -EINVAL; - drop_nlink(d_inode(dentry)); - dput(dentry); - return 0; + return simple_unlink(dir, dentry); }; const struct inode_operations efivarfs_dir_inode_operations = { diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 6de97565d5f7..9da992925920 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -278,7 +278,8 @@ static int efivarfs_create_dentry(struct super_block *sb, efi_char16_t *name16, inode->i_private = entry; i_size_write(inode, size + sizeof(__u32)); /* attributes + data */ inode_unlock(inode); - d_add(dentry, inode); + d_make_persistent(dentry, inode); + dput(dentry); return 0; @@ -522,7 +523,7 @@ static void efivarfs_kill_sb(struct super_block *sb) struct efivarfs_fs_info *sfi = sb->s_fs_info; blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi->nb); - kill_litter_super(sb); + kill_anon_super(sb); kfree(sfi); } diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 461a929e0825..65da21504632 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -562,7 +562,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe) * Allocate a managed folio for cached I/O, or it may be * then filled with a file-backed folio for in-place I/O */ - newfolio = filemap_alloc_folio(gfp, 0); + newfolio = filemap_alloc_folio(gfp, 0, NULL); if (!newfolio) continue; newfolio->private = Z_EROFS_PREALLOCATED_FOLIO; diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 2d2d510f2372..5429041c7eaf 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -106,7 +106,7 @@ static int exfat_allocate_bitmap(struct super_block *sb, (PAGE_SHIFT - sb->s_blocksize_bits); for (i = 0; i < sbi->map_sectors; i++) { /* Trigger the next readahead in advance. */ - if (0 == (i % max_ra_count)) { + if (max_ra_count && 0 == (i % max_ra_count)) { blk_start_plug(&plug); for (j = i; j < min(max_ra_count, sbi->map_sectors - i) + i; j++) sb_breadahead(sb, sector + j); @@ -183,11 +183,10 @@ void exfat_free_bitmap(struct exfat_sb_info *sbi) kvfree(sbi->vol_amap); } -int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync) +int exfat_set_bitmap(struct super_block *sb, unsigned int clu, bool sync) { int i, b; unsigned int ent_idx; - struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); if (!is_valid_cluster(sbi, clu)) @@ -202,11 +201,10 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync) return 0; } -int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) +int exfat_clear_bitmap(struct super_block *sb, unsigned int clu, bool sync) { int i, b; unsigned int ent_idx; - struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); if (!is_valid_cluster(sbi, clu)) @@ -226,6 +224,28 @@ int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) return 0; } +bool exfat_test_bitmap(struct super_block *sb, unsigned int clu) +{ + int i, b; + unsigned int ent_idx; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + if (!sbi->vol_amap) + return true; + + if (!is_valid_cluster(sbi, clu)) + return false; + + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); + + if (!test_bit_le(b, sbi->vol_amap[i]->b_data)) + return false; + + return true; +} + /* * If the value of "clu" is 0, it means cluster 2 which is the first cluster of * the cluster heap. diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 7229146fe2bf..3045a58e124a 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -604,6 +604,11 @@ static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir if (ret) return ret; + if (!exfat_test_bitmap(sb, clu)) { + exfat_err(sb, "failed to test cluster bit(%u)", clu); + return -EIO; + } + /* byte offset in cluster */ off = EXFAT_CLU_OFFSET(off, sbi); diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 38210fb6901c..176fef62574c 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -452,8 +452,9 @@ int exfat_count_num_clusters(struct super_block *sb, /* balloc.c */ int exfat_load_bitmap(struct super_block *sb); void exfat_free_bitmap(struct exfat_sb_info *sbi); -int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync); -int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync); +int exfat_set_bitmap(struct super_block *sb, unsigned int clu, bool sync); +int exfat_clear_bitmap(struct super_block *sb, unsigned int clu, bool sync); +bool exfat_test_bitmap(struct super_block *sb, unsigned int clu); unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu); int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count); int exfat_trim_fs(struct inode *inode, struct fstrim_range *range); diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 825083634ba2..c9c5f2e3a05e 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -205,7 +205,7 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain cur_cmap_i = next_cmap_i; } - err = exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + err = exfat_clear_bitmap(sb, clu, (sync && IS_DIRSYNC(inode))); if (err) break; clu++; @@ -233,7 +233,7 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain cur_cmap_i = next_cmap_i; } - if (exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode)))) + if (exfat_clear_bitmap(sb, clu, (sync && IS_DIRSYNC(inode)))) break; if (sbi->options.discard) { @@ -409,7 +409,7 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc, } /* update allocation bitmap */ - if (exfat_set_bitmap(inode, new_clu, sync_bmap)) { + if (exfat_set_bitmap(sb, new_clu, sync_bmap)) { ret = -EIO; goto free_cluster; } diff --git a/fs/exfat/file.c b/fs/exfat/file.c index adc37b4d7fc2..536c8078f0c1 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -25,6 +25,8 @@ static int exfat_cont_expand(struct inode *inode, loff_t size) struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_chain clu; + truncate_pagecache(inode, i_size_read(inode)); + ret = inode_newsize_ok(inode, size); if (ret) return ret; @@ -639,6 +641,9 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) inode_lock(inode); + if (pos > i_size_read(inode)) + truncate_pagecache(inode, i_size_read(inode)); + valid_size = ei->valid_size; ret = generic_write_checks(iocb, iter); diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 745dce29ddb5..dfe957493d49 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -645,16 +645,6 @@ static int exfat_find(struct inode *dir, const struct qstr *qname, info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size); info->size = le64_to_cpu(ep2->dentry.stream.size); - if (info->valid_size < 0) { - exfat_fs_error(sb, "data valid size is invalid(%lld)", info->valid_size); - return -EIO; - } - - if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) { - exfat_fs_error(sb, "data size is invalid(%lld)", info->size); - return -EIO; - } - info->start_clu = le32_to_cpu(ep2->dentry.stream.start_clu); if (!is_valid_cluster(sbi, info->start_clu) && info->size) { exfat_warn(sb, "start_clu is invalid cluster(0x%x)", @@ -692,6 +682,16 @@ static int exfat_find(struct inode *dir, const struct qstr *qname, 0); exfat_put_dentry_set(&es, false); + if (info->valid_size < 0) { + exfat_fs_error(sb, "data valid size is invalid(%lld)", info->valid_size); + return -EIO; + } + + if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) { + exfat_fs_error(sb, "data size is invalid(%lld)", info->size); + return -EIO; + } + if (ei->start_clu == EXFAT_FREE_CLUSTER) { exfat_fs_error(sb, "non-zero size file starts with zero cluster (size : %llu, p_dir : %u, entry : 0x%08x)", diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 74d451f732c7..10e872a99663 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -629,6 +629,17 @@ static int __exfat_fill_super(struct super_block *sb, goto free_bh; } + if (!exfat_test_bitmap(sb, sbi->root_dir)) { + exfat_warn(sb, "failed to test first cluster bit of root dir(%u)", + sbi->root_dir); + /* + * The first cluster bit of the root directory should never + * be unset except when storage is corrupted. This bit is + * set to allow operations after mount. + */ + exfat_set_bitmap(sb, sbi->root_dir, false); + } + ret = exfat_count_used_clusters(sb, &sbi->used_clusters); if (ret) { exfat_err(sb, "failed to scan clusters"); @@ -813,10 +824,21 @@ static int exfat_init_fs_context(struct fs_context *fc) ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - sbi->options.fs_uid = current_uid(); - sbi->options.fs_gid = current_gid(); - sbi->options.fs_fmask = current->fs->umask; - sbi->options.fs_dmask = current->fs->umask; + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE && fc->root) { + struct super_block *sb = fc->root->d_sb; + struct exfat_mount_options *cur_opts = &EXFAT_SB(sb)->options; + + sbi->options.fs_uid = cur_opts->fs_uid; + sbi->options.fs_gid = cur_opts->fs_gid; + sbi->options.fs_fmask = cur_opts->fs_fmask; + sbi->options.fs_dmask = cur_opts->fs_dmask; + } else { + sbi->options.fs_uid = current_uid(); + sbi->options.fs_gid = current_gid(); + sbi->options.fs_fmask = current->fs->umask; + sbi->options.fs_dmask = current->fs->umask; + } + sbi->options.allow_utime = -1; sbi->options.errors = EXFAT_ERRORS_RO; exfat_set_iocharset(&sbi->options, exfat_default_iocharset); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bbe07e3a6c75..300664269eb6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1318,7 +1318,7 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) f2fs_submit_merged_write(sbi, DATA); prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); - io_schedule_timeout(DEFAULT_IO_TIMEOUT); + io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); } finish_wait(&sbi->cp_wait, &wait); } @@ -1673,7 +1673,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) goto out; } - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, CP_PHASE_START_BLOCK_OPS); err = block_operations(sbi); if (err) @@ -1681,7 +1681,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) stat_cp_time(cpc, CP_TIME_OP_LOCK); - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, CP_PHASE_FINISH_BLOCK_OPS); f2fs_flush_merged_writes(sbi); @@ -1747,7 +1747,7 @@ stop: /* update CP_TIME to trigger checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, CP_PHASE_FINISH_CHECKPOINT); out: if (cpc->reason != CP_RESIZE) f2fs_up_write(&sbi->cp_global_sem); @@ -1974,7 +1974,7 @@ void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi) /* Let's wait for the previous dispatched checkpoint. */ while (atomic_read(&cprc->queued_ckpt)) - io_schedule_timeout(DEFAULT_IO_TIMEOUT); + io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); } void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index be53e06caf3d..7b68bf22989d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -120,7 +120,7 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len) } static void f2fs_put_rpages_wbc(struct compress_ctx *cc, - struct writeback_control *wbc, bool redirty, int unlock) + struct writeback_control *wbc, bool redirty, bool unlock) { unsigned int i; @@ -759,10 +759,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) ret = -EFSCORRUPTED; /* Avoid f2fs_commit_super in irq context */ - if (!in_task) - f2fs_handle_error_async(sbi, ERROR_FAIL_DECOMPRESSION); - else - f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION); + f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION); goto out_release; } @@ -1060,7 +1057,7 @@ static void cancel_cluster_writeback(struct compress_ctx *cc, f2fs_submit_merged_write(F2FS_I_SB(cc->inode), DATA); while (atomic_read(&cic->pending_pages) != (cc->valid_nr_cpages - submitted + 1)) - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + f2fs_io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); } /* Cancel writeback and stay locked. */ @@ -1205,7 +1202,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, if (copied) set_cluster_dirty(&cc); - f2fs_put_rpages_wbc(&cc, NULL, false, 1); + f2fs_put_rpages_wbc(&cc, NULL, false, true); f2fs_destroy_compress_ctx(&cc, false); return first_index; @@ -1577,7 +1574,7 @@ continue_unlock: */ if (IS_NOQUOTA(cc->inode)) goto out; - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + f2fs_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); goto retry_write; } goto out; @@ -1608,7 +1605,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, add_compr_block_stat(cc->inode, cc->cluster_size); goto write; } else if (err) { - f2fs_put_rpages_wbc(cc, wbc, true, 1); + f2fs_put_rpages_wbc(cc, wbc, true, true); goto destroy_out; } @@ -1622,7 +1619,7 @@ write: f2fs_bug_on(F2FS_I_SB(cc->inode), *submitted); err = f2fs_write_raw_pages(cc, submitted, wbc, io_type); - f2fs_put_rpages_wbc(cc, wbc, false, 0); + f2fs_put_rpages_wbc(cc, wbc, false, false); destroy_out: f2fs_destroy_compress_ctx(cc, false); return err; @@ -1947,7 +1944,7 @@ static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, return; } - cfolio = filemap_alloc_folio(__GFP_NOWARN | __GFP_IO, 0); + cfolio = filemap_alloc_folio(__GFP_NOWARN | __GFP_IO, 0, NULL); if (!cfolio) return; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8bf4feda42b0..c30e69392a62 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -752,7 +752,7 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, } static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, - struct page *page, enum temp_type temp) + struct folio *folio, enum temp_type temp) { struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; struct bio_entry *be; @@ -761,8 +761,7 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, be->bio = bio; bio_get(bio); - if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) - f2fs_bug_on(sbi, 1); + bio_add_folio_nofail(bio, folio, folio_size(folio), 0); f2fs_down_write(&io->bio_list_lock); list_add_tail(&be->list, &io->bio_list); @@ -776,7 +775,7 @@ static void del_bio_entry(struct bio_entry *be) } static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, - struct page *page) + struct folio *folio) { struct folio *fio_folio = fio->folio; struct f2fs_sb_info *sbi = fio->sbi; @@ -802,8 +801,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, if (f2fs_crypt_mergeable_bio(*bio, fio_folio->mapping->host, fio_folio->index, fio) && - bio_add_page(*bio, page, PAGE_SIZE, 0) == - PAGE_SIZE) { + bio_add_folio(*bio, folio, folio_size(folio), 0)) { ret = 0; break; } @@ -904,9 +902,9 @@ alloc_new: f2fs_set_bio_crypt_ctx(bio, folio->mapping->host, folio->index, fio, GFP_NOIO); - add_bio_entry(fio->sbi, bio, &data_folio->page, fio->temp); + add_bio_entry(fio->sbi, bio, data_folio, fio->temp); } else { - if (add_ipu_page(fio, &bio, &data_folio->page)) + if (add_ipu_page(fio, &bio, data_folio)) goto alloc_new; } @@ -1275,7 +1273,7 @@ struct folio *f2fs_find_data_folio(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct folio *folio; - folio = __filemap_get_folio(mapping, index, FGP_ACCESSED, 0); + folio = f2fs_filemap_get_folio(mapping, index, FGP_ACCESSED, 0); if (IS_ERR(folio)) goto read; if (folio_test_uptodate(folio)) @@ -1420,6 +1418,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag) { + f2fs_down_read(&sbi->cp_enable_rwsem); if (flag == F2FS_GET_BLOCK_PRE_AIO) f2fs_down_read(&sbi->node_change); else @@ -1432,6 +1431,7 @@ static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag) f2fs_up_read(&sbi->node_change); else f2fs_unlock_op(sbi); + f2fs_up_read(&sbi->cp_enable_rwsem); } int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index) @@ -3138,8 +3138,8 @@ result: } else if (ret == -EAGAIN) { ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) { - f2fs_io_schedule_timeout( - DEFAULT_IO_TIMEOUT); + f2fs_schedule_timeout( + DEFAULT_SCHEDULE_TIMEOUT); goto retry_write; } goto next; @@ -3221,6 +3221,19 @@ static inline bool __should_serialize_io(struct inode *inode, return false; } +static inline void account_writeback(struct inode *inode, bool inc) +{ + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return; + + f2fs_down_read(&F2FS_I(inode)->i_sem); + if (inc) + atomic_inc(&F2FS_I(inode)->writeback); + else + atomic_dec(&F2FS_I(inode)->writeback); + f2fs_up_read(&F2FS_I(inode)->i_sem); +} + static int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type) @@ -3266,10 +3279,14 @@ static int __f2fs_write_data_pages(struct address_space *mapping, locked = true; } + account_writeback(inode, true); + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug); + account_writeback(inode, false); + if (locked) mutex_unlock(&sbi->writepages); @@ -3566,8 +3583,9 @@ repeat: * Do not use FGP_STABLE to avoid deadlock. * Will wait that below with our IO control. */ - folio = __filemap_get_folio(mapping, index, - FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); + folio = f2fs_filemap_get_folio(mapping, index, + FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS, + mapping_gfp_mask(mapping)); if (IS_ERR(folio)) { err = PTR_ERR(folio); goto fail; @@ -3637,8 +3655,7 @@ repeat: return 0; put_folio: - folio_unlock(folio); - folio_put(folio); + f2fs_folio_put(folio, true); fail: f2fs_write_failed(inode, pos + len); return err; @@ -3694,8 +3711,7 @@ static int f2fs_write_end(const struct kiocb *iocb, pos + copied); } unlock_out: - folio_unlock(folio); - folio_put(folio); + f2fs_folio_put(folio, true); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 43a83bbd3bc5..032683835569 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -251,6 +251,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); + si->blkoff[i] = curseg->next_blkoff; si->curseg[i] = curseg->segno; si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno); si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); @@ -508,55 +509,63 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); - seq_printf(s, " TYPE %8s %8s %8s %10s %10s %10s\n", - "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk"); - seq_printf(s, " - COLD data: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " TYPE %8s %8s %8s %8s %10s %10s %10s\n", + "blkoff", "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk"); + seq_printf(s, " - COLD data: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_COLD_DATA], si->curseg[CURSEG_COLD_DATA], si->cursec[CURSEG_COLD_DATA], si->curzone[CURSEG_COLD_DATA], si->dirty_seg[CURSEG_COLD_DATA], si->full_seg[CURSEG_COLD_DATA], si->valid_blks[CURSEG_COLD_DATA]); - seq_printf(s, " - WARM data: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " - WARM data: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_WARM_DATA], si->curseg[CURSEG_WARM_DATA], si->cursec[CURSEG_WARM_DATA], si->curzone[CURSEG_WARM_DATA], si->dirty_seg[CURSEG_WARM_DATA], si->full_seg[CURSEG_WARM_DATA], si->valid_blks[CURSEG_WARM_DATA]); - seq_printf(s, " - HOT data: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " - HOT data: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_HOT_DATA], si->curseg[CURSEG_HOT_DATA], si->cursec[CURSEG_HOT_DATA], si->curzone[CURSEG_HOT_DATA], si->dirty_seg[CURSEG_HOT_DATA], si->full_seg[CURSEG_HOT_DATA], si->valid_blks[CURSEG_HOT_DATA]); - seq_printf(s, " - Dir dnode: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " - Dir dnode: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_HOT_NODE], si->curseg[CURSEG_HOT_NODE], si->cursec[CURSEG_HOT_NODE], si->curzone[CURSEG_HOT_NODE], si->dirty_seg[CURSEG_HOT_NODE], si->full_seg[CURSEG_HOT_NODE], si->valid_blks[CURSEG_HOT_NODE]); - seq_printf(s, " - File dnode: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " - File dnode: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_WARM_NODE], si->curseg[CURSEG_WARM_NODE], si->cursec[CURSEG_WARM_NODE], si->curzone[CURSEG_WARM_NODE], si->dirty_seg[CURSEG_WARM_NODE], si->full_seg[CURSEG_WARM_NODE], si->valid_blks[CURSEG_WARM_NODE]); - seq_printf(s, " - Indir nodes: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " - Indir nodes: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_COLD_NODE], si->curseg[CURSEG_COLD_NODE], si->cursec[CURSEG_COLD_NODE], si->curzone[CURSEG_COLD_NODE], si->dirty_seg[CURSEG_COLD_NODE], si->full_seg[CURSEG_COLD_NODE], si->valid_blks[CURSEG_COLD_NODE]); - seq_printf(s, " - Pinned file: %8d %8d %8d\n", + seq_printf(s, " - Pinned file: %8d %8d %8d %8d\n", + si->blkoff[CURSEG_COLD_DATA_PINNED], si->curseg[CURSEG_COLD_DATA_PINNED], si->cursec[CURSEG_COLD_DATA_PINNED], si->curzone[CURSEG_COLD_DATA_PINNED]); - seq_printf(s, " - ATGC data: %8d %8d %8d\n", + seq_printf(s, " - ATGC data: %8d %8d %8d %8d\n", + si->blkoff[CURSEG_ALL_DATA_ATGC], si->curseg[CURSEG_ALL_DATA_ATGC], si->cursec[CURSEG_ALL_DATA_ATGC], si->curzone[CURSEG_ALL_DATA_ATGC]); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 33e09c453c70..0ed84cc065a7 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -808,7 +808,7 @@ static void __update_extent_tree_range(struct inode *inode, } goto out_read_extent_cache; update_age_extent_cache: - if (!tei->last_blocks) + if (tei->last_blocks == F2FS_EXTENT_AGE_INVALID) goto out_read_extent_cache; __set_extent_info(&ei, fofs, len, 0, false, @@ -912,7 +912,7 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, cur_age = cur_blocks - tei.last_blocks; else /* allocated_data_blocks overflow */ - cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks; + cur_age = (ULLONG_MAX - 1) - tei.last_blocks + cur_blocks; if (tei.age) ei->age = __calculate_block_age(sbi, cur_age, tei.age); @@ -1114,6 +1114,7 @@ void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, struct extent_info ei = { .fofs = fofs, .len = len, + .last_blocks = F2FS_EXTENT_AGE_INVALID, }; if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5b4e9548a231..20edbb99b814 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -96,47 +96,52 @@ extern const char *f2fs_fault_name[FAULT_MAX]; /* * For mount options */ -#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000001 -#define F2FS_MOUNT_DISCARD 0x00000002 -#define F2FS_MOUNT_NOHEAP 0x00000004 -#define F2FS_MOUNT_XATTR_USER 0x00000008 -#define F2FS_MOUNT_POSIX_ACL 0x00000010 -#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000020 -#define F2FS_MOUNT_INLINE_XATTR 0x00000040 -#define F2FS_MOUNT_INLINE_DATA 0x00000080 -#define F2FS_MOUNT_INLINE_DENTRY 0x00000100 -#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 -#define F2FS_MOUNT_NOBARRIER 0x00000400 -#define F2FS_MOUNT_FASTBOOT 0x00000800 -#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00001000 -#define F2FS_MOUNT_DATA_FLUSH 0x00002000 -#define F2FS_MOUNT_FAULT_INJECTION 0x00004000 -#define F2FS_MOUNT_USRQUOTA 0x00008000 -#define F2FS_MOUNT_GRPQUOTA 0x00010000 -#define F2FS_MOUNT_PRJQUOTA 0x00020000 -#define F2FS_MOUNT_QUOTA 0x00040000 -#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00080000 -#define F2FS_MOUNT_RESERVE_ROOT 0x00100000 -#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x00200000 -#define F2FS_MOUNT_NORECOVERY 0x00400000 -#define F2FS_MOUNT_ATGC 0x00800000 -#define F2FS_MOUNT_MERGE_CHECKPOINT 0x01000000 -#define F2FS_MOUNT_GC_MERGE 0x02000000 -#define F2FS_MOUNT_COMPRESS_CACHE 0x04000000 -#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x08000000 -#define F2FS_MOUNT_NAT_BITS 0x10000000 -#define F2FS_MOUNT_INLINECRYPT 0x20000000 -/* - * Some f2fs environments expect to be able to pass the "lazytime" option - * string rather than using the MS_LAZYTIME flag, so this must remain. - */ -#define F2FS_MOUNT_LAZYTIME 0x40000000 -#define F2FS_MOUNT_RESERVE_NODE 0x80000000 +enum f2fs_mount_opt { + F2FS_MOUNT_DISABLE_ROLL_FORWARD, + F2FS_MOUNT_DISCARD, + F2FS_MOUNT_NOHEAP, + F2FS_MOUNT_XATTR_USER, + F2FS_MOUNT_POSIX_ACL, + F2FS_MOUNT_DISABLE_EXT_IDENTIFY, + F2FS_MOUNT_INLINE_XATTR, + F2FS_MOUNT_INLINE_DATA, + F2FS_MOUNT_INLINE_DENTRY, + F2FS_MOUNT_FLUSH_MERGE, + F2FS_MOUNT_NOBARRIER, + F2FS_MOUNT_FASTBOOT, + F2FS_MOUNT_READ_EXTENT_CACHE, + F2FS_MOUNT_DATA_FLUSH, + F2FS_MOUNT_FAULT_INJECTION, + F2FS_MOUNT_USRQUOTA, + F2FS_MOUNT_GRPQUOTA, + F2FS_MOUNT_PRJQUOTA, + F2FS_MOUNT_QUOTA, + F2FS_MOUNT_INLINE_XATTR_SIZE, + F2FS_MOUNT_RESERVE_ROOT, + F2FS_MOUNT_DISABLE_CHECKPOINT, + F2FS_MOUNT_NORECOVERY, + F2FS_MOUNT_ATGC, + F2FS_MOUNT_MERGE_CHECKPOINT, + F2FS_MOUNT_GC_MERGE, + F2FS_MOUNT_COMPRESS_CACHE, + F2FS_MOUNT_AGE_EXTENT_CACHE, + F2FS_MOUNT_NAT_BITS, + F2FS_MOUNT_INLINECRYPT, + /* + * Some f2fs environments expect to be able to pass the "lazytime" option + * string rather than using the MS_LAZYTIME flag, so this must remain. + */ + F2FS_MOUNT_LAZYTIME, + F2FS_MOUNT_RESERVE_NODE, +}; #define F2FS_OPTION(sbi) ((sbi)->mount_opt) -#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) (F2FS_OPTION(sbi).opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) (F2FS_OPTION(sbi).opt & F2FS_MOUNT_##option) +#define clear_opt(sbi, option) \ + (F2FS_OPTION(sbi).opt &= ~BIT(F2FS_MOUNT_##option)) +#define set_opt(sbi, option) \ + (F2FS_OPTION(sbi).opt |= BIT(F2FS_MOUNT_##option)) +#define test_opt(sbi, option) \ + (F2FS_OPTION(sbi).opt & BIT(F2FS_MOUNT_##option)) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -183,7 +188,7 @@ struct f2fs_rwsem { }; struct f2fs_mount_info { - unsigned int opt; + unsigned long long opt; block_t root_reserved_blocks; /* root reserved blocks */ block_t root_reserved_nodes; /* root reserved nodes */ kuid_t s_resuid; /* reserved blocks for uid */ @@ -245,6 +250,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_COMPRESSION 0x00002000 #define F2FS_FEATURE_RO 0x00004000 #define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 +#define F2FS_FEATURE_PACKED_SSA 0x00010000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) @@ -281,7 +287,7 @@ enum { #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ #define DEF_DISABLE_INTERVAL 5 /* 5 secs */ -#define DEF_ENABLE_INTERVAL 16 /* 16 secs */ +#define DEF_ENABLE_INTERVAL 5 /* 5 secs */ #define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */ #define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */ @@ -313,6 +319,12 @@ struct cp_control { struct cp_stats stats; }; +enum f2fs_cp_phase { + CP_PHASE_START_BLOCK_OPS, + CP_PHASE_FINISH_BLOCK_OPS, + CP_PHASE_FINISH_CHECKPOINT, +}; + /* * indicate meta/data type */ @@ -406,6 +418,8 @@ struct discard_entry { #define DEFAULT_DISCARD_GRANULARITY 16 /* default maximum discard granularity of ordered discard, unit: block count */ #define DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY 16 +/* default interval of periodical discard submission */ +#define DEFAULT_DISCARD_INTERVAL (msecs_to_jiffies(20)) /* max discard pend list number */ #define MAX_PLIST_NUM 512 @@ -655,8 +669,8 @@ enum { #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO or flush count */ -/* congestion wait timeout value, default: 20ms */ -#define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20)) +/* IO/non-IO congestion wait timeout value, default: 1ms */ +#define DEFAULT_SCHEDULE_TIMEOUT (msecs_to_jiffies(1)) /* timeout value injected, default: 1000ms */ #define DEFAULT_FAULT_TIMEOUT (msecs_to_jiffies(1000)) @@ -707,6 +721,12 @@ enum extent_type { NR_EXTENT_CACHES, }; +/* + * Reserved value to mark invalid age extents, hence valid block range + * from 0 to ULLONG_MAX-1 + */ +#define F2FS_EXTENT_AGE_INVALID ULLONG_MAX + struct extent_info { unsigned int fofs; /* start offset in a file */ unsigned int len; /* length of the extent */ @@ -947,6 +967,7 @@ struct f2fs_inode_info { unsigned char i_compress_level; /* compress level (lz4hc,zstd) */ unsigned char i_compress_flag; /* compress flag */ unsigned int i_cluster_size; /* cluster size */ + atomic_t writeback; /* count # of writeback thread */ unsigned int atomic_write_cnt; loff_t original_i_size; /* original i_size before atomic write */ @@ -1661,6 +1682,7 @@ struct f2fs_sb_info { #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ + unsigned int unusable_blocks_per_sec; /* unusable blocks per section */ unsigned int max_open_zones; /* max open zone resources of the zoned device */ /* For adjust the priority writing position of data in zone UFS */ unsigned int blkzone_alloc_policy; @@ -1694,6 +1716,7 @@ struct f2fs_sb_info { long interval_time[MAX_TIME]; /* to store thresholds */ struct ckpt_req_control cprc_info; /* for checkpoint request control */ struct cp_stats cp_stats; /* for time stat of checkpoint */ + struct f2fs_rwsem cp_enable_rwsem; /* block cache/dio write */ struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ @@ -1732,7 +1755,6 @@ struct f2fs_sb_info { unsigned int meta_ino_num; /* meta inode number*/ unsigned int log_blocks_per_seg; /* log2 blocks per segment */ unsigned int blocks_per_seg; /* blocks per segment */ - unsigned int unusable_blocks_per_sec; /* unusable blocks per section */ unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ @@ -1884,9 +1906,6 @@ struct f2fs_sb_info { spinlock_t error_lock; /* protect errors/stop_reason array */ bool error_dirty; /* errors of sb is dirty */ - struct kmem_cache *inline_xattr_slab; /* inline xattr entry */ - unsigned int inline_xattr_slab_size; /* default inline xattr slab size */ - /* For reclaimed segs statistics per each GC mode */ unsigned int gc_segment_mode; /* GC state for reclaimed segments */ unsigned int gc_reclaimed_segs[MAX_GC_MODE]; /* Reclaimed segs for each mode */ @@ -2096,7 +2115,7 @@ static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) static inline struct f2fs_super_block *F2FS_SUPER_BLOCK(struct folio *folio, pgoff_t index) { - pgoff_t idx_in_folio = index % (1 << folio_order(folio)); + pgoff_t idx_in_folio = index % folio_nr_pages(folio); return (struct f2fs_super_block *) (page_address(folio_page(folio, idx_in_folio)) + @@ -2961,16 +2980,6 @@ static inline struct folio *f2fs_filemap_get_folio( return __filemap_get_folio(mapping, index, fgp_flags, gfp_mask); } -static inline struct page *f2fs_pagecache_get_page( - struct address_space *mapping, pgoff_t index, - fgf_t fgp_flags, gfp_t gfp_mask) -{ - if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) - return NULL; - - return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); -} - static inline void f2fs_folio_put(struct folio *folio, bool unlock) { if (IS_ERR_OR_NULL(folio)) @@ -2983,7 +2992,7 @@ static inline void f2fs_folio_put(struct folio *folio, bool unlock) folio_put(folio); } -static inline void f2fs_put_page(struct page *page, int unlock) +static inline void f2fs_put_page(struct page *page, bool unlock) { if (!page) return; @@ -3810,7 +3819,6 @@ void f2fs_quota_off_umount(struct super_block *sb); void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag); void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason); void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error); -void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); @@ -4186,6 +4194,7 @@ struct f2fs_stat_info { int gc_secs[2][2]; int tot_blks, data_blks, node_blks; int bg_data_blks, bg_node_blks; + int blkoff[NR_CURSEG_TYPE]; int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; @@ -4674,7 +4683,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) f2fs_up_write(&fi->i_sem); return true; } - if (f2fs_is_mmap_file(inode) || + if (f2fs_is_mmap_file(inode) || atomic_read(&fi->writeback) || (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { f2fs_up_write(&fi->i_sem); return false; @@ -4710,6 +4719,7 @@ F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); F2FS_FEATURE_FUNCS(readonly, RO); F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); +F2FS_FEATURE_FUNCS(packed_ssa, PACKED_SSA); #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_zone_is_seq(struct f2fs_sb_info *sbi, int devi, @@ -4764,6 +4774,18 @@ static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) return false; } +static inline unsigned int f2fs_hw_discard_granularity(struct f2fs_sb_info *sbi) +{ + int i = 1; + unsigned int discard_granularity = bdev_discard_granularity(sbi->sb->s_bdev); + + if (f2fs_is_multi_device(sbi)) + for (; i < sbi->s_ndevs && !bdev_is_zoned(FDEV(i).bdev); i++) + discard_granularity = max_t(unsigned int, discard_granularity, + bdev_discard_granularity(FDEV(i).bdev)); + return discard_granularity; +} + static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi) { return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) || @@ -4900,22 +4922,30 @@ static inline bool f2fs_block_unit_discard(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK; } -static inline void f2fs_io_schedule_timeout(long timeout) +static inline void __f2fs_schedule_timeout(long timeout, bool io) { set_current_state(TASK_UNINTERRUPTIBLE); - io_schedule_timeout(timeout); + if (io) + io_schedule_timeout(timeout); + else + schedule_timeout(timeout); } +#define f2fs_io_schedule_timeout(timeout) \ + __f2fs_schedule_timeout(timeout, true) +#define f2fs_schedule_timeout(timeout) \ + __f2fs_schedule_timeout(timeout, false) + static inline void f2fs_io_schedule_timeout_killable(long timeout) { while (timeout) { if (fatal_signal_pending(current)) return; set_current_state(TASK_UNINTERRUPTIBLE); - io_schedule_timeout(DEFAULT_IO_TIMEOUT); - if (timeout <= DEFAULT_IO_TIMEOUT) + io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); + if (timeout <= DEFAULT_SCHEDULE_TIMEOUT) return; - timeout -= DEFAULT_IO_TIMEOUT; + timeout -= DEFAULT_SCHEDULE_TIMEOUT; } } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ffa045b39c01..d7047ca6b98d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1654,8 +1654,11 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, f2fs_set_data_blkaddr(dn, NEW_ADDR); } - f2fs_update_read_extent_cache_range(dn, start, 0, index - start); - f2fs_update_age_extent_cache_range(dn, start, index - start); + if (index > start) { + f2fs_update_read_extent_cache_range(dn, start, 0, + index - start); + f2fs_update_age_extent_cache_range(dn, start, index - start); + } return ret; } @@ -2125,8 +2128,9 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) f2fs_down_write(&fi->i_sem); if (!f2fs_may_compress(inode) || - (S_ISREG(inode->i_mode) && - F2FS_HAS_BLOCKS(inode))) { + atomic_read(&fi->writeback) || + (S_ISREG(inode->i_mode) && + F2FS_HAS_BLOCKS(inode))) { f2fs_up_write(&fi->i_sem); return -EINVAL; } @@ -2584,14 +2588,14 @@ static int f2fs_keep_noreuse_range(struct inode *inode, static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - struct super_block *sb = inode->i_sb; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct fstrim_range range; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!f2fs_hw_support_discard(F2FS_SB(sb))) + if (!f2fs_hw_support_discard(sbi)) return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, @@ -2602,9 +2606,9 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (ret) return ret; - range.minlen = max((unsigned int)range.minlen, - bdev_discard_granularity(sb->s_bdev)); - ret = f2fs_trim_fs(F2FS_SB(sb), &range); + range.minlen = max_t(unsigned int, range.minlen, + f2fs_hw_discard_granularity(sbi)); + ret = f2fs_trim_fs(sbi, &range); mnt_drop_write_file(filp); if (ret < 0) return ret; @@ -2612,7 +2616,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) return -EFAULT; - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + f2fs_update_time(sbi, REQ_TIME); return 0; } @@ -5284,6 +5288,8 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, struct inode *inode = file_inode(filp); int err; + trace_f2fs_fadvise(inode, offset, len, advice); + if (advice == POSIX_FADV_SEQUENTIAL) { if (S_ISFIFO(inode->i_mode)) return -ESPIPE; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a7708cf80c04..384fa7e2085b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -38,13 +38,14 @@ static int gc_thread_func(void *data) struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, .should_migrate_blocks = false, - .err_gc_skipped = false }; + .err_gc_skipped = false, + .one_time = false }; wait_ms = gc_th->min_sleep_time; set_freezable(); do { - bool sync_mode, foreground = false; + bool sync_mode, foreground = false, gc_boost = false; wait_event_freezable_timeout(*wq, kthread_should_stop() || @@ -52,8 +53,12 @@ static int gc_thread_func(void *data) gc_th->gc_wake, msecs_to_jiffies(wait_ms)); - if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq)) + if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq)) { foreground = true; + gc_control.one_time = false; + } else if (f2fs_sb_has_blkzoned(sbi)) { + gc_control.one_time = true; + } /* give it a try one time */ if (gc_th->gc_wake) @@ -81,8 +86,6 @@ static int gc_thread_func(void *data) continue; } - gc_control.one_time = false; - /* * [GC triggering condition] * 0. GC is not conducted currently. @@ -132,7 +135,7 @@ static int gc_thread_func(void *data) if (need_to_boost_gc(sbi)) { decrease_sleep_time(gc_th, &wait_ms); if (f2fs_sb_has_blkzoned(sbi)) - gc_control.one_time = true; + gc_boost = true; } else { increase_sleep_time(gc_th, &wait_ms); } @@ -141,7 +144,7 @@ do_gc: FOREGROUND : BACKGROUND); sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || - (gc_control.one_time && gc_th->boost_gc_greedy); + (gc_boost && gc_th->boost_gc_greedy); /* foreground GC was been triggered via f2fs_balance_fs() */ if (foreground && !f2fs_sb_has_blkzoned(sbi)) @@ -771,7 +774,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct sit_info *sm = SIT_I(sbi); - struct victim_sel_policy p; + struct victim_sel_policy p = {0}; unsigned int secno, last_victim; unsigned int last_segment; unsigned int nsearched; @@ -1208,7 +1211,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) struct address_space *mapping = f2fs_is_cow_file(inode) ? F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct dnode_of_data dn; - struct folio *folio; + struct folio *folio, *efolio; struct f2fs_io_info fio = { .sbi = sbi, .ino = inode->i_ino, @@ -1263,18 +1266,19 @@ got_it: f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); - fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi), - dn.data_blkaddr, + efolio = f2fs_filemap_get_folio(META_MAPPING(sbi), dn.data_blkaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); - if (!fio.encrypted_page) { - err = -ENOMEM; + if (IS_ERR(efolio)) { + err = PTR_ERR(efolio); goto put_folio; } + fio.encrypted_page = &efolio->page; + err = f2fs_submit_page_bio(&fio); if (err) goto put_encrypted_page; - f2fs_put_page(fio.encrypted_page, 0); + f2fs_put_page(fio.encrypted_page, false); f2fs_folio_put(folio, true); f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE); @@ -1282,7 +1286,7 @@ got_it: return 0; put_encrypted_page: - f2fs_put_page(fio.encrypted_page, 1); + f2fs_put_page(fio.encrypted_page, true); put_folio: f2fs_folio_put(folio, true); return err; @@ -1310,7 +1314,7 @@ static int move_data_block(struct inode *inode, block_t bidx, struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; - struct folio *folio, *mfolio; + struct folio *folio, *mfolio, *efolio; block_t newaddr; int err = 0; bool lfs_mode = f2fs_lfs_mode(fio.sbi); @@ -1404,14 +1408,16 @@ static int move_data_block(struct inode *inode, block_t bidx, goto up_out; } - fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), - newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); - if (!fio.encrypted_page) { - err = -ENOMEM; + efolio = f2fs_filemap_get_folio(META_MAPPING(fio.sbi), newaddr, + FGP_LOCK | FGP_CREAT, GFP_NOFS); + if (IS_ERR(efolio)) { + err = PTR_ERR(efolio); f2fs_folio_put(mfolio, true); goto recover_block; } + fio.encrypted_page = &efolio->page; + /* write target block */ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); memcpy(page_address(fio.encrypted_page), @@ -1436,7 +1442,7 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); - f2fs_put_page(fio.encrypted_page, 1); + f2fs_put_page(fio.encrypted_page, true); recover_block: if (err) f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, @@ -1729,7 +1735,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; unsigned char data_type = (type == SUM_TYPE_DATA) ? DATA : NODE; - int submitted = 0; + int submitted = 0, sum_blk_cnt; if (__is_large_section(sbi)) { sec_end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi)); @@ -1763,22 +1769,28 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type); + segno = rounddown(segno, SUMS_PER_BLOCK); + sum_blk_cnt = DIV_ROUND_UP(end_segno - segno, SUMS_PER_BLOCK); /* readahead multi ssa blocks those have contiguous address */ if (__is_large_section(sbi)) f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), - end_segno - segno, META_SSA, true); + sum_blk_cnt, META_SSA, true); /* reference all summary page */ while (segno < end_segno) { - struct folio *sum_folio = f2fs_get_sum_folio(sbi, segno++); + struct folio *sum_folio = f2fs_get_sum_folio(sbi, segno); + + segno += SUMS_PER_BLOCK; if (IS_ERR(sum_folio)) { int err = PTR_ERR(sum_folio); - end_segno = segno - 1; - for (segno = start_segno; segno < end_segno; segno++) { + end_segno = segno - SUMS_PER_BLOCK; + segno = rounddown(start_segno, SUMS_PER_BLOCK); + while (segno < end_segno) { sum_folio = filemap_get_folio(META_MAPPING(sbi), GET_SUM_BLOCK(sbi, segno)); folio_put_refs(sum_folio, 2); + segno += SUMS_PER_BLOCK; } return err; } @@ -1787,68 +1799,83 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, blk_start_plug(&plug); - for (segno = start_segno; segno < end_segno; segno++) { - struct f2fs_summary_block *sum; + segno = start_segno; + while (segno < end_segno) { + unsigned int cur_segno; /* find segment summary of victim */ struct folio *sum_folio = filemap_get_folio(META_MAPPING(sbi), GET_SUM_BLOCK(sbi, segno)); + unsigned int block_end_segno = rounddown(segno, SUMS_PER_BLOCK) + + SUMS_PER_BLOCK; + + if (block_end_segno > end_segno) + block_end_segno = end_segno; if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno))) { f2fs_err(sbi, "%s: segment %u is used by log", __func__, segno); f2fs_bug_on(sbi, 1); - goto skip; + goto next_block; } - if (get_valid_blocks(sbi, segno, false) == 0) - goto freed; - if (gc_type == BG_GC && __is_large_section(sbi) && - migrated >= sbi->migration_granularity) - goto skip; if (!folio_test_uptodate(sum_folio) || unlikely(f2fs_cp_error(sbi))) - goto skip; + goto next_block; - sum = folio_address(sum_folio); - if (type != GET_SUM_TYPE((&sum->footer))) { - f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SIT and SSA", - segno, type, GET_SUM_TYPE((&sum->footer))); - f2fs_stop_checkpoint(sbi, false, - STOP_CP_REASON_CORRUPTED_SUMMARY); - goto skip; - } + for (cur_segno = segno; cur_segno < block_end_segno; + cur_segno++) { + struct f2fs_summary_block *sum; - /* - * this is to avoid deadlock: - * - lock_page(sum_page) - f2fs_replace_block - * - check_valid_map() - down_write(sentry_lock) - * - down_read(sentry_lock) - change_curseg() - * - lock_page(sum_page) - */ - if (type == SUM_TYPE_NODE) - submitted += gc_node_segment(sbi, sum->entries, segno, - gc_type); - else - submitted += gc_data_segment(sbi, sum->entries, gc_list, - segno, gc_type, - force_migrate); + if (get_valid_blocks(sbi, cur_segno, false) == 0) + goto freed; + if (gc_type == BG_GC && __is_large_section(sbi) && + migrated >= sbi->migration_granularity) + continue; - stat_inc_gc_seg_count(sbi, data_type, gc_type); - sbi->gc_reclaimed_segs[sbi->gc_mode]++; - migrated++; + sum = SUM_BLK_PAGE_ADDR(sum_folio, cur_segno); + if (type != GET_SUM_TYPE((&sum->footer))) { + f2fs_err(sbi, "Inconsistent segment (%u) type " + "[%d, %d] in SSA and SIT", + cur_segno, type, + GET_SUM_TYPE((&sum->footer))); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_CORRUPTED_SUMMARY); + continue; + } -freed: - if (gc_type == FG_GC && - get_valid_blocks(sbi, segno, false) == 0) - seg_freed++; + /* + * this is to avoid deadlock: + * - lock_page(sum_page) - f2fs_replace_block + * - check_valid_map() - down_write(sentry_lock) + * - down_read(sentry_lock) - change_curseg() + * - lock_page(sum_page) + */ + if (type == SUM_TYPE_NODE) + submitted += gc_node_segment(sbi, sum->entries, + cur_segno, gc_type); + else + submitted += gc_data_segment(sbi, sum->entries, + gc_list, cur_segno, + gc_type, force_migrate); - if (__is_large_section(sbi)) - sbi->next_victim_seg[gc_type] = - (segno + 1 < sec_end_segno) ? - segno + 1 : NULL_SEGNO; -skip: + stat_inc_gc_seg_count(sbi, data_type, gc_type); + sbi->gc_reclaimed_segs[sbi->gc_mode]++; + migrated++; + +freed: + if (gc_type == FG_GC && + get_valid_blocks(sbi, cur_segno, false) == 0) + seg_freed++; + + if (__is_large_section(sbi)) + sbi->next_victim_seg[gc_type] = + (cur_segno + 1 < sec_end_segno) ? + cur_segno + 1 : NULL_SEGNO; + } +next_block: folio_put_refs(sum_folio, 2); + segno = block_end_segno; } if (submitted) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 24e8b1c27acc..6c4d4567571e 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -25,7 +25,7 @@ #define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */ #define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */ #define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */ -#define DEF_GC_THREAD_VALID_THRESH_RATIO 95 /* do not GC over 95% valid block ratio for one time GC */ +#define DEF_GC_THREAD_VALID_THRESH_RATIO 80 /* do not GC over 80% valid block ratio for one time GC */ #define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 58ac831ef704..e5c6a08b7e4f 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -287,7 +287,7 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio) set_inode_flag(inode, FI_DATA_EXIST); folio_clear_f2fs_inline(ifolio); - f2fs_folio_put(ifolio, 1); + f2fs_folio_put(ifolio, true); return 0; } @@ -577,7 +577,7 @@ recover: f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); folio_mark_dirty(ifolio); - f2fs_folio_put(ifolio, 1); + f2fs_folio_put(ifolio, true); kfree(backup_dentry); return err; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f1cda1900658..38b8994bc1b2 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -294,6 +294,12 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) return false; } + if (S_ISDIR(inode->i_mode) && unlikely(inode->i_nlink == 1)) { + f2fs_warn(sbi, "%s: directory inode (ino=%lx) has a single i_nlink", + __func__, inode->i_ino); + return false; + } + if (f2fs_has_extra_attr(inode)) { if (!f2fs_sb_has_extra_attr(sbi)) { f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off", diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index af40282a6948..043d20516a21 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -552,30 +552,31 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; - goto fail; + goto out; } err = f2fs_dquot_initialize(dir); if (err) - goto fail; + goto out; err = f2fs_dquot_initialize(inode); if (err) - goto fail; + goto out; de = f2fs_find_entry(dir, &dentry->d_name, &folio); if (!de) { if (IS_ERR(folio)) err = PTR_ERR(folio); - goto fail; + goto out; } if (unlikely(inode->i_nlink == 0)) { - f2fs_warn(F2FS_I_SB(inode), "%s: inode (ino=%lx) has zero i_nlink", + f2fs_warn(sbi, "%s: inode (ino=%lx) has zero i_nlink", __func__, inode->i_ino); - err = -EFSCORRUPTED; - set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); - f2fs_folio_put(folio, false); - goto fail; + goto corrupted; + } else if (S_ISDIR(inode->i_mode) && unlikely(inode->i_nlink == 1)) { + f2fs_warn(sbi, "%s: directory inode (ino=%lx) has a single i_nlink", + __func__, inode->i_ino); + goto corrupted; } f2fs_balance_fs(sbi, true); @@ -585,7 +586,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (err) { f2fs_unlock_op(sbi); f2fs_folio_put(folio, false); - goto fail; + goto out; } f2fs_delete_entry(de, folio, dir, inode); f2fs_unlock_op(sbi); @@ -601,7 +602,13 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); -fail: + + goto out; +corrupted: + err = -EFSCORRUPTED; + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_folio_put(folio, false); +out: trace_f2fs_unlink_exit(inode, err); return err; } @@ -1053,9 +1060,11 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (whiteout) { set_inode_flag(whiteout, FI_INC_LINK); err = f2fs_add_link(old_dentry, whiteout); - if (err) + if (err) { + d_invalidate(old_dentry); + d_invalidate(new_dentry); goto put_out_dir; - + } spin_lock(&whiteout->i_lock); inode_state_clear(whiteout, I_LINKABLE); spin_unlock(&whiteout->i_lock); @@ -1247,11 +1256,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; out_new_dir: if (new_dir_entry) { - f2fs_folio_put(new_dir_folio, 0); + f2fs_folio_put(new_dir_folio, false); } out_old_dir: if (old_dir_entry) { - f2fs_folio_put(old_dir_folio, 0); + f2fs_folio_put(old_dir_folio, false); } out_new: f2fs_folio_put(new_folio, false); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 215e442db72c..c3415ebb9f50 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -399,7 +399,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr, } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, - bool check_only) + bool check_only, bool *new_inode) { struct curseg_info *curseg; block_t blkaddr, blkaddr_fast; @@ -447,16 +447,19 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, quota_inode = true; } - /* - * CP | dnode(F) | inode(DF) - * For this case, we should not give up now. - */ entry = add_fsync_inode(sbi, head, ino_of_node(folio), quota_inode); if (IS_ERR(entry)) { err = PTR_ERR(entry); - if (err == -ENOENT) + /* + * CP | dnode(F) | inode(DF) + * For this case, we should not give up now. + */ + if (err == -ENOENT) { + if (check_only) + *new_inode = true; goto next; + } f2fs_folio_put(folio, true); break; } @@ -519,7 +522,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, sum_folio = f2fs_get_sum_folio(sbi, segno); if (IS_ERR(sum_folio)) return PTR_ERR(sum_folio); - sum_node = folio_address(sum_folio); + sum_node = SUM_BLK_PAGE_ADDR(sum_folio, segno); sum = sum_node->entries[blkoff]; f2fs_folio_put(sum_folio, true); got_it: @@ -869,12 +872,14 @@ next: int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) { - struct list_head inode_list, tmp_inode_list; - struct list_head dir_list; + LIST_HEAD(inode_list); + LIST_HEAD(tmp_inode_list); + LIST_HEAD(dir_list); int err; int ret = 0; unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; + bool new_inode = false; f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, " "check_only: %d", check_only); @@ -882,16 +887,12 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) f2fs_info(sbi, "recover fsync data on readonly fs"); - INIT_LIST_HEAD(&inode_list); - INIT_LIST_HEAD(&tmp_inode_list); - INIT_LIST_HEAD(&dir_list); - /* prevent checkpoint */ f2fs_down_write(&sbi->cp_global_sem); /* step #1: find fsynced inode numbers */ - err = find_fsync_dnodes(sbi, &inode_list, check_only); - if (err || list_empty(&inode_list)) + err = find_fsync_dnodes(sbi, &inode_list, check_only, &new_inode); + if (err < 0 || (list_empty(&inode_list) && (!check_only || !new_inode))) goto skip; if (check_only) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b45eace879d7..c26424f47686 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -234,7 +234,7 @@ retry: err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) { if (err == -ENOMEM) { - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + memalloc_retry_wait(GFP_NOFS); goto retry; } return err; @@ -750,7 +750,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) do { ret = __submit_flush_wait(sbi, FDEV(i).bdev); if (ret) - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + f2fs_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); } while (ret && --count); if (ret) { @@ -1343,15 +1343,9 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->di.len += len; + err = 0; if (time_to_inject(sbi, FAULT_DISCARD)) { err = -EIO; - } else { - err = __blkdev_issue_discard(bdev, - SECTOR_FROM_BLOCK(start), - SECTOR_FROM_BLOCK(len), - GFP_NOFS, &bio); - } - if (err) { spin_lock_irqsave(&dc->lock, flags); if (dc->state == D_PARTIAL) dc->state = D_SUBMIT; @@ -1360,6 +1354,8 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, break; } + __blkdev_issue_discard(bdev, SECTOR_FROM_BLOCK(start), + SECTOR_FROM_BLOCK(len), GFP_NOFS, &bio); f2fs_bug_on(sbi, !bio); /* @@ -2712,7 +2708,15 @@ struct folio *f2fs_get_sum_folio(struct f2fs_sb_info *sbi, unsigned int segno) void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { - struct folio *folio = f2fs_grab_meta_folio(sbi, blk_addr); + struct folio *folio; + + if (SUMS_PER_BLOCK == 1) + folio = f2fs_grab_meta_folio(sbi, blk_addr); + else + folio = f2fs_get_meta_folio_retry(sbi, blk_addr); + + if (IS_ERR(folio)) + return; memcpy(folio_address(folio), src, PAGE_SIZE); folio_mark_dirty(folio); @@ -2720,9 +2724,21 @@ void f2fs_update_meta_page(struct f2fs_sb_info *sbi, } static void write_sum_page(struct f2fs_sb_info *sbi, - struct f2fs_summary_block *sum_blk, block_t blk_addr) + struct f2fs_summary_block *sum_blk, unsigned int segno) { - f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr); + struct folio *folio; + + if (SUMS_PER_BLOCK == 1) + return f2fs_update_meta_page(sbi, (void *)sum_blk, + GET_SUM_BLOCK(sbi, segno)); + + folio = f2fs_get_sum_folio(sbi, segno); + if (IS_ERR(folio)) + return; + + memcpy(SUM_BLK_PAGE_ADDR(folio, segno), sum_blk, sizeof(*sum_blk)); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } static void write_current_sum_page(struct f2fs_sb_info *sbi, @@ -2987,7 +3003,7 @@ static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) int ret; if (curseg->inited) - write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); + write_sum_page(sbi, curseg->sum_blk, segno); segno = __get_next_segno(sbi, type); ret = get_new_segment(sbi, &segno, new_sec, pinning); @@ -3046,7 +3062,7 @@ static int change_curseg(struct f2fs_sb_info *sbi, int type) struct folio *sum_folio; if (curseg->inited) - write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno)); + write_sum_page(sbi, curseg->sum_blk, curseg->segno); __set_test_and_inuse(sbi, new_segno); @@ -3065,7 +3081,7 @@ static int change_curseg(struct f2fs_sb_info *sbi, int type) memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE); return PTR_ERR(sum_folio); } - sum_node = folio_address(sum_folio); + sum_node = SUM_BLK_PAGE_ADDR(sum_folio, new_segno); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_folio_put(sum_folio, true); return 0; @@ -3154,8 +3170,7 @@ static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) goto out; if (get_valid_blocks(sbi, curseg->segno, false)) { - write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, curseg->segno)); + write_sum_page(sbi, curseg->sum_blk, curseg->segno); } else { mutex_lock(&DIRTY_I(sbi)->seglist_lock); __set_test_and_free(sbi, curseg->segno, true); @@ -3452,7 +3467,7 @@ next: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); trimmed += __wait_all_discard_cmd(sbi, NULL); - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + f2fs_schedule_timeout(DEFAULT_DISCARD_INTERVAL); goto next; } skip: @@ -3833,8 +3848,7 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, if (segment_full) { if (type == CURSEG_COLD_DATA_PINNED && !((curseg->segno + 1) % sbi->segs_per_sec)) { - write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, curseg->segno)); + write_sum_page(sbi, curseg->sum_blk, curseg->segno); reset_curseg_fields(curseg); goto skip_new_segment; } @@ -3863,8 +3877,13 @@ skip_new_segment: locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); - if (IS_DATASEG(curseg->seg_type)) - atomic64_inc(&sbi->allocated_data_blocks); + if (IS_DATASEG(curseg->seg_type)) { + unsigned long long new_val; + + new_val = atomic64_inc_return(&sbi->allocated_data_blocks); + if (unlikely(new_val == ULLONG_MAX)) + atomic64_set(&sbi->allocated_data_blocks, 0); + } up_write(&sit_i->sentry_lock); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 1ce2c8abaf48..07dcbcbeb7c6 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -69,11 +69,16 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, ((!__is_valid_data_blkaddr(blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) +#ifdef CONFIG_BLK_DEV_ZONED #define CAP_BLKS_PER_SEC(sbi) \ (BLKS_PER_SEC(sbi) - (sbi)->unusable_blocks_per_sec) #define CAP_SEGS_PER_SEC(sbi) \ (SEGS_PER_SEC(sbi) - \ BLKS_TO_SEGS(sbi, (sbi)->unusable_blocks_per_sec)) +#else +#define CAP_BLKS_PER_SEC(sbi) BLKS_PER_SEC(sbi) +#define CAP_SEGS_PER_SEC(sbi) SEGS_PER_SEC(sbi) +#endif #define GET_START_SEG_FROM_SEC(sbi, segno) \ (rounddown(segno, SEGS_PER_SEC(sbi))) #define GET_SEC_FROM_SEG(sbi, segno) \ @@ -85,8 +90,12 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define GET_ZONE_FROM_SEG(sbi, segno) \ GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno)) -#define GET_SUM_BLOCK(sbi, segno) \ - ((sbi)->sm_info->ssa_blkaddr + (segno)) +#define SUMS_PER_BLOCK (F2FS_BLKSIZE / F2FS_SUM_BLKSIZE) +#define GET_SUM_BLOCK(sbi, segno) \ + (SM_I(sbi)->ssa_blkaddr + (segno / SUMS_PER_BLOCK)) +#define GET_SUM_BLKOFF(segno) (segno % SUMS_PER_BLOCK) +#define SUM_BLK_PAGE_ADDR(folio, segno) \ + (folio_address(folio) + GET_SUM_BLKOFF(segno) * F2FS_SUM_BLKSIZE) #define GET_SUM_TYPE(footer) ((footer)->entry_type) #define SET_SUM_TYPE(footer, type) ((footer)->entry_type = (type)) @@ -603,10 +612,12 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) static inline unsigned int get_left_section_blocks(struct f2fs_sb_info *sbi, enum log_type type, unsigned int segno) { - if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) - return CAP_BLKS_PER_SEC(sbi) - SEGS_TO_BLKS(sbi, - (segno - GET_START_SEG_FROM_SEC(sbi, segno))) - + if (f2fs_lfs_mode(sbi)) { + unsigned int used_blocks = __is_large_section(sbi) ? SEGS_TO_BLKS(sbi, + (segno - GET_START_SEG_FROM_SEC(sbi, segno))) : 0; + return CAP_BLKS_PER_SEC(sbi) - used_blocks - CURSEG_I(sbi, type)->next_blkoff; + } return CAP_BLKS_PER_SEC(sbi) - get_ckpt_valid_blocks(sbi, segno, true); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 47489d48f2b9..c4c225e09dc4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -352,7 +352,7 @@ static match_table_t f2fs_checkpoint_tokens = { struct f2fs_fs_context { struct f2fs_mount_info info; - unsigned int opt_mask; /* Bits changed */ + unsigned long long opt_mask; /* Bits changed */ unsigned int spec_mask; unsigned short qname_mask; }; @@ -360,23 +360,23 @@ struct f2fs_fs_context { #define F2FS_CTX_INFO(ctx) ((ctx)->info) static inline void ctx_set_opt(struct f2fs_fs_context *ctx, - unsigned int flag) + enum f2fs_mount_opt flag) { - ctx->info.opt |= flag; - ctx->opt_mask |= flag; + ctx->info.opt |= BIT(flag); + ctx->opt_mask |= BIT(flag); } static inline void ctx_clear_opt(struct f2fs_fs_context *ctx, - unsigned int flag) + enum f2fs_mount_opt flag) { - ctx->info.opt &= ~flag; - ctx->opt_mask |= flag; + ctx->info.opt &= ~BIT(flag); + ctx->opt_mask |= BIT(flag); } static inline bool ctx_test_opt(struct f2fs_fs_context *ctx, - unsigned int flag) + enum f2fs_mount_opt flag) { - return ctx->info.opt & flag; + return ctx->info.opt & BIT(flag); } void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, @@ -1371,7 +1371,7 @@ static int f2fs_check_compression(struct fs_context *fc, ctx_test_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE)) f2fs_info(sbi, "Image doesn't support compression"); clear_compression_spec(ctx); - ctx->opt_mask &= ~F2FS_MOUNT_COMPRESS_CACHE; + ctx->opt_mask &= ~BIT(F2FS_MOUNT_COMPRESS_CACHE); return 0; } if (ctx->spec_mask & F2FS_SPEC_compress_extension) { @@ -1439,42 +1439,42 @@ static int f2fs_check_opt_consistency(struct fs_context *fc, return -EINVAL; if (f2fs_hw_should_discard(sbi) && - (ctx->opt_mask & F2FS_MOUNT_DISCARD) && + (ctx->opt_mask & BIT(F2FS_MOUNT_DISCARD)) && !ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) { f2fs_warn(sbi, "discard is required for zoned block devices"); return -EINVAL; } if (!f2fs_hw_support_discard(sbi) && - (ctx->opt_mask & F2FS_MOUNT_DISCARD) && + (ctx->opt_mask & BIT(F2FS_MOUNT_DISCARD)) && ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) { f2fs_warn(sbi, "device does not support discard"); ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); - ctx->opt_mask &= ~F2FS_MOUNT_DISCARD; + ctx->opt_mask &= ~BIT(F2FS_MOUNT_DISCARD); } if (f2fs_sb_has_device_alias(sbi) && - (ctx->opt_mask & F2FS_MOUNT_READ_EXTENT_CACHE) && + (ctx->opt_mask & BIT(F2FS_MOUNT_READ_EXTENT_CACHE)) && !ctx_test_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE)) { f2fs_err(sbi, "device aliasing requires extent cache"); return -EINVAL; } if (test_opt(sbi, RESERVE_ROOT) && - (ctx->opt_mask & F2FS_MOUNT_RESERVE_ROOT) && + (ctx->opt_mask & BIT(F2FS_MOUNT_RESERVE_ROOT)) && ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_ROOT)) { f2fs_info(sbi, "Preserve previous reserve_root=%u", F2FS_OPTION(sbi).root_reserved_blocks); ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); - ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_ROOT; + ctx->opt_mask &= ~BIT(F2FS_MOUNT_RESERVE_ROOT); } if (test_opt(sbi, RESERVE_NODE) && - (ctx->opt_mask & F2FS_MOUNT_RESERVE_NODE) && + (ctx->opt_mask & BIT(F2FS_MOUNT_RESERVE_NODE)) && ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_NODE)) { f2fs_info(sbi, "Preserve previous reserve_node=%u", F2FS_OPTION(sbi).root_reserved_nodes); ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_NODE); - ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_NODE; + ctx->opt_mask &= ~BIT(F2FS_MOUNT_RESERVE_NODE); } err = f2fs_check_test_dummy_encryption(fc, sb); @@ -1759,6 +1759,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); atomic_set(&fi->open_count, 0); + atomic_set(&fi->writeback, 0); init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); @@ -1988,14 +1989,6 @@ static void f2fs_put_super(struct super_block *sb) truncate_inode_pages_final(META_MAPPING(sbi)); } - for (i = 0; i < NR_COUNT_TYPE; i++) { - if (!get_pages(sbi, i)) - continue; - f2fs_err(sbi, "detect filesystem reference count leak during " - "umount, type: %d, count: %lld", i, get_pages(sbi, i)); - f2fs_bug_on(sbi, 1); - } - f2fs_bug_on(sbi, sbi->fsync_node_num); f2fs_destroy_compress_inode(sbi); @@ -2006,6 +1999,15 @@ static void f2fs_put_super(struct super_block *sb) iput(sbi->meta_inode); sbi->meta_inode = NULL; + /* Should check the page counts after dropping all node/meta pages */ + for (i = 0; i < NR_COUNT_TYPE; i++) { + if (!get_pages(sbi, i)) + continue; + f2fs_err(sbi, "detect filesystem reference count leak during " + "umount, type: %d, count: %lld", i, get_pages(sbi, i)); + f2fs_bug_on(sbi, 1); + } + /* * iput() can update stat information, if f2fs_write_checkpoint() * above failed with error. @@ -2026,7 +2028,6 @@ static void f2fs_put_super(struct super_block *sb) kfree(sbi->raw_super); f2fs_destroy_page_array_cache(sbi); - f2fs_destroy_xattr_caches(sbi); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) kfree(F2FS_OPTION(sbi).s_qf_names[i]); @@ -2632,12 +2633,14 @@ restore_flag: return err; } -static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) +static int f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) { unsigned int nr_pages = get_pages(sbi, F2FS_DIRTY_DATA) / 16; - long long start, writeback, end; + long long start, writeback, lock, sync_inode, end; + int ret; - f2fs_info(sbi, "f2fs_enable_checkpoint() starts, meta: %lld, node: %lld, data: %lld", + f2fs_info(sbi, "%s start, meta: %lld, node: %lld, data: %lld", + __func__, get_pages(sbi, F2FS_DIRTY_META), get_pages(sbi, F2FS_DIRTY_NODES), get_pages(sbi, F2FS_DIRTY_DATA)); @@ -2649,18 +2652,25 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) /* we should flush all the data to keep data consistency */ while (get_pages(sbi, F2FS_DIRTY_DATA)) { writeback_inodes_sb_nr(sbi->sb, nr_pages, WB_REASON_SYNC); - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + f2fs_io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); if (f2fs_time_over(sbi, ENABLE_TIME)) break; } writeback = ktime_get(); - sync_inodes_sb(sbi->sb); + f2fs_down_write(&sbi->cp_enable_rwsem); + + lock = ktime_get(); + + if (get_pages(sbi, F2FS_DIRTY_DATA)) + sync_inodes_sb(sbi->sb); if (unlikely(get_pages(sbi, F2FS_DIRTY_DATA))) - f2fs_warn(sbi, "checkpoint=enable has some unwritten data: %lld", - get_pages(sbi, F2FS_DIRTY_DATA)); + f2fs_warn(sbi, "%s: has some unwritten data: %lld", + __func__, get_pages(sbi, F2FS_DIRTY_DATA)); + + sync_inode = ktime_get(); f2fs_down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); @@ -2669,16 +2679,32 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) set_sbi_flag(sbi, SBI_IS_DIRTY); f2fs_up_write(&sbi->gc_lock); - f2fs_sync_fs(sbi->sb, 1); + f2fs_info(sbi, "%s sync_fs, meta: %lld, imeta: %lld, node: %lld, dents: %lld, qdata: %lld", + __func__, + get_pages(sbi, F2FS_DIRTY_META), + get_pages(sbi, F2FS_DIRTY_IMETA), + get_pages(sbi, F2FS_DIRTY_NODES), + get_pages(sbi, F2FS_DIRTY_DENTS), + get_pages(sbi, F2FS_DIRTY_QDATA)); + ret = f2fs_sync_fs(sbi->sb, 1); + if (ret) + f2fs_err(sbi, "%s sync_fs failed, ret: %d", __func__, ret); /* Let's ensure there's no pending checkpoint anymore */ f2fs_flush_ckpt_thread(sbi); + f2fs_up_write(&sbi->cp_enable_rwsem); + end = ktime_get(); - f2fs_info(sbi, "f2fs_enable_checkpoint() finishes, writeback:%llu, sync:%llu", - ktime_ms_delta(writeback, start), - ktime_ms_delta(end, writeback)); + f2fs_info(sbi, "%s end, writeback:%llu, " + "lock:%llu, sync_inode:%llu, sync_fs:%llu", + __func__, + ktime_ms_delta(writeback, start), + ktime_ms_delta(lock, writeback), + ktime_ms_delta(sync_inode, lock), + ktime_ms_delta(end, sync_inode)); + return ret; } static int __f2fs_remount(struct fs_context *fc, struct super_block *sb) @@ -2892,7 +2918,9 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb) goto restore_discard; need_enable_checkpoint = true; } else { - f2fs_enable_checkpoint(sbi); + err = f2fs_enable_checkpoint(sbi); + if (err) + goto restore_discard; need_disable_checkpoint = true; } } @@ -2935,7 +2963,8 @@ skip: return 0; restore_checkpoint: if (need_enable_checkpoint) { - f2fs_enable_checkpoint(sbi); + if (f2fs_enable_checkpoint(sbi)) + f2fs_warn(sbi, "checkpoint has not been enabled"); } else if (need_disable_checkpoint) { if (f2fs_disable_checkpoint(sbi)) f2fs_warn(sbi, "checkpoint has not been disabled"); @@ -3110,7 +3139,7 @@ retry: &folio, &fsdata); if (unlikely(err)) { if (err == -ENOMEM) { - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + memalloc_retry_wait(GFP_NOFS); goto retry; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); @@ -4051,6 +4080,20 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (sanity_check_area_boundary(sbi, folio, index)) return -EFSCORRUPTED; + /* + * Check for legacy summary layout on 16KB+ block devices. + * Modern f2fs-tools packs multiple 4KB summary areas into one block, + * whereas legacy versions used one block per summary, leading + * to a much larger SSA. + */ + if (SUMS_PER_BLOCK > 1 && + !(__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_PACKED_SSA))) { + f2fs_info(sbi, "Error: Device formatted with a legacy version. " + "Please reformat with a tool supporting the packed ssa " + "feature for block sizes larger than 4kb."); + return -EOPNOTSUPP; + } + return 0; } @@ -4544,50 +4587,9 @@ void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag) spin_unlock_irqrestore(&sbi->error_lock, flags); } -static bool f2fs_update_errors(struct f2fs_sb_info *sbi) -{ - unsigned long flags; - bool need_update = false; - - spin_lock_irqsave(&sbi->error_lock, flags); - if (sbi->error_dirty) { - memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors, - MAX_F2FS_ERRORS); - sbi->error_dirty = false; - need_update = true; - } - spin_unlock_irqrestore(&sbi->error_lock, flags); - - return need_update; -} - -static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error) -{ - int err; - - f2fs_down_write(&sbi->sb_lock); - - if (!f2fs_update_errors(sbi)) - goto out_unlock; - - err = f2fs_commit_super(sbi, false); - if (err) - f2fs_err_ratelimited(sbi, - "f2fs_commit_super fails to record errors:%u, err:%d", - error, err); -out_unlock: - f2fs_up_write(&sbi->sb_lock); -} - void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error) { f2fs_save_errors(sbi, error); - f2fs_record_errors(sbi, error); -} - -void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error) -{ - f2fs_save_errors(sbi, error); if (!sbi->error_dirty) return; @@ -4904,6 +4906,7 @@ try_onemore: init_f2fs_rwsem(&sbi->node_change); spin_lock_init(&sbi->stat_lock); init_f2fs_rwsem(&sbi->cp_rwsem); + init_f2fs_rwsem(&sbi->cp_enable_rwsem); init_f2fs_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); spin_lock_init(&sbi->error_lock); @@ -5015,13 +5018,9 @@ try_onemore: if (err) goto free_iostat; - /* init per sbi slab cache */ - err = f2fs_init_xattr_caches(sbi); - if (err) - goto free_percpu; err = f2fs_init_page_array_cache(sbi); if (err) - goto free_xattr_cache; + goto free_percpu; /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); @@ -5226,11 +5225,15 @@ try_onemore: } } else { err = f2fs_recover_fsync_data(sbi, true); - - if (!f2fs_readonly(sb) && err > 0) { - err = -EINVAL; - f2fs_err(sbi, "Need to recover fsync data"); - goto free_meta; + if (err > 0) { + if (!f2fs_readonly(sb)) { + f2fs_err(sbi, "Need to recover fsync data"); + err = -EINVAL; + goto free_meta; + } else { + f2fs_info(sbi, "drop all fsynced data"); + err = 0; + } } } @@ -5257,13 +5260,12 @@ reset_checkpoint: if (err) goto sync_free_meta; - if (test_opt(sbi, DISABLE_CHECKPOINT)) { + if (test_opt(sbi, DISABLE_CHECKPOINT)) err = f2fs_disable_checkpoint(sbi); - if (err) - goto sync_free_meta; - } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) { - f2fs_enable_checkpoint(sbi); - } + else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) + err = f2fs_enable_checkpoint(sbi); + if (err) + goto sync_free_meta; /* * If filesystem is not mounted as read-only then @@ -5350,8 +5352,6 @@ free_meta_inode: sbi->meta_inode = NULL; free_page_array_cache: f2fs_destroy_page_array_cache(sbi); -free_xattr_cache: - f2fs_destroy_xattr_caches(sbi); free_percpu: destroy_percpu_info(sbi); free_iostat: @@ -5554,10 +5554,15 @@ static int __init init_f2fs_fs(void) err = f2fs_create_casefold_cache(); if (err) goto free_compress_cache; - err = register_filesystem(&f2fs_fs_type); + err = f2fs_init_xattr_cache(); if (err) goto free_casefold_cache; + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_xattr_cache; return 0; +free_xattr_cache: + f2fs_destroy_xattr_cache(); free_casefold_cache: f2fs_destroy_casefold_cache(); free_compress_cache: @@ -5598,6 +5603,7 @@ fail: static void __exit exit_f2fs_fs(void) { unregister_filesystem(&f2fs_fs_type); + f2fs_destroy_xattr_cache(); f2fs_destroy_casefold_cache(); f2fs_destroy_compress_cache(); f2fs_destroy_compress_mempool(); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 6d2a4fba68a2..c42f4f979d13 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -235,6 +235,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_compression(sbi)) len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "compression"); + if (f2fs_sb_has_packed_ssa(sbi)) + len += sysfs_emit_at(buf, len, "%s%s", + len ? ", " : "", "packed_ssa"); len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "pin_file"); len += sysfs_emit_at(buf, len, "\n"); @@ -1210,6 +1213,7 @@ F2FS_SBI_GENERAL_RW_ATTR(last_age_weight); F2FS_SBI_GENERAL_RW_ATTR(max_read_extent_count); #ifdef CONFIG_BLK_DEV_ZONED F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); +F2FS_SBI_GENERAL_RO_ATTR(max_open_zones); F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif F2FS_SBI_GENERAL_RW_ATTR(carve_out); @@ -1296,6 +1300,7 @@ F2FS_FEATURE_RO_ATTR(pin_file); #ifdef CONFIG_UNICODE F2FS_FEATURE_RO_ATTR(linear_lookup); #endif +F2FS_FEATURE_RO_ATTR(packed_ssa); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -1384,6 +1389,7 @@ static struct attribute *f2fs_attrs[] = { #endif #ifdef CONFIG_BLK_DEV_ZONED ATTR_LIST(unusable_blocks_per_sec), + ATTR_LIST(max_open_zones), ATTR_LIST(blkzone_alloc_policy), #endif #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -1455,6 +1461,7 @@ static struct attribute *f2fs_feat_attrs[] = { #ifdef CONFIG_UNICODE BASE_ATTR_LIST(linear_lookup), #endif + BASE_ATTR_LIST(packed_ssa), NULL, }; ATTRIBUTE_GROUPS(f2fs_feat); @@ -1490,6 +1497,7 @@ F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); F2FS_SB_FEATURE_RO_ATTR(readonly, RO); F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); +F2FS_SB_FEATURE_RO_ATTR(packed_ssa, PACKED_SSA); static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_encryption), @@ -1507,6 +1515,7 @@ static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_compression), ATTR_LIST(sb_readonly), ATTR_LIST(sb_device_alias), + ATTR_LIST(sb_packed_ssa), NULL, }; ATTRIBUTE_GROUPS(f2fs_sb_feat); diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index f0ab9a3c7a82..05b935b55216 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -263,7 +263,7 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; - folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); + folio = f2fs_filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); if (IS_ERR(folio) || !folio_test_uptodate(folio)) { DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 58632a2b6613..b4e5c406632f 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -23,11 +23,12 @@ #include "xattr.h" #include "segment.h" +static struct kmem_cache *inline_xattr_slab; static void *xattr_alloc(struct f2fs_sb_info *sbi, int size, bool *is_inline) { - if (likely(size == sbi->inline_xattr_slab_size)) { + if (likely(size == DEFAULT_XATTR_SLAB_SIZE)) { *is_inline = true; - return f2fs_kmem_cache_alloc(sbi->inline_xattr_slab, + return f2fs_kmem_cache_alloc(inline_xattr_slab, GFP_F2FS_ZERO, false, sbi); } *is_inline = false; @@ -38,7 +39,7 @@ static void xattr_free(struct f2fs_sb_info *sbi, void *xattr_addr, bool is_inline) { if (is_inline) - kmem_cache_free(sbi->inline_xattr_slab, xattr_addr); + kmem_cache_free(inline_xattr_slab, xattr_addr); else kfree(xattr_addr); } @@ -830,25 +831,14 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, return err; } -int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) +int __init f2fs_init_xattr_cache(void) { - dev_t dev = sbi->sb->s_bdev->bd_dev; - char slab_name[32]; - - sprintf(slab_name, "f2fs_xattr_entry-%u:%u", MAJOR(dev), MINOR(dev)); - - sbi->inline_xattr_slab_size = F2FS_OPTION(sbi).inline_xattr_size * - sizeof(__le32) + XATTR_PADDING_SIZE; - - sbi->inline_xattr_slab = f2fs_kmem_cache_create(slab_name, - sbi->inline_xattr_slab_size); - if (!sbi->inline_xattr_slab) - return -ENOMEM; - - return 0; + inline_xattr_slab = f2fs_kmem_cache_create("f2fs_xattr_entry", + DEFAULT_XATTR_SLAB_SIZE); + return inline_xattr_slab ? 0 : -ENOMEM; } -void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) +void f2fs_destroy_xattr_cache(void) { - kmem_cache_destroy(sbi->inline_xattr_slab); -} + kmem_cache_destroy(inline_xattr_slab); +}
\ No newline at end of file diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 4fc0b2305fbd..bce3d93e4755 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -89,6 +89,8 @@ struct f2fs_xattr_entry { F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \ DEF_INLINE_RESERVED_SIZE - \ MIN_INLINE_DENTRY_SIZE / sizeof(__le32)) +#define DEFAULT_XATTR_SLAB_SIZE (DEFAULT_INLINE_XATTR_ADDRS * \ + sizeof(__le32) + XATTR_PADDING_SIZE) /* * On-disk structure of f2fs_xattr @@ -132,8 +134,8 @@ int f2fs_setxattr(struct inode *, int, const char *, const void *, int f2fs_getxattr(struct inode *, int, const char *, void *, size_t, struct folio *); ssize_t f2fs_listxattr(struct dentry *, char *, size_t); -int f2fs_init_xattr_caches(struct f2fs_sb_info *); -void f2fs_destroy_xattr_caches(struct f2fs_sb_info *); +int __init f2fs_init_xattr_cache(void); +void f2fs_destroy_xattr_cache(void); #else #define f2fs_xattr_handlers NULL @@ -150,8 +152,8 @@ static inline int f2fs_getxattr(struct inode *inode, int index, { return -EOPNOTSUPP; } -static inline int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } -static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } +static inline int __init f2fs_init_xattr_cache(void) { return 0; } +static inline void f2fs_destroy_xattr_cache(void) { } #endif #ifdef CONFIG_F2FS_FS_SECURITY diff --git a/fs/fuse/control.c b/fs/fuse/control.c index bb407705603c..140bd5730d99 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -205,8 +205,7 @@ static const struct file_operations fuse_conn_congestion_threshold_ops = { static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, struct fuse_conn *fc, - const char *name, - int mode, int nlink, + const char *name, int mode, const struct inode_operations *iop, const struct file_operations *fop) { @@ -232,10 +231,19 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, if (iop) inode->i_op = iop; inode->i_fop = fop; - set_nlink(inode, nlink); + if (S_ISDIR(mode)) { + inc_nlink(d_inode(parent)); + inc_nlink(inode); + } inode->i_private = fc; - d_add(dentry, inode); - + d_make_persistent(dentry, inode); + dput(dentry); + + /* + * We are returning a borrowed reference here - it's only good while + * fuse_mutex is held. Actually it's d_make_persistent() return + * value... + */ return dentry; } @@ -252,22 +260,21 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) return 0; parent = fuse_control_sb->s_root; - inc_nlink(d_inode(parent)); sprintf(name, "%u", fc->dev); - parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, + parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, &simple_dir_inode_operations, &simple_dir_operations); if (!parent) goto err; - if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, + if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, NULL, &fuse_ctl_waiting_ops) || - !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, + !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, NULL, &fuse_ctl_abort_ops) || !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600, - 1, NULL, &fuse_conn_max_background_ops) || + NULL, &fuse_conn_max_background_ops) || !fuse_ctl_add_dentry(parent, fc, "congestion_threshold", - S_IFREG | 0600, 1, NULL, + S_IFREG | 0600, NULL, &fuse_conn_congestion_threshold_ops)) goto err; @@ -289,18 +296,13 @@ static void remove_one(struct dentry *dentry) */ void fuse_ctl_remove_conn(struct fuse_conn *fc) { - struct dentry *dentry; char name[32]; if (!fuse_control_sb || fc->no_control) return; sprintf(name, "%u", fc->dev); - dentry = lookup_noperm_positive_unlocked(&QSTR(name), fuse_control_sb->s_root); - if (!IS_ERR(dentry)) { - simple_recursive_removal(dentry, remove_one); - dput(dentry); // paired with lookup_noperm_positive_unlocked() - } + simple_remove_by_name(fuse_control_sb->s_root, name, remove_one); } static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc) @@ -350,7 +352,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb) fuse_control_sb = NULL; mutex_unlock(&fuse_mutex); - kill_litter_super(sb); + kill_anon_super(sb); } static struct file_system_type fuse_ctl_fs_type = { diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 132f38619d70..6d59cbc877c6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -846,7 +846,7 @@ void fuse_copy_init(struct fuse_copy_state *cs, bool write, } /* Unmap and put previous page of userspace buffer */ -static void fuse_copy_finish(struct fuse_copy_state *cs) +void fuse_copy_finish(struct fuse_copy_state *cs) { if (cs->currbuf) { struct pipe_buffer *buf = cs->currbuf; @@ -2041,13 +2041,14 @@ static int fuse_notify_resend(struct fuse_conn *fc) /* * Increments the fuse connection epoch. This will result of dentries from - * previous epochs to be invalidated. - * - * XXX optimization: add call to shrink_dcache_sb()? + * previous epochs to be invalidated. Additionally, if inval_wq is set, a work + * queue is scheduled to trigger the invalidation. */ static int fuse_notify_inc_epoch(struct fuse_conn *fc) { atomic_inc(&fc->epoch); + if (inval_wq) + schedule_work(&fc->epoch_work); return 0; } diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index f8c93dc45768..5ceb217ced1b 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -86,6 +86,7 @@ static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req, lockdep_assert_not_held(&queue->lock); spin_lock(&queue->lock); ent->fuse_req = NULL; + list_del_init(&req->list); if (test_bit(FR_BACKGROUND, &req->flags)) { queue->active_background--; spin_lock(&fc->bg_lock); @@ -598,12 +599,14 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring, cs.is_uring = true; cs.req = req; - return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz); + err = fuse_copy_out_args(&cs, args, ring_in_out.payload_sz); + fuse_copy_finish(&cs); + return err; } - /* - * Copy data from the req to the ring buffer - */ +/* + * Copy data from the req to the ring buffer + */ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req, struct fuse_ring_ent *ent) { @@ -649,6 +652,7 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req, /* copy the payload */ err = fuse_copy_args(&cs, num_args, args->in_pages, (struct fuse_arg *)in_args, 0); + fuse_copy_finish(&cs); if (err) { pr_info_ratelimited("%s fuse_copy_args failed\n", __func__); return err; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 87a63ae93a45..4b6b3d2758ff 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -27,6 +27,67 @@ module_param(allow_sys_admin_access, bool, 0644); MODULE_PARM_DESC(allow_sys_admin_access, "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check"); +struct dentry_bucket { + struct rb_root tree; + spinlock_t lock; +}; + +#define HASH_BITS 5 +#define HASH_SIZE (1 << HASH_BITS) +static struct dentry_bucket dentry_hash[HASH_SIZE]; +struct delayed_work dentry_tree_work; + +/* Minimum invalidation work queue frequency */ +#define FUSE_DENTRY_INVAL_FREQ_MIN 5 + +unsigned __read_mostly inval_wq; +static int inval_wq_set(const char *val, const struct kernel_param *kp) +{ + unsigned int num; + unsigned int old = inval_wq; + int ret; + + if (!val) + return -EINVAL; + + ret = kstrtouint(val, 0, &num); + if (ret) + return ret; + + if ((num < FUSE_DENTRY_INVAL_FREQ_MIN) && (num != 0)) + return -EINVAL; + + /* This should prevent overflow in secs_to_jiffies() */ + if (num > USHRT_MAX) + return -EINVAL; + + *((unsigned int *)kp->arg) = num; + + if (num && !old) + schedule_delayed_work(&dentry_tree_work, + secs_to_jiffies(num)); + else if (!num && old) + cancel_delayed_work_sync(&dentry_tree_work); + + return 0; +} +static const struct kernel_param_ops inval_wq_ops = { + .set = inval_wq_set, + .get = param_get_uint, +}; +module_param_cb(inval_wq, &inval_wq_ops, &inval_wq, 0644); +__MODULE_PARM_TYPE(inval_wq, "uint"); +MODULE_PARM_DESC(inval_wq, + "Dentries invalidation work queue period in secs (>= " + __stringify(FUSE_DENTRY_INVAL_FREQ_MIN) ")."); + +static inline struct dentry_bucket *get_dentry_bucket(struct dentry *dentry) +{ + int i = hash_ptr(dentry, HASH_BITS); + + return &dentry_hash[i]; +} + static void fuse_advise_use_readdirplus(struct inode *dir) { struct fuse_inode *fi = get_fuse_inode(dir); @@ -34,33 +95,151 @@ static void fuse_advise_use_readdirplus(struct inode *dir) set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); } -#if BITS_PER_LONG >= 64 -static inline void __fuse_dentry_settime(struct dentry *entry, u64 time) +struct fuse_dentry { + u64 time; + union { + struct rcu_head rcu; + struct rb_node node; + }; + struct dentry *dentry; +}; + +static void __fuse_dentry_tree_del_node(struct fuse_dentry *fd, + struct dentry_bucket *bucket) { - entry->d_fsdata = (void *) time; + if (!RB_EMPTY_NODE(&fd->node)) { + rb_erase(&fd->node, &bucket->tree); + RB_CLEAR_NODE(&fd->node); + } } -static inline u64 fuse_dentry_time(const struct dentry *entry) +static void fuse_dentry_tree_del_node(struct dentry *dentry) { - return (u64)entry->d_fsdata; + struct fuse_dentry *fd = dentry->d_fsdata; + struct dentry_bucket *bucket = get_dentry_bucket(dentry); + + spin_lock(&bucket->lock); + __fuse_dentry_tree_del_node(fd, bucket); + spin_unlock(&bucket->lock); } -#else -union fuse_dentry { - u64 time; - struct rcu_head rcu; -}; +static void fuse_dentry_tree_add_node(struct dentry *dentry) +{ + struct fuse_dentry *fd = dentry->d_fsdata; + struct dentry_bucket *bucket; + struct fuse_dentry *cur; + struct rb_node **p, *parent = NULL; + + if (!inval_wq) + return; + + bucket = get_dentry_bucket(dentry); + + spin_lock(&bucket->lock); + + __fuse_dentry_tree_del_node(fd, bucket); + + p = &bucket->tree.rb_node; + while (*p) { + parent = *p; + cur = rb_entry(*p, struct fuse_dentry, node); + if (fd->time < cur->time) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&fd->node, parent, p); + rb_insert_color(&fd->node, &bucket->tree); + spin_unlock(&bucket->lock); +} + +/* + * work queue which, when enabled, will periodically check for expired dentries + * in the dentries tree. + */ +static void fuse_dentry_tree_work(struct work_struct *work) +{ + LIST_HEAD(dispose); + struct fuse_dentry *fd; + struct rb_node *node; + int i; + + for (i = 0; i < HASH_SIZE; i++) { + spin_lock(&dentry_hash[i].lock); + node = rb_first(&dentry_hash[i].tree); + while (node) { + fd = rb_entry(node, struct fuse_dentry, node); + if (time_after64(get_jiffies_64(), fd->time)) { + rb_erase(&fd->node, &dentry_hash[i].tree); + RB_CLEAR_NODE(&fd->node); + spin_unlock(&dentry_hash[i].lock); + d_dispose_if_unused(fd->dentry, &dispose); + cond_resched(); + spin_lock(&dentry_hash[i].lock); + } else + break; + node = rb_first(&dentry_hash[i].tree); + } + spin_unlock(&dentry_hash[i].lock); + shrink_dentry_list(&dispose); + } + + if (inval_wq) + schedule_delayed_work(&dentry_tree_work, + secs_to_jiffies(inval_wq)); +} + +void fuse_epoch_work(struct work_struct *work) +{ + struct fuse_conn *fc = container_of(work, struct fuse_conn, + epoch_work); + struct fuse_mount *fm; + struct inode *inode; + + down_read(&fc->killsb); + + inode = fuse_ilookup(fc, FUSE_ROOT_ID, &fm); + if (inode) { + iput(inode); + /* Remove all possible active references to cached inodes */ + shrink_dcache_sb(fm->sb); + } else + pr_warn("Failed to get root inode"); + + up_read(&fc->killsb); +} + +void fuse_dentry_tree_init(void) +{ + int i; + + for (i = 0; i < HASH_SIZE; i++) { + spin_lock_init(&dentry_hash[i].lock); + dentry_hash[i].tree = RB_ROOT; + } + INIT_DELAYED_WORK(&dentry_tree_work, fuse_dentry_tree_work); +} + +void fuse_dentry_tree_cleanup(void) +{ + int i; + + inval_wq = 0; + cancel_delayed_work_sync(&dentry_tree_work); + + for (i = 0; i < HASH_SIZE; i++) + WARN_ON_ONCE(!RB_EMPTY_ROOT(&dentry_hash[i].tree)); +} static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time) { - ((union fuse_dentry *) dentry->d_fsdata)->time = time; + ((struct fuse_dentry *) dentry->d_fsdata)->time = time; } static inline u64 fuse_dentry_time(const struct dentry *entry) { - return ((union fuse_dentry *) entry->d_fsdata)->time; + return ((struct fuse_dentry *) entry->d_fsdata)->time; } -#endif static void fuse_dentry_settime(struct dentry *dentry, u64 time) { @@ -81,6 +260,7 @@ static void fuse_dentry_settime(struct dentry *dentry, u64 time) } __fuse_dentry_settime(dentry, time); + fuse_dentry_tree_add_node(dentry); } /* @@ -283,21 +463,36 @@ invalid: goto out; } -#if BITS_PER_LONG < 64 static int fuse_dentry_init(struct dentry *dentry) { - dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), - GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); + struct fuse_dentry *fd; - return dentry->d_fsdata ? 0 : -ENOMEM; + fd = kzalloc(sizeof(struct fuse_dentry), + GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); + if (!fd) + return -ENOMEM; + + fd->dentry = dentry; + RB_CLEAR_NODE(&fd->node); + dentry->d_fsdata = fd; + + return 0; } + +static void fuse_dentry_prune(struct dentry *dentry) +{ + struct fuse_dentry *fd = dentry->d_fsdata; + + if (!RB_EMPTY_NODE(&fd->node)) + fuse_dentry_tree_del_node(dentry); +} + static void fuse_dentry_release(struct dentry *dentry) { - union fuse_dentry *fd = dentry->d_fsdata; + struct fuse_dentry *fd = dentry->d_fsdata; kfree_rcu(fd, rcu); } -#endif static int fuse_dentry_delete(const struct dentry *dentry) { @@ -331,10 +526,9 @@ static struct vfsmount *fuse_dentry_automount(struct path *path) const struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, .d_delete = fuse_dentry_delete, -#if BITS_PER_LONG < 64 .d_init = fuse_dentry_init, + .d_prune = fuse_dentry_prune, .d_release = fuse_dentry_release, -#endif .d_automount = fuse_dentry_automount, }; @@ -471,7 +665,7 @@ static int get_security_context(struct dentry *entry, umode_t mode, u32 total_len = sizeof(*header); int err, nr_ctx = 0; const char *name = NULL; - size_t namelen; + size_t namesize; err = security_dentry_init_security(entry, mode, &entry->d_name, &name, &lsmctx); @@ -482,12 +676,12 @@ static int get_security_context(struct dentry *entry, umode_t mode, if (lsmctx.len) { nr_ctx = 1; - namelen = strlen(name) + 1; + namesize = strlen(name) + 1; err = -EIO; - if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || + if (WARN_ON(namesize > XATTR_NAME_MAX + 1 || lsmctx.len > S32_MAX)) goto out_err; - total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + + total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namesize + lsmctx.len); } @@ -504,8 +698,8 @@ static int get_security_context(struct dentry *entry, umode_t mode, fctx->size = lsmctx.len; ptr += sizeof(*fctx); - strcpy(ptr, name); - ptr += namelen; + strscpy(ptr, name, namesize); + ptr += namesize; memcpy(ptr, lsmctx.context, lsmctx.len); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7bcb650a9f26..01bc894e9c2b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) fuse_file_io_release(ff, ra->inode); if (!args) { - /* Do nothing when server does not implement 'open' */ + /* Do nothing when server does not implement 'opendir' */ + } else if (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) { + fuse_release_end(ff->fm, args, 0); } else if (sync) { fuse_simple_request(ff->fm, args); fuse_release_end(ff->fm, args, 0); @@ -131,8 +133,17 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, struct fuse_file *ff; int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; bool open = isdir ? !fc->no_opendir : !fc->no_open; + bool release = !isdir || open; - ff = fuse_file_alloc(fm, open); + /* + * ff->args->release_args still needs to be allocated (so we can hold an + * inode reference while there are pending inflight file operations when + * ->release() is called, see fuse_prepare_release()) even if + * fc->no_open is set else it becomes possible for reclaim to deadlock + * if while servicing the readahead request the server triggers reclaim + * and reclaim evicts the inode of the file being read ahead. + */ + ff = fuse_file_alloc(fm, release); if (!ff) return ERR_PTR(-ENOMEM); @@ -152,13 +163,14 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, fuse_file_free(ff); return ERR_PTR(err); } else { - /* No release needed */ - kfree(ff->args); - ff->args = NULL; - if (isdir) + if (isdir) { + /* No release needed */ + kfree(ff->args); + ff->args = NULL; fc->no_opendir = 1; - else + } else { fc->no_open = 1; + } } } @@ -1652,7 +1664,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (!ia) return -ENOMEM; - if (fopen_direct_io && fc->direct_io_allow_mmap) { + if (fopen_direct_io) { res = filemap_write_and_wait_range(mapping, pos, pos + count - 1); if (res) { fuse_io_free(ia); @@ -1726,6 +1738,15 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (res > 0) *ppos = pos; + if (res > 0 && write && fopen_direct_io) { + /* + * As in generic_file_direct_write(), invalidate after the + * write, to invalidate read-ahead cache that may have competed + * with the write. + */ + invalidate_inode_pages2_range(mapping, idx_from, idx_to); + } + return res > 0 ? res : err; } EXPORT_SYMBOL_GPL(fuse_direct_io); diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h index 6e8373f97040..134bf44aff0d 100644 --- a/fs/fuse/fuse_dev_i.h +++ b/fs/fuse/fuse_dev_i.h @@ -62,6 +62,7 @@ void fuse_dev_end_requests(struct list_head *head); void fuse_copy_init(struct fuse_copy_state *cs, bool write, struct iov_iter *iter); +void fuse_copy_finish(struct fuse_copy_state *cs); int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs, unsigned int argpages, struct fuse_arg *args, int zeroing); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index f616c1991fed..7f16049387d1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -54,6 +54,13 @@ /** Frequency (in jiffies) of request timeout checks, if opted into */ extern const unsigned long fuse_timeout_timer_freq; +/* + * Dentries invalidation workqueue period, in seconds. The value of this + * parameter shall be >= FUSE_DENTRY_INVAL_FREQ_MIN seconds, or 0 (zero), in + * which case no workqueue will be created. + */ +extern unsigned inval_wq __read_mostly; + /** Maximum of max_pages received in init_out */ extern unsigned int fuse_max_pages_limit; /* @@ -232,6 +239,11 @@ enum { FUSE_I_BTIME, /* Wants or already has page cache IO */ FUSE_I_CACHE_IO_MODE, + /* + * Client has exclusive access to the inode, either because fs is local + * or the fuse server has an exclusive "lease" on distributed fs + */ + FUSE_I_EXCLUSIVE, }; struct fuse_conn; @@ -642,6 +654,8 @@ struct fuse_conn { /** Current epoch for up-to-date dentries */ atomic_t epoch; + struct work_struct epoch_work; + struct rcu_head rcu; /** The user id for this mount */ @@ -1038,7 +1052,7 @@ static inline struct fuse_conn *get_fuse_conn(struct inode *inode) return get_fuse_mount_super(inode->i_sb)->fc; } -static inline struct fuse_inode *get_fuse_inode(struct inode *inode) +static inline struct fuse_inode *get_fuse_inode(const struct inode *inode) { return container_of(inode, struct fuse_inode, inode); } @@ -1080,6 +1094,13 @@ static inline bool fuse_is_bad(struct inode *inode) return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); } +static inline bool fuse_inode_is_exclusive(const struct inode *inode) +{ + const struct fuse_inode *fi = get_fuse_inode(inode); + + return test_bit(FUSE_I_EXCLUSIVE, &fi->state); +} + static inline struct folio **fuse_folios_alloc(unsigned int nfolios, gfp_t flags, struct fuse_folio_desc **desc) { @@ -1269,6 +1290,11 @@ void fuse_wait_aborted(struct fuse_conn *fc); /* Check if any requests timed out */ void fuse_check_timeout(struct work_struct *work); +void fuse_dentry_tree_init(void); +void fuse_dentry_tree_cleanup(void); + +void fuse_epoch_work(struct work_struct *work); + /** * Invalidate inode attributes */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1a397be53f49..819e50d66622 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -977,6 +977,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, refcount_set(&fc->count, 1); atomic_set(&fc->dev_count, 1); atomic_set(&fc->epoch, 1); + INIT_WORK(&fc->epoch_work, fuse_epoch_work); init_waitqueue_head(&fc->blocked_waitq); fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); INIT_LIST_HEAD(&fc->bg_queue); @@ -1021,26 +1022,28 @@ static void delayed_release(struct rcu_head *p) void fuse_conn_put(struct fuse_conn *fc) { - if (refcount_dec_and_test(&fc->count)) { - struct fuse_iqueue *fiq = &fc->iq; - struct fuse_sync_bucket *bucket; - - if (IS_ENABLED(CONFIG_FUSE_DAX)) - fuse_dax_conn_free(fc); - if (fc->timeout.req_timeout) - cancel_delayed_work_sync(&fc->timeout.work); - if (fiq->ops->release) - fiq->ops->release(fiq); - put_pid_ns(fc->pid_ns); - bucket = rcu_dereference_protected(fc->curr_bucket, 1); - if (bucket) { - WARN_ON(atomic_read(&bucket->count) != 1); - kfree(bucket); - } - if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) - fuse_backing_files_free(fc); - call_rcu(&fc->rcu, delayed_release); + struct fuse_iqueue *fiq = &fc->iq; + struct fuse_sync_bucket *bucket; + + if (!refcount_dec_and_test(&fc->count)) + return; + + if (IS_ENABLED(CONFIG_FUSE_DAX)) + fuse_dax_conn_free(fc); + if (fc->timeout.req_timeout) + cancel_delayed_work_sync(&fc->timeout.work); + cancel_work_sync(&fc->epoch_work); + if (fiq->ops->release) + fiq->ops->release(fiq); + put_pid_ns(fc->pid_ns); + bucket = rcu_dereference_protected(fc->curr_bucket, 1); + if (bucket) { + WARN_ON(atomic_read(&bucket->count) != 1); + kfree(bucket); } + if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) + fuse_backing_files_free(fc); + call_rcu(&fc->rcu, delayed_release); } EXPORT_SYMBOL_GPL(fuse_conn_put); @@ -2283,6 +2286,8 @@ static int __init fuse_init(void) if (res) goto err_sysfs_cleanup; + fuse_dentry_tree_init(); + sanitize_global_limit(&max_user_bgreq); sanitize_global_limit(&max_user_congthresh); @@ -2302,6 +2307,7 @@ static void __exit fuse_exit(void) { pr_debug("exit\n"); + fuse_dentry_tree_cleanup(); fuse_ctl_cleanup(); fuse_sysfs_cleanup(); fuse_fs_cleanup(); diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 15b2f094d36e..aa02599b770f 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -3,40 +3,8 @@ #define __UM_FS_HOSTFS #include <os.h> +#include <generated/asm-offsets.h> -/* - * These are exactly the same definitions as in fs.h, but the names are - * changed so that this file can be included in both kernel and user files. - */ - -#define HOSTFS_ATTR_MODE 1 -#define HOSTFS_ATTR_UID 2 -#define HOSTFS_ATTR_GID 4 -#define HOSTFS_ATTR_SIZE 8 -#define HOSTFS_ATTR_ATIME 16 -#define HOSTFS_ATTR_MTIME 32 -#define HOSTFS_ATTR_CTIME 64 -#define HOSTFS_ATTR_ATIME_SET 128 -#define HOSTFS_ATTR_MTIME_SET 256 - -/* This one is unused by hostfs. */ -#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ -#define HOSTFS_ATTR_ATTR_FLAG 1024 - -/* - * If you are very careful, you'll notice that these two are missing: - * - * #define ATTR_KILL_SUID 2048 - * #define ATTR_KILL_SGID 4096 - * - * and this is because they were added in 2.5 development. - * Actually, they are not needed by most ->setattr() methods - they are set by - * callers of notify_change() to notify that the setuid/setgid bits must be - * dropped. - * notify_change() will delete those flags, make sure attr->ia_valid & ATTR_MODE - * is on, and remove the appropriate bits from attr->ia_mode (attr is a - * "struct iattr *"). -BlaisorBlade - */ struct hostfs_timespec { long long tv_sec; long long tv_nsec; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index f42548ee9083..3b4c152c5c73 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -96,8 +96,15 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = { #define PGOFF_LOFFT_MAX \ (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1))) -static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int hugetlb_file_mmap_prepare_success(const struct vm_area_struct *vma) { + /* Unfortunate we have to reassign vma->vm_private_data. */ + return hugetlb_vma_lock_alloc((struct vm_area_struct *)vma); +} + +static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc) +{ + struct file *file = desc->file; struct inode *inode = file_inode(file); loff_t len, vma_len; int ret; @@ -112,8 +119,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap unwinds (may be important on powerpc * and ia64). */ - vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND); - vma->vm_ops = &hugetlb_vm_ops; + desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; + desc->vm_ops = &hugetlb_vm_ops; /* * page based offset in vm_pgoff could be sufficiently large to @@ -122,16 +129,16 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * sizeof(unsigned long). So, only check in those instances. */ if (sizeof(unsigned long) == sizeof(loff_t)) { - if (vma->vm_pgoff & PGOFF_LOFFT_MAX) + if (desc->pgoff & PGOFF_LOFFT_MAX) return -EINVAL; } /* must be huge page aligned */ - if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) + if (desc->pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; - vma_len = (loff_t)(vma->vm_end - vma->vm_start); - len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); + vma_len = (loff_t)vma_desc_size(desc); + len = vma_len + ((loff_t)desc->pgoff << PAGE_SHIFT); /* check for overflow */ if (len < vma_len) return -EINVAL; @@ -141,7 +148,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) ret = -ENOMEM; - vm_flags = vma->vm_flags; + vm_flags = desc->vm_flags; /* * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip * reserving here. Note: only for SHM hugetlbfs file, the inode @@ -151,17 +158,30 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vm_flags |= VM_NORESERVE; if (hugetlb_reserve_pages(inode, - vma->vm_pgoff >> huge_page_order(h), - len >> huge_page_shift(h), vma, - vm_flags) < 0) + desc->pgoff >> huge_page_order(h), + len >> huge_page_shift(h), desc, + vm_flags) < 0) goto out; ret = 0; - if (vma->vm_flags & VM_WRITE && inode->i_size < len) + if ((desc->vm_flags & VM_WRITE) && inode->i_size < len) i_size_write(inode, len); out: inode_unlock(inode); + if (!ret) { + /* Allocate the VMA lock after we set it up. */ + desc->action.success_hook = hugetlb_file_mmap_prepare_success; + /* + * We cannot permit the rmap finding this VMA in the time + * between the VMA being inserted into the VMA tree and the + * completion/success hook being invoked. + * + * This is because we establish a per-VMA hugetlb lock which can + * be raced by rmap. + */ + desc->action.hide_from_rmap_until_complete = true; + } return ret; } @@ -184,8 +204,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (addr) addr0 = ALIGN(addr, huge_page_size(h)); - return mm_get_unmapped_area_vmflags(current->mm, file, addr0, len, pgoff, - flags, 0); + return mm_get_unmapped_area_vmflags(file, addr0, len, pgoff, flags, 0); } /* @@ -975,8 +994,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir, if (!inode) return -ENOSPC; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); - d_instantiate(dentry, inode); - dget(dentry);/* Extra count - pin the dentry in core */ + d_make_persistent(dentry, inode); return 0; } @@ -1023,10 +1041,9 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap, if (inode) { int l = strlen(symname)+1; error = page_symlink(inode, symname, l); - if (!error) { - d_instantiate(dentry, inode); - dget(dentry); - } else + if (!error) + d_make_persistent(dentry, inode); + else iput(inode); } inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); @@ -1221,7 +1238,7 @@ static void init_once(void *foo) static const struct file_operations hugetlbfs_file_operations = { .read_iter = hugetlbfs_read_iter, - .mmap = hugetlbfs_file_mmap, + .mmap_prepare = hugetlbfs_file_mmap_prepare, .fsync = noop_fsync, .get_unmapped_area = hugetlb_get_unmapped_area, .llseek = default_llseek, @@ -1483,7 +1500,7 @@ static struct file_system_type hugetlbfs_fs_type = { .name = "hugetlbfs", .init_fs_context = hugetlbfs_init_fs_context, .parameters = hugetlb_fs_parameters, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, .fs_flags = FS_ALLOW_IDMAP, }; diff --git a/fs/inode.c b/fs/inode.c index cc8265cfe80e..521383223d8a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1968,7 +1968,7 @@ void iput(struct inode *inode) retry: lockdep_assert_not_held(&inode->i_lock); - VFS_BUG_ON_INODE(inode_state_read_once(inode) & I_CLEAR, inode); + VFS_BUG_ON_INODE(inode_state_read_once(inode) & (I_FREEING | I_CLEAR), inode); /* * Note this assert is technically racy as if the count is bogusly * equal to one, then two CPUs racing to further drop it can both @@ -2010,6 +2010,7 @@ EXPORT_SYMBOL(iput); */ void iput_not_last(struct inode *inode) { + VFS_BUG_ON_INODE(inode_state_read_once(inode) & (I_FREEING | I_CLEAR), inode); VFS_BUG_ON_INODE(atomic_read(&inode->i_count) < 2, inode); WARN_ON(atomic_sub_return(1, &inode->i_count) == 0); diff --git a/fs/internal.h b/fs/internal.h index d08d5e2235e9..ab638d41ab81 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -230,7 +230,6 @@ extern void shrink_dcache_for_umount(struct super_block *); extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, unsigned *seq); -extern void d_genocide(struct dentry *); /* * pipe.c diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index a670ba3e565e..5c0efd6b239f 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -675,11 +675,14 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, if (parent) { ret = security_kernfs_init_security(parent, kn); if (ret) - goto err_out3; + goto err_out4; } return kn; + err_out4: + simple_xattrs_free(&kn->iattr->xattrs, NULL); + kmem_cache_free(kernfs_iattrs_cache, kn->iattr); err_out3: spin_lock(&root->kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 76eaf64b9d9e..3ac52e141766 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -298,6 +298,7 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) sb->s_export_op = &kernfs_export_ops; sb->s_time_gran = 1; + sb->s_maxbytes = MAX_LFS_FILESIZE; /* sysfs dentries and inodes don't require IO to create */ sb->s_shrink->seeks = 0; diff --git a/fs/libfs.c b/fs/libfs.c index 2d6657947abd..9264523be85c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -630,7 +630,7 @@ static void __simple_recursive_removal(struct dentry *dentry, if (callback) callback(victim); fsnotify_delete(inode, d_inode(victim), victim); - dput(victim); // unpin it + d_make_discardable(victim); } if (victim == dentry) { inode_set_mtime_to_ts(inode, @@ -655,6 +655,19 @@ void simple_recursive_removal(struct dentry *dentry, } EXPORT_SYMBOL(simple_recursive_removal); +void simple_remove_by_name(struct dentry *parent, const char *name, + void (*callback)(struct dentry *)) +{ + struct dentry *dentry; + + dentry = lookup_noperm_positive_unlocked(&QSTR(name), parent); + if (!IS_ERR(dentry)) { + simple_recursive_removal(dentry, callback); + dput(dentry); // paired with lookup_noperm_positive_unlocked() + } +} +EXPORT_SYMBOL(simple_remove_by_name); + /* caller holds parent directory with I_MUTEX_PARENT */ void locked_recursive_removal(struct dentry *dentry, void (*callback)(struct dentry *)) @@ -752,8 +765,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inc_nlink(inode); ihold(inode); - dget(dentry); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); return 0; } EXPORT_SYMBOL(simple_link); @@ -779,14 +791,28 @@ out: } EXPORT_SYMBOL(simple_empty); -int simple_unlink(struct inode *dir, struct dentry *dentry) +void __simple_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); drop_nlink(inode); - dput(dentry); +} +EXPORT_SYMBOL(__simple_unlink); + +void __simple_rmdir(struct inode *dir, struct dentry *dentry) +{ + drop_nlink(d_inode(dentry)); + __simple_unlink(dir, dentry); + drop_nlink(dir); +} +EXPORT_SYMBOL(__simple_rmdir); + +int simple_unlink(struct inode *dir, struct dentry *dentry) +{ + __simple_unlink(dir, dentry); + d_make_discardable(dentry); return 0; } EXPORT_SYMBOL(simple_unlink); @@ -796,9 +822,8 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) if (!simple_empty(dentry)) return -ENOTEMPTY; - drop_nlink(d_inode(dentry)); - simple_unlink(dir, dentry); - drop_nlink(dir); + __simple_rmdir(dir, dentry); + d_make_discardable(dentry); return 0; } EXPORT_SYMBOL(simple_rmdir); @@ -1066,7 +1091,8 @@ int simple_fill_super(struct super_block *s, unsigned long magic, simple_inode_init_ts(inode); inode->i_fop = files->ops; inode->i_ino = i; - d_add(dentry, inode); + d_make_persistent(dentry, inode); + dput(dentry); } return 0; } @@ -2312,3 +2338,11 @@ struct dentry *simple_start_creating(struct dentry *parent, const char *name) return start_dirop(parent, &qname, LOOKUP_CREATE | LOOKUP_EXCL); } EXPORT_SYMBOL(simple_start_creating); + +/* parent must have been held exclusive since simple_start_creating() */ +void simple_done_creating(struct dentry *child) +{ + inode_unlock(child->d_parent->d_inode); + dput(child); +} +EXPORT_SYMBOL(simple_done_creating); diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index a31dc9588eb8..3a3d05cfe09a 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -495,6 +495,9 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, (long long)lock->fl.fl_end, wait); + if (nlmsvc_file_cannot_lock(file)) + return nlm_lck_denied_nolocks; + if (!locks_can_async_lock(nlmsvc_file_file(file)->f_op)) { async_block = wait; wait = 0; @@ -621,6 +624,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); + if (nlmsvc_file_cannot_lock(file)) + return nlm_lck_denied_nolocks; + if (locks_in_grace(SVC_NET(rqstp))) { ret = nlm_lck_denied_grace_period; goto out; @@ -678,6 +684,9 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); + if (nlmsvc_file_cannot_lock(file)) + return nlm_lck_denied_nolocks; + /* First, cancel any lock that might be there */ nlmsvc_cancel_blocked(net, file, lock); @@ -715,6 +724,9 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); + if (nlmsvc_file_cannot_lock(file)) + return nlm_lck_denied_nolocks; + if (locks_in_grace(net)) return nlm_lck_denied_grace_period; diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c index ade4931b2da2..88c81ce1148d 100644 --- a/fs/lockd/svcshare.c +++ b/fs/lockd/svcshare.c @@ -32,6 +32,9 @@ nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file, struct xdr_netobj *oh = &argp->lock.oh; u8 *ohdata; + if (nlmsvc_file_cannot_lock(file)) + return nlm_lck_denied_nolocks; + for (share = file->f_shares; share; share = share->s_next) { if (share->s_host == host && nlm_cmp_owner(share, oh)) goto update; @@ -72,6 +75,9 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file, struct nlm_share *share, **shpp; struct xdr_netobj *oh = &argp->lock.oh; + if (nlmsvc_file_cannot_lock(file)) + return nlm_lck_denied_nolocks; + for (shpp = &file->f_shares; (share = *shpp) != NULL; shpp = &share->s_next) { if (share->s_host == host && nlm_cmp_owner(share, oh)) { diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 49ed90c6b9f2..f33bfa7b58e6 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -615,8 +615,11 @@ static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret) nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */ } -static void do_nfs_local_call_read(struct nfs_local_kiocb *iocb, struct file *filp) +static void nfs_local_call_read(struct work_struct *work) { + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + struct file *filp = iocb->kiocb.ki_filp; bool force_done = false; ssize_t status; int n_iters; @@ -633,7 +636,9 @@ static void do_nfs_local_call_read(struct nfs_local_kiocb *iocb, struct file *fi } else iocb->kiocb.ki_flags &= ~IOCB_DIRECT; - status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); + scoped_with_creds(filp->f_cred) + status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); + if (status != -EIOCBQUEUED) { if (unlikely(status >= 0 && status < iocb->iters[i].count)) force_done = true; /* Partial read */ @@ -645,16 +650,6 @@ static void do_nfs_local_call_read(struct nfs_local_kiocb *iocb, struct file *fi } } -static void nfs_local_call_read(struct work_struct *work) -{ - struct nfs_local_kiocb *iocb = - container_of(work, struct nfs_local_kiocb, work); - struct file *filp = iocb->kiocb.ki_filp; - - scoped_with_creds(filp->f_cred) - do_nfs_local_call_read(iocb, filp); -} - static int nfs_local_do_read(struct nfs_local_kiocb *iocb, const struct rpc_call_ops *call_ops) @@ -822,13 +817,18 @@ static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */ } -static ssize_t do_nfs_local_call_write(struct nfs_local_kiocb *iocb, - struct file *filp) +static void nfs_local_call_write(struct work_struct *work) { + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + struct file *filp = iocb->kiocb.ki_filp; + unsigned long old_flags = current->flags; bool force_done = false; ssize_t status; int n_iters; + current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; + file_start_write(filp); n_iters = atomic_read(&iocb->n_iters); for (int i = 0; i < n_iters ; i++) { @@ -842,7 +842,9 @@ static ssize_t do_nfs_local_call_write(struct nfs_local_kiocb *iocb, } else iocb->kiocb.ki_flags &= ~IOCB_DIRECT; - status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]); + scoped_with_creds(filp->f_cred) + status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]); + if (status != -EIOCBQUEUED) { if (unlikely(status >= 0 && status < iocb->iters[i].count)) force_done = true; /* Partial write */ @@ -854,22 +856,6 @@ static ssize_t do_nfs_local_call_write(struct nfs_local_kiocb *iocb, } file_end_write(filp); - return status; -} - -static void nfs_local_call_write(struct work_struct *work) -{ - struct nfs_local_kiocb *iocb = - container_of(work, struct nfs_local_kiocb, work); - struct file *filp = iocb->kiocb.ki_filp; - unsigned long old_flags = current->flags; - ssize_t status; - - current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; - - scoped_with_creds(filp->f_cred) - status = do_nfs_local_call_write(iocb, filp); - current->flags = old_flags; } diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index e134dce45e35..0b5c1a0bf1cf 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -5,6 +5,7 @@ config NFSD depends on FILE_LOCKING depends on FSNOTIFY select CRC32 + select CRYPTO_LIB_MD5 if NFSD_LEGACY_CLIENT_TRACKING select CRYPTO_LIB_SHA256 if NFSD_V4 select LOCKD select SUNRPC @@ -77,8 +78,7 @@ config NFSD_V4 depends on NFSD && PROC_FS select FS_POSIX_ACL select RPCSEC_GSS_KRB5 - select CRYPTO - select CRYPTO_MD5 + select CRYPTO # required by RPCSEC_GSS_KRB5 select GRACE_PERIOD select NFS_V4_2_SSC_HELPER if NFS_V4_2 help @@ -164,7 +164,7 @@ config NFSD_V4_SECURITY_LABEL config NFSD_LEGACY_CLIENT_TRACKING bool "Support legacy NFSv4 client tracking methods (DEPRECATED)" depends on NFSD_V4 - default y + default n help The NFSv4 server needs to store a small amount of information on stable storage in order to handle state recovery after reboot. Most diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index fde5539cf6a6..afa16d7a8013 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -13,70 +13,49 @@ #include "pnfs.h" #include "filecache.h" #include "vfs.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS +/* + * Get an extent from the file system that starts at offset or below + * and may be shorter than the requested length. + */ static __be32 -nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode, - const struct svc_fh *fhp, struct nfsd4_layoutget *args) +nfsd4_block_map_extent(struct inode *inode, const struct svc_fh *fhp, + u64 offset, u64 length, u32 iomode, u64 minlength, + struct pnfs_block_extent *bex) { - struct nfsd4_layout_seg *seg = &args->lg_seg; struct super_block *sb = inode->i_sb; - u32 block_size = i_blocksize(inode); - struct pnfs_block_extent *bex; struct iomap iomap; u32 device_generation = 0; int error; - if (locks_in_grace(SVC_NET(rqstp))) - return nfserr_grace; - - if (seg->offset & (block_size - 1)) { - dprintk("pnfsd: I/O misaligned\n"); - goto out_layoutunavailable; - } - - /* - * Some clients barf on non-zero block numbers for NONE or INVALID - * layouts, so make sure to zero the whole structure. - */ - error = -ENOMEM; - bex = kzalloc(sizeof(*bex), GFP_KERNEL); - if (!bex) - goto out_error; - args->lg_content = bex; - - error = sb->s_export_op->map_blocks(inode, seg->offset, seg->length, - &iomap, seg->iomode != IOMODE_READ, - &device_generation); + error = sb->s_export_op->map_blocks(inode, offset, length, &iomap, + iomode != IOMODE_READ, &device_generation); if (error) { if (error == -ENXIO) - goto out_layoutunavailable; - goto out_error; - } - - if (iomap.length < args->lg_minlength) { - dprintk("pnfsd: extent smaller than minlength\n"); - goto out_layoutunavailable; + return nfserr_layoutunavailable; + return nfserrno(error); } switch (iomap.type) { case IOMAP_MAPPED: - if (seg->iomode == IOMODE_READ) + if (iomode == IOMODE_READ) bex->es = PNFS_BLOCK_READ_DATA; else bex->es = PNFS_BLOCK_READWRITE_DATA; bex->soff = iomap.addr; break; case IOMAP_UNWRITTEN: - if (seg->iomode & IOMODE_RW) { + if (iomode & IOMODE_RW) { /* * Crack monkey special case from section 2.3.1. */ - if (args->lg_minlength == 0) { + if (minlength == 0) { dprintk("pnfsd: no soup for you!\n"); - goto out_layoutunavailable; + return nfserr_layoutunavailable; } bex->es = PNFS_BLOCK_INVALID_DATA; @@ -85,7 +64,7 @@ nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode, } fallthrough; case IOMAP_HOLE: - if (seg->iomode == IOMODE_READ) { + if (iomode == IOMODE_READ) { bex->es = PNFS_BLOCK_NONE_DATA; break; } @@ -93,27 +72,107 @@ nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode, case IOMAP_DELALLOC: default: WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type); - goto out_layoutunavailable; + return nfserr_layoutunavailable; } error = nfsd4_set_deviceid(&bex->vol_id, fhp, device_generation); if (error) - goto out_error; + return nfserrno(error); + bex->foff = iomap.offset; bex->len = iomap.length; + return nfs_ok; +} - seg->offset = iomap.offset; - seg->length = iomap.length; +static __be32 +nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode, + const struct svc_fh *fhp, struct nfsd4_layoutget *args) +{ + struct nfsd4_layout_seg *seg = &args->lg_seg; + struct pnfs_block_layout *bl; + struct pnfs_block_extent *first_bex, *last_bex; + u64 offset = seg->offset, length = seg->length; + u32 i, nr_extents_max, block_size = i_blocksize(inode); + __be32 nfserr; - dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es); - return 0; + if (locks_in_grace(SVC_NET(rqstp))) + return nfserr_grace; + + nfserr = nfserr_layoutunavailable; + if (seg->offset & (block_size - 1)) { + dprintk("pnfsd: I/O misaligned\n"); + goto out_error; + } + + /* + * RFC 8881, section 3.3.17: + * The layout4 data type defines a layout for a file. + * + * RFC 8881, section 18.43.3: + * The loga_maxcount field specifies the maximum layout size + * (in bytes) that the client can handle. If the size of the + * layout structure exceeds the size specified by maxcount, + * the metadata server will return the NFS4ERR_TOOSMALL error. + */ + nfserr = nfserr_toosmall; + if (args->lg_maxcount < PNFS_BLOCK_LAYOUT4_SIZE + + PNFS_BLOCK_EXTENT_SIZE) + goto out_error; + + /* + * Limit the maximum layout size to avoid allocating + * a large buffer on the server for each layout request. + */ + nr_extents_max = (min(args->lg_maxcount, PAGE_SIZE) - + PNFS_BLOCK_LAYOUT4_SIZE) / PNFS_BLOCK_EXTENT_SIZE; + + /* + * Some clients barf on non-zero block numbers for NONE or INVALID + * layouts, so make sure to zero the whole structure. + */ + nfserr = nfserrno(-ENOMEM); + bl = kzalloc(struct_size(bl, extents, nr_extents_max), GFP_KERNEL); + if (!bl) + goto out_error; + bl->nr_extents = nr_extents_max; + args->lg_content = bl; + + for (i = 0; i < bl->nr_extents; i++) { + struct pnfs_block_extent *bex = bl->extents + i; + u64 bex_length; + + nfserr = nfsd4_block_map_extent(inode, fhp, offset, length, + seg->iomode, args->lg_minlength, bex); + if (nfserr != nfs_ok) + goto out_error; + + bex_length = bex->len - (offset - bex->foff); + if (bex_length >= length) { + bl->nr_extents = i + 1; + break; + } + + offset = bex->foff + bex->len; + length -= bex_length; + } + + first_bex = bl->extents; + last_bex = bl->extents + bl->nr_extents - 1; + + nfserr = nfserr_layoutunavailable; + length = last_bex->foff + last_bex->len - seg->offset; + if (length < args->lg_minlength) { + dprintk("pnfsd: extent smaller than minlength\n"); + goto out_error; + } + + seg->offset = first_bex->foff; + seg->length = last_bex->foff - first_bex->foff + last_bex->len; + return nfs_ok; out_error: seg->length = 0; - return nfserrno(error); -out_layoutunavailable: - seg->length = 0; - return nfserr_layoutunavailable; + return nfserr; } static __be32 @@ -340,9 +399,12 @@ nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file) { struct nfs4_client *clp = ls->ls_stid.sc_client; struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev; + int status; - bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, - nfsd4_scsi_pr_key(clp), 0, true); + status = bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, + nfsd4_scsi_pr_key(clp), + PR_EXCLUSIVE_ACCESS_REG_ONLY, true); + trace_nfsd_pnfs_fence(clp, bdev->bd_disk->disk_name, status); } const struct nfsd4_layout_ops scsi_layout_ops = { diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index e50afe340737..196ef4245604 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -14,12 +14,25 @@ #define NFSDDBG_FACILITY NFSDDBG_PNFS +/** + * nfsd4_block_encode_layoutget - encode block/scsi layout extent array + * @xdr: stream for data encoding + * @lgp: layoutget content, actually an array of extents to encode + * + * Encode the opaque loc_body field in the layoutget response. Since the + * pnfs_block_layout4 and pnfs_scsi_layout4 structures on the wire are + * the same, this function is used by both layout drivers. + * + * Return values: + * %nfs_ok: Success, all extents encoded into @xdr + * %nfserr_toosmall: Not enough space in @xdr to encode all the data + */ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, const struct nfsd4_layoutget *lgp) { - const struct pnfs_block_extent *b = lgp->lg_content; - int len = sizeof(__be32) + 5 * sizeof(__be64) + sizeof(__be32); + const struct pnfs_block_layout *bl = lgp->lg_content; + u32 i, len = sizeof(__be32) + bl->nr_extents * PNFS_BLOCK_EXTENT_SIZE; __be32 *p; p = xdr_reserve_space(xdr, sizeof(__be32) + len); @@ -27,14 +40,19 @@ nfsd4_block_encode_layoutget(struct xdr_stream *xdr, return nfserr_toosmall; *p++ = cpu_to_be32(len); - *p++ = cpu_to_be32(1); /* we always return a single extent */ + *p++ = cpu_to_be32(bl->nr_extents); - p = svcxdr_encode_deviceid4(p, &b->vol_id); - p = xdr_encode_hyper(p, b->foff); - p = xdr_encode_hyper(p, b->len); - p = xdr_encode_hyper(p, b->soff); - *p++ = cpu_to_be32(b->es); - return 0; + for (i = 0; i < bl->nr_extents; i++) { + const struct pnfs_block_extent *bex = bl->extents + i; + + p = svcxdr_encode_deviceid4(p, &bex->vol_id); + p = xdr_encode_hyper(p, bex->foff); + p = xdr_encode_hyper(p, bex->len); + p = xdr_encode_hyper(p, bex->soff); + *p++ = cpu_to_be32(bex->es); + } + + return nfs_ok; } static int diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h index 7d25ef689671..2e0c6c7d2b42 100644 --- a/fs/nfsd/blocklayoutxdr.h +++ b/fs/nfsd/blocklayoutxdr.h @@ -8,6 +8,15 @@ struct iomap; struct xdr_stream; +/* On the wire size of the layout4 struct with zero number of extents */ +#define PNFS_BLOCK_LAYOUT4_SIZE \ + (sizeof(__be32) * 2 + /* offset4 */ \ + sizeof(__be32) * 2 + /* length4 */ \ + sizeof(__be32) + /* layoutiomode4 */ \ + sizeof(__be32) + /* layouttype4 */ \ + sizeof(__be32) + /* number of bytes */ \ + sizeof(__be32)) /* number of extents */ + struct pnfs_block_extent { struct nfsd4_deviceid vol_id; u64 foff; @@ -21,6 +30,11 @@ struct pnfs_block_range { u64 len; }; +struct pnfs_block_layout { + u32 nr_extents; + struct pnfs_block_extent extents[] __counted_by(nr_extents); +}; + /* * Random upper cap for the uuid length to avoid unbounded allocation. * Not actually limited by the protocol. diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c index ed2b9e066206..7f44689e0a53 100644 --- a/fs/nfsd/debugfs.c +++ b/fs/nfsd/debugfs.c @@ -44,6 +44,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n"); * Contents: * %0: NFS READ will use buffered IO * %1: NFS READ will use dontcache (buffered IO w/ dropbehind) + * %2: NFS READ will use direct IO * * This setting takes immediate effect for all NFS versions, * all exports, and in all NFSD net namespaces. @@ -64,6 +65,7 @@ static int nfsd_io_cache_read_set(void *data, u64 val) nfsd_io_cache_read = NFSD_IO_BUFFERED; break; case NFSD_IO_DONTCACHE: + case NFSD_IO_DIRECT: /* * Must disable splice_read when enabling * NFSD_IO_DONTCACHE. @@ -106,6 +108,7 @@ static int nfsd_io_cache_write_set(void *data, u64 val) switch (val) { case NFSD_IO_BUFFERED: case NFSD_IO_DONTCACHE: + case NFSD_IO_DIRECT: nfsd_io_cache_write = val; break; default: diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index b39d4cbdfd35..441dfbfe2d2b 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -32,7 +32,7 @@ * */ -#include <crypto/hash.h> +#include <crypto/md5.h> #include <crypto/sha2.h> #include <linux/file.h> #include <linux/slab.h> @@ -92,79 +92,29 @@ nfs4_reset_creds(const struct cred *original) put_cred(revert_creds(original)); } -static int +static void nfs4_make_rec_clidname(char dname[HEXDIR_LEN], const struct xdr_netobj *clname) { u8 digest[MD5_DIGEST_SIZE]; - struct crypto_shash *tfm; - int status; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); - tfm = crypto_alloc_shash("md5", 0, 0); - if (IS_ERR(tfm)) { - status = PTR_ERR(tfm); - goto out_no_tfm; - } - status = crypto_shash_tfm_digest(tfm, clname->data, clname->len, - digest); - if (status) - goto out; + md5(clname->data, clname->len, digest); static_assert(HEXDIR_LEN == 2 * MD5_DIGEST_SIZE + 1); sprintf(dname, "%*phN", MD5_DIGEST_SIZE, digest); - - status = 0; -out: - crypto_free_shash(tfm); -out_no_tfm: - return status; -} - -/* - * If we had an error generating the recdir name for the legacy tracker - * then warn the admin. If the error doesn't appear to be transient, - * then disable recovery tracking. - */ -static void -legacy_recdir_name_error(struct nfs4_client *clp, int error) -{ - printk(KERN_ERR "NFSD: unable to generate recoverydir " - "name (%d).\n", error); - - /* - * if the algorithm just doesn't exist, then disable the recovery - * tracker altogether. The crypto libs will generally return this if - * FIPS is enabled as well. - */ - if (error == -ENOENT) { - printk(KERN_ERR "NFSD: disabling legacy clientid tracking. " - "Reboot recovery will not function correctly!\n"); - nfsd4_client_tracking_exit(clp->net); - } } static void __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp, - const char *dname, int len, struct nfsd_net *nn) + char *dname, struct nfsd_net *nn) { - struct xdr_netobj name; + struct xdr_netobj name = { .len = strlen(dname), .data = dname }; struct xdr_netobj princhash = { .len = 0, .data = NULL }; struct nfs4_client_reclaim *crp; - name.data = kmemdup(dname, len, GFP_KERNEL); - if (!name.data) { - dprintk("%s: failed to allocate memory for name.data!\n", - __func__); - return; - } - name.len = len; crp = nfs4_client_to_reclaim(name, princhash, nn); - if (!crp) { - kfree(name.data); - return; - } crp->cr_clp = clp; } @@ -182,9 +132,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) if (!nn->rec_file) return; - status = nfs4_make_rec_clidname(dname, &clp->cl_name); - if (status) - return legacy_recdir_name_error(clp, status); + nfs4_make_rec_clidname(dname, &clp->cl_name); status = nfs4_save_creds(&original_cred); if (status < 0) @@ -219,8 +167,7 @@ out_end: out: if (status == 0) { if (nn->in_grace) - __nfsd4_create_reclaim_record_grace(clp, dname, - HEXDIR_LEN, nn); + __nfsd4_create_reclaim_record_grace(clp, dname, nn); vfs_fsync(nn->rec_file, 0); } else { printk(KERN_ERR "NFSD: failed to write recovery record" @@ -233,7 +180,7 @@ out_creds: nfs4_reset_creds(original_cred); } -typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *); +typedef int (recdir_func)(struct dentry *, char *, struct nfsd_net *); struct name_list { char name[HEXDIR_LEN]; @@ -287,24 +234,14 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) } status = iterate_dir(nn->rec_file, &ctx.ctx); - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); list_for_each_entry_safe(entry, tmp, &ctx.names, list) { - if (!status) { - struct dentry *dentry; - dentry = lookup_one(&nop_mnt_idmap, - &QSTR(entry->name), dir); - if (IS_ERR(dentry)) { - status = PTR_ERR(dentry); - break; - } - status = f(dir, dentry, nn); - dput(dentry); - } + if (!status) + status = f(dir, entry->name, nn); + list_del(&entry->list); kfree(entry); } - inode_unlock(d_inode(dir)); nfs4_reset_creds(original_cred); list_for_each_entry_safe(entry, tmp, &ctx.names, list) { @@ -364,9 +301,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; - status = nfs4_make_rec_clidname(dname, &clp->cl_name); - if (status) - return legacy_recdir_name_error(clp, status); + nfs4_make_rec_clidname(dname, &clp->cl_name); status = mnt_want_write_file(nn->rec_file); if (status) @@ -394,18 +329,19 @@ out: } static int -purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) +purge_old(struct dentry *parent, char *cname, struct nfsd_net *nn) { int status; + struct dentry *child; struct xdr_netobj name; - if (child->d_name.len != HEXDIR_LEN - 1) { - printk("%s: illegal name %pd in recovery directory\n", - __func__, child); + if (strlen(cname) != HEXDIR_LEN - 1) { + printk("%s: illegal name %s in recovery directory\n", + __func__, cname); /* Keep trying; maybe the others are OK: */ return 0; } - name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL); + name.data = kstrdup(cname, GFP_KERNEL); if (!name.data) { dprintk("%s: failed to allocate memory for name.data!\n", __func__); @@ -415,10 +351,17 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) if (nfs4_has_reclaimed_state(name, nn)) goto out_free; - status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child, NULL); - if (status) - printk("failed to remove client recovery directory %pd\n", - child); + inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); + child = lookup_one(&nop_mnt_idmap, &QSTR(cname), parent); + if (!IS_ERR(child)) { + status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child, NULL); + if (status) + printk("failed to remove client recovery directory %pd\n", + child); + dput(child); + } + inode_unlock(d_inode(parent)); + out_free: kfree(name.data); out: @@ -449,27 +392,18 @@ out: } static int -load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) +load_recdir(struct dentry *parent, char *cname, struct nfsd_net *nn) { - struct xdr_netobj name; + struct xdr_netobj name = { .len = HEXDIR_LEN, .data = cname }; struct xdr_netobj princhash = { .len = 0, .data = NULL }; - if (child->d_name.len != HEXDIR_LEN - 1) { - printk("%s: illegal name %pd in recovery directory\n", - __func__, child); + if (strlen(cname) != HEXDIR_LEN - 1) { + printk("%s: illegal name %s in recovery directory\n", + __func__, cname); /* Keep trying; maybe the others are OK: */ return 0; } - name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL); - if (!name.data) { - dprintk("%s: failed to allocate memory for name.data!\n", - __func__); - goto out; - } - name.len = HEXDIR_LEN; - if (!nfs4_client_to_reclaim(name, princhash, nn)) - kfree(name.data); -out: + nfs4_client_to_reclaim(name, princhash, nn); return 0; } @@ -647,7 +581,6 @@ nfs4_recoverydir(void) static int nfsd4_check_legacy_client(struct nfs4_client *clp) { - int status; char dname[HEXDIR_LEN]; struct nfs4_client_reclaim *crp; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -657,11 +590,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp) if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return 0; - status = nfs4_make_rec_clidname(dname, &clp->cl_name); - if (status) { - legacy_recdir_name_error(clp, status); - return status; - } + nfs4_make_rec_clidname(dname, &clp->cl_name); /* look for it in the reclaim hashtable otherwise */ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL); @@ -767,6 +696,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, { uint8_t cmd, princhashlen; struct xdr_netobj name, princhash = { .len = 0, .data = NULL }; + char *namecopy __free(kfree) = NULL; + char *princhashcopy __free(kfree) = NULL; uint16_t namelen; if (get_user(cmd, &cmsg->cm_cmd)) { @@ -784,19 +715,19 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, dprintk("%s: invalid namelen (%u)", __func__, namelen); return -EINVAL; } - name.data = memdup_user(&ci->cc_name.cn_id, namelen); - if (IS_ERR(name.data)) - return PTR_ERR(name.data); + namecopy = memdup_user(&ci->cc_name.cn_id, namelen); + if (IS_ERR(namecopy)) + return PTR_ERR(namecopy); + name.data = namecopy; name.len = namelen; get_user(princhashlen, &ci->cc_princhash.cp_len); if (princhashlen > 0) { - princhash.data = memdup_user( - &ci->cc_princhash.cp_data, - princhashlen); - if (IS_ERR(princhash.data)) { - kfree(name.data); - return PTR_ERR(princhash.data); - } + princhashcopy = memdup_user( + &ci->cc_princhash.cp_data, + princhashlen); + if (IS_ERR(princhashcopy)) + return PTR_ERR(princhashcopy); + princhash.data = princhashcopy; princhash.len = princhashlen; } else princhash.len = 0; @@ -810,9 +741,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, dprintk("%s: invalid namelen (%u)", __func__, namelen); return -EINVAL; } - name.data = memdup_user(&cnm->cn_id, namelen); - if (IS_ERR(name.data)) - return PTR_ERR(name.data); + namecopy = memdup_user(&cnm->cn_id, namelen); + if (IS_ERR(namecopy)) + return PTR_ERR(namecopy); + name.data = namecopy; name.len = namelen; } #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING @@ -820,15 +752,12 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, struct cld_net *cn = nn->cld_net; name.len = name.len - 5; - memmove(name.data, name.data + 5, name.len); + name.data = name.data + 5; cn->cn_has_legacy = true; } #endif - if (!nfs4_client_to_reclaim(name, princhash, nn)) { - kfree(name.data); - kfree(princhash.data); + if (!nfs4_client_to_reclaim(name, princhash, nn)) return -EFAULT; - } return nn->client_tracking_ops->msglen; } return -EFAULT; @@ -1254,13 +1183,10 @@ nfsd4_cld_check(struct nfs4_client *clp) #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING if (nn->cld_net->cn_has_legacy) { - int status; char dname[HEXDIR_LEN]; struct xdr_netobj name; - status = nfs4_make_rec_clidname(dname, &clp->cl_name); - if (status) - return -ENOENT; + nfs4_make_rec_clidname(dname, &clp->cl_name); name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL); if (!name.data) { @@ -1305,11 +1231,8 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) if (cn->cn_has_legacy) { struct xdr_netobj name; char dname[HEXDIR_LEN]; - int status; - status = nfs4_make_rec_clidname(dname, &clp->cl_name); - if (status) - return -ENOENT; + nfs4_make_rec_clidname(dname, &clp->cl_name); name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL); if (!name.data) { @@ -1682,11 +1605,7 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name) return NULL; } - copied = nfs4_make_rec_clidname(result + copied, name); - if (copied) { - kfree(result); - return NULL; - } + nfs4_make_rec_clidname(result + copied, name); return result; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6791fc239dbd..808c24fb5c9a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3508,7 +3508,7 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) free_svc_cred(&slot->sl_cred); copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred); - if (!nfsd4_cache_this(resp)) { + if (!(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)) { slot->sl_flags &= ~NFSD4_SLOT_CACHED; return; } @@ -3523,41 +3523,6 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) } /* - * Encode the replay sequence operation from the slot values. - * If cachethis is FALSE encode the uncached rep error on the next - * operation which sets resp->p and increments resp->opcnt for - * nfs4svc_encode_compoundres. - * - */ -static __be32 -nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, - struct nfsd4_compoundres *resp) -{ - struct nfsd4_op *op; - struct nfsd4_slot *slot = resp->cstate.slot; - - /* Encode the replayed sequence operation */ - op = &args->ops[resp->opcnt - 1]; - nfsd4_encode_operation(resp, op); - - if (slot->sl_flags & NFSD4_SLOT_CACHED) - return op->status; - if (args->opcnt == 1) { - /* - * The original operation wasn't a solo sequence--we - * always cache those--so this retry must not match the - * original: - */ - op->status = nfserr_seq_false_retry; - } else { - op = &args->ops[resp->opcnt++]; - op->status = nfserr_retry_uncached_rep; - nfsd4_encode_operation(resp, op); - } - return op->status; -} - -/* * The sequence operation is not cached because we can use the slot and * session values. */ @@ -3565,17 +3530,30 @@ static __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { + struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; struct nfsd4_slot *slot = resp->cstate.slot; struct xdr_stream *xdr = resp->xdr; __be32 *p; - __be32 status; dprintk("--> %s slot %p\n", __func__, slot); - status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); - if (status) - return status; + /* Always encode the SEQUENCE response. */ + nfsd4_encode_operation(resp, &args->ops[0]); + if (args->opcnt == 1) + /* A solo SEQUENCE - nothing was cached */ + return args->ops[0].status; + if (!(slot->sl_flags & NFSD4_SLOT_CACHED)) { + /* We weren't asked to cache this. */ + struct nfsd4_op *op; + + op = &args->ops[resp->opcnt++]; + op->status = nfserr_retry_uncached_rep; + nfsd4_encode_operation(resp, op); + return op->status; + } + + /* return reply from cache */ p = xdr_reserve_space(xdr, slot->sl_datalen); if (!p) { WARN_ON_ONCE(1); @@ -6362,11 +6340,6 @@ nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open, return; out_no_deleg: open->op_delegate_type = OPEN_DELEGATE_NONE; - if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && - open->op_delegate_type != OPEN_DELEGATE_NONE) { - dprintk("NFSD: WARNING: refusing delegation reclaim\n"); - open->op_recall = true; - } /* 4.1 client asking for a delegation? */ if (open->op_deleg_want) @@ -8802,9 +8775,6 @@ nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn) /* * failure => all reset bets are off, nfserr_no_grace... - * - * The caller is responsible for freeing name.data if NULL is returned (it - * will be freed in nfs4_remove_reclaim_record in the normal case). */ struct nfs4_client_reclaim * nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, @@ -8813,6 +8783,22 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, unsigned int strhashval; struct nfs4_client_reclaim *crp; + name.data = kmemdup(name.data, name.len, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + return NULL; + } + if (princhash.len) { + princhash.data = kmemdup(princhash.data, princhash.len, GFP_KERNEL); + if (!princhash.data) { + dprintk("%s: failed to allocate memory for princhash.data!\n", + __func__); + kfree(name.data); + return NULL; + } + } else + princhash.data = NULL; crp = alloc_reclaim(); if (crp) { strhashval = clientstr_hashval(name); @@ -8824,6 +8810,9 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, crp->cr_princhash.len = princhash.len; crp->cr_clp = NULL; nn->reclaim_str_hashtbl_size++; + } else { + kfree(name.data); + kfree(princhash.data); } return crp; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 67bb9c0b9fcb..30ce5851fe4c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4472,7 +4472,7 @@ out_err: static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct nfsd4_read *read, - struct file *file, unsigned long maxcount) + unsigned long maxcount) { struct xdr_stream *xdr = resp->xdr; unsigned int base = xdr->buf->page_len & ~PAGE_MASK; @@ -4480,18 +4480,30 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, __be32 zero = xdr_zero; __be32 nfserr; - if (xdr_reserve_space_vec(xdr, maxcount) < 0) - return nfserr_resource; - - nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, file, + nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, read->rd_nf, read->rd_offset, &maxcount, base, &read->rd_eof); read->rd_length = maxcount; if (nfserr) return nfserr; + + /* + * svcxdr_encode_opaque_pages() is not used here because + * we don't want to encode subsequent results in this + * COMPOUND into the xdr->buf's tail, but rather those + * results should follow the NFS READ payload in the + * buf's pages. + */ + if (xdr_reserve_space_vec(xdr, maxcount) < 0) + return nfserr_resource; + + /* + * Mark the buffer location of the NFS READ payload so that + * direct placement-capable transports send only the + * payload bytes out-of-band. + */ if (svc_encode_result_payload(resp->rqstp, starting_len, maxcount)) return nfserr_io; - xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount)); write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &zero, xdr_pad_size(maxcount)); @@ -4530,7 +4542,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (file->f_op->splice_read && splice_ok) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else - nfserr = nfsd4_encode_readv(resp, read, file, maxcount); + nfserr = nfsd4_encode_readv(resp, read, maxcount); if (nfserr) { xdr_truncate_encode(xdr, eof_offset); return nfserr; @@ -5426,7 +5438,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, if (file->f_op->splice_read && splice_ok) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else - nfserr = nfsd4_encode_readv(resp, read, file, maxcount); + nfserr = nfsd4_encode_readv(resp, read, maxcount); if (nfserr) return nfserr; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2b79129703d5..5ce9a49e76ba 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1137,11 +1137,11 @@ static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *nc inode->i_private = ncl; kref_get(&ncl->cl_ref); } - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); inc_nlink(dir); fsnotify_mkdir(dir, dentry); - inode_unlock(dir); - return dentry; + simple_done_creating(dentry); + return dentry; // borrowed } #if IS_ENABLED(CONFIG_SUNRPC_GSS) @@ -1170,9 +1170,9 @@ static void _nfsd_symlink(struct dentry *parent, const char *name, inode->i_link = (char *)content; inode->i_size = strlen(content); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(dir, dentry); - inode_unlock(dir); + simple_done_creating(dentry); } #else static inline void _nfsd_symlink(struct dentry *parent, const char *name, @@ -1228,11 +1228,11 @@ static int nfsdfs_create_files(struct dentry *root, kref_get(&ncl->cl_ref); inode->i_fop = files->ops; inode->i_private = ncl; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(dir, dentry); if (fdentries) - fdentries[i] = dentry; - inode_unlock(dir); + fdentries[i] = dentry; // borrowed + simple_done_creating(dentry); } return 0; } @@ -1346,7 +1346,7 @@ static void nfsd_umount(struct super_block *sb) nfsd_shutdown_threads(net); - kill_litter_super(sb); + kill_anon_super(sb); put_net(net); } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index b752433c3c2c..e4263326ca4a 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -160,6 +160,7 @@ enum { /* Any new NFSD_IO enum value must be added at the end */ NFSD_IO_BUFFERED, NFSD_IO_DONTCACHE, + NFSD_IO_DIRECT, }; extern u64 nfsd_io_cache_read __read_mostly; @@ -397,14 +398,13 @@ enum { #define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT /* - * The following attributes are currently not supported by the NFSv4 server: + * The following attributes are not implemented by NFSD: * ARCHIVE (deprecated anyway) * HIDDEN (unlikely to be supported any time soon) * MIMETYPE (unlikely to be supported any time soon) * QUOTA_* (will be supported in a forthcoming patch) * SYSTEM (unlikely to be supported any time soon) * TIME_BACKUP (unlikely to be supported any time soon) - * TIME_CREATE (unlikely to be supported any time soon) */ #define NFSD4_SUPPORTED_ATTRS_WORD0 \ (FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \ diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 7057ddd7a0a8..b08ae85d53ef 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -249,27 +249,6 @@ int nfsd_nrthreads(struct net *net) return rv; } -static int nfsd_init_socks(struct net *net, const struct cred *cred) -{ - int error; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - - if (!list_empty(&nn->nfsd_serv->sv_permsocks)) - return 0; - - error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS, cred); - if (error < 0) - return error; - - error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS, cred); - if (error < 0) - return error; - - return 0; -} - static int nfsd_users = 0; static int nfsd_startup_generic(void) @@ -377,9 +356,12 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred) ret = nfsd_startup_generic(); if (ret) return ret; - ret = nfsd_init_socks(net, cred); - if (ret) + + if (list_empty(&nn->nfsd_serv->sv_permsocks)) { + pr_warn("NFSD: Failed to start, no listeners configured.\n"); + ret = -EIO; goto out_socks; + } if (nfsd_needs_lockd(nn) && !nn->lockd_up) { ret = lockd_up(net, cred); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 6e2c8e2aab10..5ae2a611e57f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -464,10 +464,13 @@ DEFINE_EVENT(nfsd_io_class, nfsd_##name, \ DEFINE_NFSD_IO_EVENT(read_start); DEFINE_NFSD_IO_EVENT(read_splice); DEFINE_NFSD_IO_EVENT(read_vector); +DEFINE_NFSD_IO_EVENT(read_direct); DEFINE_NFSD_IO_EVENT(read_io_done); DEFINE_NFSD_IO_EVENT(read_done); DEFINE_NFSD_IO_EVENT(write_start); DEFINE_NFSD_IO_EVENT(write_opened); +DEFINE_NFSD_IO_EVENT(write_direct); +DEFINE_NFSD_IO_EVENT(write_vector); DEFINE_NFSD_IO_EVENT(write_io_done); DEFINE_NFSD_IO_EVENT(write_done); DEFINE_NFSD_IO_EVENT(commit_start); @@ -2613,6 +2616,44 @@ DEFINE_EVENT(nfsd_vfs_getattr_class, __name, \ DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_getattr); DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_statfs); +DECLARE_EVENT_CLASS(nfsd_pnfs_class, + TP_PROTO( + const struct nfs4_client *clp, + const char *dev, + int error + ), + TP_ARGS(clp, dev, error), + TP_STRUCT__entry( + __sockaddr(addr, sizeof(struct sockaddr_in6)) + __field(unsigned int, netns_ino) + __string(dev, dev) + __field(int, error) + ), + TP_fast_assign( + __assign_sockaddr(addr, &clp->cl_addr, + sizeof(struct sockaddr_in6)); + __entry->netns_ino = clp->net->ns.inum; + __assign_str(dev); + __entry->error = error; + ), + TP_printk("client=%pISpc nn=%d dev=%s error=%d", + __get_sockaddr(addr), + __entry->netns_ino, + __get_str(dev), + __entry->error + ) +); + +#define DEFINE_NFSD_PNFS_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_pnfs_class, nfsd_pnfs_##name, \ + TP_PROTO( \ + const struct nfs4_client *clp, \ + const char *dev, \ + int error \ + ), \ + TP_ARGS(clp, dev, error)) + +DEFINE_NFSD_PNFS_ERR_EVENT(fence); #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 31cbf46b47b1..964cf922ad83 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1075,11 +1075,88 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } +/* + * The byte range of the client's READ request is expanded on both ends + * until it meets the underlying file system's direct I/O alignment + * requirements. After the internal read is complete, the byte range of + * the NFS READ payload is reduced to the byte range that was originally + * requested. + * + * Note that a direct read can be done only when the xdr_buf containing + * the NFS READ reply does not already have contents in its .pages array. + * This is due to potentially restrictive alignment requirements on the + * read buffer. When .page_len and @base are zero, the .pages array is + * guaranteed to be page-aligned. + */ +static noinline_for_stack __be32 +nfsd_direct_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_file *nf, loff_t offset, unsigned long *count, + u32 *eof) +{ + u64 dio_start, dio_end; + unsigned long v, total; + struct iov_iter iter; + struct kiocb kiocb; + ssize_t host_err; + size_t len; + + init_sync_kiocb(&kiocb, nf->nf_file); + kiocb.ki_flags |= IOCB_DIRECT; + + /* Read a properly-aligned region of bytes into rq_bvec */ + dio_start = round_down(offset, nf->nf_dio_read_offset_align); + dio_end = round_up((u64)offset + *count, nf->nf_dio_read_offset_align); + + kiocb.ki_pos = dio_start; + + v = 0; + total = dio_end - dio_start; + while (total && v < rqstp->rq_maxpages && + rqstp->rq_next_page < rqstp->rq_page_end) { + len = min_t(size_t, total, PAGE_SIZE); + bvec_set_page(&rqstp->rq_bvec[v], *rqstp->rq_next_page, + len, 0); + + total -= len; + ++rqstp->rq_next_page; + ++v; + } + + trace_nfsd_read_direct(rqstp, fhp, offset, *count - total); + iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, + dio_end - dio_start - total); + + host_err = vfs_iocb_iter_read(nf->nf_file, &kiocb, &iter); + if (host_err >= 0) { + unsigned int pad = offset - dio_start; + + /* The returned payload starts after the pad */ + rqstp->rq_res.page_base = pad; + + /* Compute the count of bytes to be returned */ + if (host_err > pad + *count) + host_err = *count; + else if (host_err > pad) + host_err -= pad; + else + host_err = 0; + } else if (unlikely(host_err == -EINVAL)) { + struct inode *inode = d_inode(fhp->fh_dentry); + + pr_info_ratelimited("nfsd: Direct I/O alignment failure on %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + host_err = -ESERVERFAULT; + } + + return nfsd_finish_read(rqstp, fhp, nf->nf_file, offset, count, + eof, host_err); +} + /** * nfsd_iter_read - Perform a VFS read using an iterator * @rqstp: RPC transaction context * @fhp: file handle of file to be read - * @file: opened struct file of file to be read + * @nf: opened struct nfsd_file of file to be read * @offset: starting byte offset * @count: IN: requested number of bytes; OUT: number of bytes read * @base: offset in first page of read buffer @@ -1092,9 +1169,10 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, * returned. */ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file *file, loff_t offset, unsigned long *count, + struct nfsd_file *nf, loff_t offset, unsigned long *count, unsigned int base, u32 *eof) { + struct file *file = nf->nf_file; unsigned long v, total; struct iov_iter iter; struct kiocb kiocb; @@ -1106,6 +1184,12 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, switch (nfsd_io_cache_read) { case NFSD_IO_BUFFERED: break; + case NFSD_IO_DIRECT: + /* When dio_read_offset_align is zero, dio is not supported */ + if (nf->nf_dio_read_offset_align && !rqstp->rq_res.page_len) + return nfsd_direct_read(rqstp, fhp, nf, offset, + count, eof); + fallthrough; case NFSD_IO_DONTCACHE: if (file->f_op->fop_flags & FOP_DONTCACHE) kiocb.ki_flags = IOCB_DONTCACHE; @@ -1116,18 +1200,20 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, v = 0; total = *count; - while (total) { + while (total && v < rqstp->rq_maxpages && + rqstp->rq_next_page < rqstp->rq_page_end) { len = min_t(size_t, total, PAGE_SIZE - base); - bvec_set_page(&rqstp->rq_bvec[v], *(rqstp->rq_next_page++), + bvec_set_page(&rqstp->rq_bvec[v], *rqstp->rq_next_page, len, base); + total -= len; + ++rqstp->rq_next_page; ++v; base = 0; } - WARN_ON_ONCE(v > rqstp->rq_maxpages); - trace_nfsd_read_vector(rqstp, fhp, offset, *count); - iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count); + trace_nfsd_read_vector(rqstp, fhp, offset, *count - total); + iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count - total); host_err = vfs_iocb_iter_read(file, &kiocb, &iter); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } @@ -1169,6 +1255,136 @@ static int wait_for_concurrent_writes(struct file *file) return err; } +struct nfsd_write_dio_seg { + struct iov_iter iter; + int flags; +}; + +static unsigned long +iov_iter_bvec_offset(const struct iov_iter *iter) +{ + return (unsigned long)(iter->bvec->bv_offset + iter->iov_offset); +} + +static void +nfsd_write_dio_seg_init(struct nfsd_write_dio_seg *segment, + struct bio_vec *bvec, unsigned int nvecs, + unsigned long total, size_t start, size_t len, + struct kiocb *iocb) +{ + iov_iter_bvec(&segment->iter, ITER_SOURCE, bvec, nvecs, total); + if (start) + iov_iter_advance(&segment->iter, start); + iov_iter_truncate(&segment->iter, len); + segment->flags = iocb->ki_flags; +} + +static unsigned int +nfsd_write_dio_iters_init(struct nfsd_file *nf, struct bio_vec *bvec, + unsigned int nvecs, struct kiocb *iocb, + unsigned long total, + struct nfsd_write_dio_seg segments[3]) +{ + u32 offset_align = nf->nf_dio_offset_align; + loff_t prefix_end, orig_end, middle_end; + u32 mem_align = nf->nf_dio_mem_align; + size_t prefix, middle, suffix; + loff_t offset = iocb->ki_pos; + unsigned int nsegs = 0; + + /* + * Check if direct I/O is feasible for this write request. + * If alignments are not available, the write is too small, + * or no alignment can be found, fall back to buffered I/O. + */ + if (unlikely(!mem_align || !offset_align) || + unlikely(total < max(offset_align, mem_align))) + goto no_dio; + + prefix_end = round_up(offset, offset_align); + orig_end = offset + total; + middle_end = round_down(orig_end, offset_align); + + prefix = prefix_end - offset; + middle = middle_end - prefix_end; + suffix = orig_end - middle_end; + + if (!middle) + goto no_dio; + + if (prefix) + nfsd_write_dio_seg_init(&segments[nsegs++], bvec, + nvecs, total, 0, prefix, iocb); + + nfsd_write_dio_seg_init(&segments[nsegs], bvec, nvecs, + total, prefix, middle, iocb); + + /* + * Check if the bvec iterator is aligned for direct I/O. + * + * bvecs generated from RPC receive buffers are contiguous: After + * the first bvec, all subsequent bvecs start at bv_offset zero + * (page-aligned). Therefore, only the first bvec is checked. + */ + if (iov_iter_bvec_offset(&segments[nsegs].iter) & (mem_align - 1)) + goto no_dio; + segments[nsegs].flags |= IOCB_DIRECT; + nsegs++; + + if (suffix) + nfsd_write_dio_seg_init(&segments[nsegs++], bvec, nvecs, total, + prefix + middle, suffix, iocb); + + return nsegs; + +no_dio: + /* No DIO alignment possible - pack into single non-DIO segment. */ + nfsd_write_dio_seg_init(&segments[0], bvec, nvecs, total, 0, + total, iocb); + return 1; +} + +static noinline_for_stack int +nfsd_direct_write(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_file *nf, unsigned int nvecs, + unsigned long *cnt, struct kiocb *kiocb) +{ + struct nfsd_write_dio_seg segments[3]; + struct file *file = nf->nf_file; + unsigned int nsegs, i; + ssize_t host_err; + + nsegs = nfsd_write_dio_iters_init(nf, rqstp->rq_bvec, nvecs, + kiocb, *cnt, segments); + + *cnt = 0; + for (i = 0; i < nsegs; i++) { + kiocb->ki_flags = segments[i].flags; + if (kiocb->ki_flags & IOCB_DIRECT) + trace_nfsd_write_direct(rqstp, fhp, kiocb->ki_pos, + segments[i].iter.count); + else { + trace_nfsd_write_vector(rqstp, fhp, kiocb->ki_pos, + segments[i].iter.count); + /* + * Mark the I/O buffer as evict-able to reduce + * memory contention. + */ + if (nf->nf_file->f_op->fop_flags & FOP_DONTCACHE) + kiocb->ki_flags |= IOCB_DONTCACHE; + } + + host_err = vfs_iocb_iter_write(file, kiocb, &segments[i].iter); + if (host_err < 0) + return host_err; + *cnt += host_err; + if (host_err < segments[i].iter.count) + break; /* partial write */ + } + + return 0; +} + /** * nfsd_vfs_write - write data to an already-open file * @rqstp: RPC execution context @@ -1229,29 +1445,46 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, stable = NFS_UNSTABLE; init_sync_kiocb(&kiocb, file); kiocb.ki_pos = offset; - if (stable && !fhp->fh_use_wgather) - kiocb.ki_flags |= IOCB_DSYNC; + if (likely(!fhp->fh_use_wgather)) { + switch (stable) { + case NFS_FILE_SYNC: + /* persist data and timestamps */ + kiocb.ki_flags |= IOCB_DSYNC | IOCB_SYNC; + break; + case NFS_DATA_SYNC: + /* persist data only */ + kiocb.ki_flags |= IOCB_DSYNC; + break; + } + } nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload); - iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt); + since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); switch (nfsd_io_cache_write) { - case NFSD_IO_BUFFERED: + case NFSD_IO_DIRECT: + host_err = nfsd_direct_write(rqstp, fhp, nf, nvecs, + cnt, &kiocb); break; case NFSD_IO_DONTCACHE: if (file->f_op->fop_flags & FOP_DONTCACHE) kiocb.ki_flags |= IOCB_DONTCACHE; + fallthrough; + case NFSD_IO_BUFFERED: + iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt); + host_err = vfs_iocb_iter_write(file, &kiocb, &iter); + if (host_err < 0) + break; + *cnt = host_err; break; } - host_err = vfs_iocb_iter_write(file, &kiocb, &iter); if (host_err < 0) { commit_reset_write_verifier(nn, rqstp, host_err); goto out_nfserr; } - *cnt = host_err; nfsd_stats_io_write_add(nn, exp, *cnt); fsnotify_modify(file); host_err = filemap_check_wb_err(file->f_mapping, since); @@ -1335,7 +1568,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, if (file->f_op->splice_read && nfsd_read_splice_ok(rqstp)) err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof); else - err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof); + err = nfsd_iter_read(rqstp, fhp, nf, offset, count, 0, eof); nfsd_file_put(nf); trace_nfsd_read_done(rqstp, fhp, offset, *count); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 09de48c50cbe..ded2900d423f 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -121,7 +121,7 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned long *count, u32 *eof); __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file *file, loff_t offset, + struct nfsd_file *nf, loff_t offset, unsigned long *count, unsigned int base, u32 *eof); bool nfsd_read_splice_ok(struct svc_rqst *rqstp); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 1ce8e12ae335..ae75846b3cd7 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -924,27 +924,6 @@ struct nfsd4_compoundres { struct nfsd4_compound_state cstate; }; -static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) -{ - struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; - return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; -} - -/* - * The session reply cache only needs to cache replies that the client - * actually asked us to. But it's almost free for us to cache compounds - * consisting of only a SEQUENCE op, so we may as well cache those too. - * Also, the protocol doesn't give us a convenient response in the case - * of a replay of a solo SEQUENCE op that wasn't cached - * (RETRY_UNCACHED_REP can only be returned in the second op of a - * compound). - */ -static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp) -{ - return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) - || nfsd4_is_solo_sequence(resp); -} - static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) { struct nfsd4_compoundres *resp = rqstp->rq_resp; diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 3288c3b4be9e..e17b8da66491 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -49,7 +49,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, void *, size_t, size_t)) { void *buf; - void __user *base = (void __user *)(unsigned long)argv->v_base; + void __user *base = u64_to_user_ptr(argv->v_base); size_t maxmembs, total, n; ssize_t nr; int ret, i; @@ -836,7 +836,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, sizeof(struct nilfs_bdesc), sizeof(__u64), }; - void __user *base; void *kbufs[5]; struct the_nilfs *nilfs; size_t len, nsegs; @@ -863,7 +862,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, * use kmalloc() for its buffer because the memory used for the * segment numbers is small enough. */ - kbufs[4] = memdup_array_user((void __user *)(unsigned long)argv[4].v_base, + kbufs[4] = memdup_array_user(u64_to_user_ptr(argv[4].v_base), nsegs, sizeof(__u64)); if (IS_ERR(kbufs[4])) { ret = PTR_ERR(kbufs[4]); @@ -883,20 +882,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, goto out_free; len = argv[n].v_size * argv[n].v_nmembs; - base = (void __user *)(unsigned long)argv[n].v_base; if (len == 0) { kbufs[n] = NULL; continue; } - kbufs[n] = vmalloc(len); - if (!kbufs[n]) { - ret = -ENOMEM; - goto out_free; - } - if (copy_from_user(kbufs[n], base, len)) { - ret = -EFAULT; - vfree(kbufs[n]); + kbufs[n] = vmemdup_user(u64_to_user_ptr(argv[n].v_base), len); + if (IS_ERR(kbufs[n])) { + ret = PTR_ERR(kbufs[n]); goto out_free; } } @@ -928,7 +921,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, out_free: while (--n >= 0) - vfree(kbufs[n]); + kvfree(kbufs[n]); kfree(kbufs[4]); out: mnt_drop_write_file(filp); @@ -1181,7 +1174,6 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, struct nilfs_transaction_info ti; struct nilfs_argv argv; size_t len; - void __user *base; void *kbuf; int ret; @@ -1212,18 +1204,12 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, goto out; } - base = (void __user *)(unsigned long)argv.v_base; - kbuf = vmalloc(len); - if (!kbuf) { - ret = -ENOMEM; + kbuf = vmemdup_user(u64_to_user_ptr(argv.v_base), len); + if (IS_ERR(kbuf)) { + ret = PTR_ERR(kbuf); goto out; } - if (copy_from_user(kbuf, base, len)) { - ret = -EFAULT; - goto out_free; - } - nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_sufile_set_suinfo(nilfs->ns_sufile, kbuf, argv.v_size, argv.v_nmembs); @@ -1232,8 +1218,7 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, else nilfs_transaction_commit(inode->i_sb); /* never fails */ -out_free: - vfree(kbuf); + kvfree(kbuf); out: mnt_drop_write_file(filp); return ret; diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 18d597e49a19..a5c3a9f1b8dc 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -67,19 +67,22 @@ int utf8_to_utf32(const u8 *s, int inlen, unicode_t *pu) l &= t->lmask; if (l < t->lval || l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) - return -1; + return -EILSEQ; + *pu = (unicode_t) l; return nc; } if (inlen <= nc) - return -1; + return -EOVERFLOW; + s++; c = (*s ^ 0x80) & 0xFF; if (c & 0xC0) - return -1; + return -EILSEQ; + l = (l << 6) | c; } - return -1; + return -EILSEQ; } EXPORT_SYMBOL(utf8_to_utf32); @@ -94,7 +97,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxout) l = u; if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) - return -1; + return -EILSEQ; nc = 0; for (t = utf8_table; t->cmask && maxout; t++, maxout--) { @@ -110,7 +113,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxout) return nc; } } - return -1; + return -EOVERFLOW; } EXPORT_SYMBOL(utf32_to_utf8); @@ -217,8 +220,16 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian, inlen--; } size = utf32_to_utf8(u, op, maxout); - if (size == -1) { - /* Ignore character and move on */ + if (size < 0) { + if (size == -EILSEQ) { + /* Ignore character and move on */ + continue; + } + /* + * Stop filling the buffer with data once a character + * does not fit anymore. + */ + break; } else { op += size; maxout -= size; diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 5016bccc2ac5..2e7b2e566ebe 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -379,7 +379,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) if (rw) { u64 to = min_t(loff_t, i_size_read(inode), - from + desc->end - desc->start); + from + vma_desc_size(desc)); if (is_sparsed(ni)) { /* Allocate clusters for rw map. */ diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 8c9c4825f984..2785ff245e79 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -302,8 +302,21 @@ static int ocfs2_check_dir_entry(struct inode *dir, unsigned long offset) { const char *error_msg = NULL; - const int rlen = le16_to_cpu(de->rec_len); - const unsigned long next_offset = ((char *) de - buf) + rlen; + unsigned long next_offset; + int rlen; + + if (offset > size - OCFS2_DIR_REC_LEN(1)) { + /* Dirent is (maybe partially) beyond the buffer + * boundaries so touching 'de' members is unsafe. + */ + mlog(ML_ERROR, "directory entry (#%llu: offset=%lu) " + "too close to end or out-of-bounds", + (unsigned long long)OCFS2_I(dir)->ip_blkno, offset); + return 0; + } + + rlen = le16_to_cpu(de->rec_len); + next_offset = ((char *) de - buf) + rlen; if (unlikely(rlen < OCFS2_DIR_REC_LEN(1))) error_msg = "rec_len is smaller than minimal"; @@ -778,6 +791,14 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode, struct ocfs2_extent_block *eb; struct ocfs2_extent_rec *rec = NULL; + if (le16_to_cpu(el->l_count) != + ocfs2_extent_recs_per_dx_root(inode->i_sb)) { + ret = ocfs2_error(inode->i_sb, + "Inode %lu has invalid extent list length %u\n", + inode->i_ino, le16_to_cpu(el->l_count)); + goto out; + } + if (el->l_tree_depth) { ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash, &eb_bh); @@ -3423,6 +3444,14 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, offset += le16_to_cpu(de->rec_len); } + if (!last_de) { + ret = ocfs2_error(sb, "Directory entry (#%llu: size=%lld) " + "is unexpectedly short", + (unsigned long long)OCFS2_I(dir)->ip_blkno, + i_size_read(dir)); + goto out; + } + /* * We're going to require expansion of the directory - figure * out how many blocks we'll need so that a place for the @@ -4104,10 +4133,15 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, } dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE; - memset(&dx_root->dr_list, 0, osb->sb->s_blocksize - - offsetof(struct ocfs2_dx_root_block, dr_list)); + + dx_root->dr_list.l_tree_depth = 0; dx_root->dr_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb)); + dx_root->dr_list.l_next_free_rec = 0; + memset(&dx_root->dr_list.l_recs, 0, + osb->sb->s_blocksize - + (offsetof(struct ocfs2_dx_root_block, dr_list) + + offsetof(struct ocfs2_extent_list, l_recs))); /* This should never fail considering we start with an empty * dx_root. */ diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index cccaa1d6fbba..339f0b11cdc8 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -441,8 +441,7 @@ static struct dentry *dlmfs_mkdir(struct mnt_idmap * idmap, ip->ip_conn = conn; inc_nlink(dir); - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ + d_make_persistent(dentry, inode); status = 0; bail: @@ -480,8 +479,7 @@ static int dlmfs_create(struct mnt_idmap *idmap, goto bail; } - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ + d_make_persistent(dentry, inode); bail: return status; } @@ -574,7 +572,7 @@ static int dlmfs_init_fs_context(struct fs_context *fc) static struct file_system_type dlmfs_fs_type = { .owner = THIS_MODULE, .name = "ocfs2_dlmfs", - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, .init_fs_context = dlmfs_init_fs_context, }; MODULE_ALIAS_FS("ocfs2_dlmfs"); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 78f81950c9ee..8340525e5589 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -201,13 +201,15 @@ bail: static int ocfs2_dinode_has_extents(struct ocfs2_dinode *di) { /* inodes flagged with other stuff in id2 */ - if (di->i_flags & (OCFS2_SUPER_BLOCK_FL | OCFS2_LOCAL_ALLOC_FL | - OCFS2_CHAIN_FL | OCFS2_DEALLOC_FL)) + if (le32_to_cpu(di->i_flags) & + (OCFS2_SUPER_BLOCK_FL | OCFS2_LOCAL_ALLOC_FL | OCFS2_CHAIN_FL | + OCFS2_DEALLOC_FL)) return 0; /* i_flags doesn't indicate when id2 is a fast symlink */ - if (S_ISLNK(di->i_mode) && di->i_size && di->i_clusters == 0) + if (S_ISLNK(le16_to_cpu(di->i_mode)) && le64_to_cpu(di->i_size) && + !le32_to_cpu(di->i_clusters)) return 0; - if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) + if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) return 0; return 1; @@ -1460,7 +1462,7 @@ int ocfs2_validate_inode_block(struct super_block *sb, goto bail; } - if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { + if (!(le32_to_cpu(di->i_flags) & OCFS2_VALID_FL)) { rc = ocfs2_error(sb, "Invalid dinode #%llu: OCFS2_VALID_FL not set\n", (unsigned long long)bh->b_blocknr); @@ -1484,6 +1486,41 @@ int ocfs2_validate_inode_block(struct super_block *sb, goto bail; } + if ((le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) && + le32_to_cpu(di->i_clusters)) { + rc = ocfs2_error(sb, "Invalid dinode %llu: %u clusters\n", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(di->i_clusters)); + goto bail; + } + + if (le32_to_cpu(di->i_flags) & OCFS2_CHAIN_FL) { + struct ocfs2_chain_list *cl = &di->id2.i_chain; + u16 bpc = 1 << (OCFS2_SB(sb)->s_clustersize_bits - + sb->s_blocksize_bits); + + if (le16_to_cpu(cl->cl_count) != ocfs2_chain_recs_per_inode(sb)) { + rc = ocfs2_error(sb, "Invalid dinode %llu: chain list count %u\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(cl->cl_count)); + goto bail; + } + if (le16_to_cpu(cl->cl_next_free_rec) > le16_to_cpu(cl->cl_count)) { + rc = ocfs2_error(sb, "Invalid dinode %llu: chain list index %u\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(cl->cl_next_free_rec)); + goto bail; + } + if (OCFS2_SB(sb)->bitmap_blkno && + OCFS2_SB(sb)->bitmap_blkno != le64_to_cpu(di->i_blkno) && + le16_to_cpu(cl->cl_bpc) != bpc) { + rc = ocfs2_error(sb, "Invalid dinode %llu: bits per cluster %u\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(cl->cl_bpc)); + goto bail; + } + } + rc = 0; bail: @@ -1671,6 +1708,8 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno, 1, &tmp, flags, ocfs2_validate_inode_block); + if (rc < 0) + make_bad_inode(inode); /* If ocfs2_read_blocks() got us a new bh, pass it up. */ if (!rc && !*bh) *bh = tmp; diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 10923bf7c8b8..ce978a2497d9 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -98,7 +98,13 @@ static int __ocfs2_move_extent(handle_t *handle, rec = &el->l_recs[index]; - BUG_ON(ext_flags != rec->e_flags); + if (ext_flags != rec->e_flags) { + ret = ocfs2_error(inode->i_sb, + "Inode %llu has corrupted extent %d with flags 0x%x at cpos %u\n", + (unsigned long long)ino, index, rec->e_flags, cpos); + goto out; + } + /* * after moving/defraging to new location, the extent is not going * to be refcounted anymore. @@ -1036,6 +1042,12 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) if (range.me_threshold > i_size_read(inode)) range.me_threshold = i_size_read(inode); + if (range.me_flags & ~(OCFS2_MOVE_EXT_FL_AUTO_DEFRAG | + OCFS2_MOVE_EXT_FL_PART_DEFRAG)) { + status = -EINVAL; + goto out_free; + } + if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { context->auto_defrag = 1; diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index ae0e44e5f2ad..f7763da5c4a2 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -468,7 +468,8 @@ struct ocfs2_extent_list { __le16 l_reserved1; __le64 l_reserved2; /* Pad to sizeof(ocfs2_extent_rec) */ -/*10*/ struct ocfs2_extent_rec l_recs[]; /* Extent records */ + /* Extent records */ +/*10*/ struct ocfs2_extent_rec l_recs[] __counted_by_le(l_count); }; /* @@ -482,7 +483,8 @@ struct ocfs2_chain_list { __le16 cl_count; /* Total chains in this list */ __le16 cl_next_free_rec; /* Next unused chain slot */ __le64 cl_reserved1; -/*10*/ struct ocfs2_chain_rec cl_recs[]; /* Chain records */ + /* Chain records */ +/*10*/ struct ocfs2_chain_rec cl_recs[] __counted_by_le(cl_count); }; /* @@ -494,7 +496,8 @@ struct ocfs2_truncate_log { /*00*/ __le16 tl_count; /* Total records in this log */ __le16 tl_used; /* Number of records in use */ __le32 tl_reserved1; -/*08*/ struct ocfs2_truncate_rec tl_recs[]; /* Truncate records */ + /* Truncate records */ +/*08*/ struct ocfs2_truncate_rec tl_recs[] __counted_by_le(tl_count); }; /* @@ -796,9 +799,10 @@ struct ocfs2_dx_entry_list { * possible in de_entries */ __le16 de_num_used; /* Current number of * de_entries entries */ - struct ocfs2_dx_entry de_entries[]; /* Indexed dir entries - * in a packed array of - * length de_num_used */ + /* Indexed dir entries in a packed + * array of length de_num_used. + */ + struct ocfs2_dx_entry de_entries[] __counted_by_le(de_count); }; #define OCFS2_DX_FLAG_INLINE 0x01 @@ -934,7 +938,8 @@ struct ocfs2_refcount_list { __le16 rl_used; /* Current number of used records */ __le32 rl_reserved2; __le64 rl_reserved1; /* Pad to sizeof(ocfs2_refcount_record) */ -/*10*/ struct ocfs2_refcount_rec rl_recs[]; /* Refcount records */ + /* Refcount records */ +/*10*/ struct ocfs2_refcount_rec rl_recs[] __counted_by_le(rl_count); }; @@ -1020,7 +1025,8 @@ struct ocfs2_xattr_header { buckets. A block uses xb_check and sets this field to zero.) */ - struct ocfs2_xattr_entry xh_entries[]; /* xattr entry list. */ + /* xattr entry list. */ + struct ocfs2_xattr_entry xh_entries[] __counted_by_le(xh_count); }; /* diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 267b50e8e42e..c92e0ea85bca 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -34,6 +34,7 @@ #include <linux/pagevec.h> #include <linux/swap.h> #include <linux/security.h> +#include <linux/string.h> #include <linux/fsnotify.h> #include <linux/quotaops.h> #include <linux/namei.h> @@ -621,7 +622,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, /* Initialize ocfs2_refcount_block. */ rb = (struct ocfs2_refcount_block *)new_bh->b_data; memset(rb, 0, inode->i_sb->s_blocksize); - strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); + strscpy(rb->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE); rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); @@ -1562,7 +1563,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle, /* Initialize ocfs2_refcount_block. */ new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; memset(new_rb, 0, sb->s_blocksize); - strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); + strscpy(new_rb->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE); new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d70a20d29e3e..dc1761e84814 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2908,7 +2908,7 @@ static int ocfs2_create_xattr_block(struct inode *inode, /* Initialize ocfs2_xattr_block */ xblk = (struct ocfs2_xattr_block *)new_bh->b_data; memset(xblk, 0, inode->i_sb->s_blocksize); - strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); + strscpy(xblk->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE); xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); @@ -6351,7 +6351,7 @@ static int ocfs2_reflink_xattr_header(handle_t *handle, trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count)); - last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; + last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1; for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { xe = &xh->xh_entries[i]; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 06b860b9ded6..ff3dbd1ca61f 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -581,7 +581,8 @@ out_cleanup_unlocked: goto out_dput; } -static const struct cred *ovl_override_creator_creds(struct dentry *dentry, struct inode *inode, umode_t mode) +static const struct cred *ovl_override_creator_creds(const struct cred *original_creds, + struct dentry *dentry, struct inode *inode, umode_t mode) { int err; @@ -596,7 +597,7 @@ static const struct cred *ovl_override_creator_creds(struct dentry *dentry, stru override_cred->fsgid = inode->i_gid; err = security_dentry_create_files_as(dentry, mode, &dentry->d_name, - current->cred, override_cred); + original_creds, override_cred); if (err) return ERR_PTR(err); @@ -614,8 +615,11 @@ static void ovl_revert_creator_creds(const struct cred *old_cred) DEFINE_CLASS(ovl_override_creator_creds, const struct cred *, if (!IS_ERR_OR_NULL(_T)) ovl_revert_creator_creds(_T), - ovl_override_creator_creds(dentry, inode, mode), - struct dentry *dentry, struct inode *inode, umode_t mode) + ovl_override_creator_creds(original_creds, dentry, inode, mode), + const struct cred *original_creds, + struct dentry *dentry, + struct inode *inode, + umode_t mode) static int ovl_create_handle_whiteouts(struct dentry *dentry, struct inode *inode, @@ -633,7 +637,7 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, int err; struct dentry *parent = dentry->d_parent; - with_ovl_creds(dentry->d_sb) { + scoped_class(override_creds_ovl, original_creds, dentry->d_sb) { /* * When linking a file with copy up origin into a new parent, mark the * new parent dir "impure". @@ -661,7 +665,7 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, if (attr->hardlink) return ovl_create_handle_whiteouts(dentry, inode, attr); - scoped_class(ovl_override_creator_creds, cred, dentry, inode, attr->mode) { + scoped_class(ovl_override_creator_creds, cred, original_creds, dentry, inode, attr->mode) { if (IS_ERR(cred)) return PTR_ERR(cred); return ovl_create_handle_whiteouts(dentry, inode, attr); @@ -1364,8 +1368,8 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry, int flags = file->f_flags | OVL_OPEN_FLAGS; int err; - with_ovl_creds(dentry->d_sb) { - scoped_class(ovl_override_creator_creds, cred, dentry, inode, mode) { + scoped_class(override_creds_ovl, original_creds, dentry->d_sb) { + scoped_class(ovl_override_creator_creds, cred, original_creds, dentry, inode, mode) { if (IS_ERR(cred)) return PTR_ERR(cred); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 28b2f707cfbc..ba9146f22a2c 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -128,9 +128,17 @@ static int ovl_dentry_revalidate_common(struct dentry *dentry, unsigned int i; int ret = 1; - /* Careful in RCU mode */ - if (!inode) + if (!inode) { + /* + * Lookup of negative dentries will call ovl_dentry_init_flags() + * with NULL upperdentry and NULL oe, resulting in the + * DCACHE_OP*_REVALIDATE flags being cleared. Hence the only + * way to get a negative inode is due to a race with dentry + * destruction. + */ + WARN_ON(!(flags & LOOKUP_RCU)); return -ECHILD; + } oe = OVL_I_E(inode); lowerstack = ovl_lowerstack(oe); diff --git a/fs/pipe.c b/fs/pipe.c index 2d0fed2ecbfd..9e6a01475815 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1481,31 +1481,16 @@ static struct file_system_type pipe_fs_type = { }; #ifdef CONFIG_SYSCTL -static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, - unsigned int *valp, - int write, void *data) -{ - if (write) { - unsigned int val; - - val = round_pipe_size(*lvalp); - if (val == 0) - return -EINVAL; - - *valp = val; - } else { - unsigned int val = *valp; - *lvalp = (unsigned long) val; - } - - return 0; -} +static SYSCTL_USER_TO_KERN_UINT_CONV(_pipe_maxsz, round_pipe_size) +static SYSCTL_UINT_CONV_CUSTOM(_pipe_maxsz, + sysctl_user_to_kern_uint_conv_pipe_maxsz, + sysctl_kern_to_user_uint_conv, true) static int proc_dopipe_max_size(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_douintvec(table, write, buffer, lenp, ppos, - do_proc_dopipe_max_size_conv, NULL); + return proc_douintvec_conv(table, write, buffer, lenp, ppos, + do_proc_uint_conv_pipe_maxsz); } static const struct ctl_table fs_pipe_sysctls[] = { @@ -1515,6 +1500,7 @@ static const struct ctl_table fs_pipe_sysctls[] = { .maxlen = sizeof(pipe_max_size), .mode = 0644, .proc_handler = proc_dopipe_max_size, + .extra1 = SYSCTL_ONE, }, { .procname = "pipe-user-pages-hard", diff --git a/fs/proc/array.c b/fs/proc/array.c index cbd4bc4a58e4..42932f88141a 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -537,27 +537,27 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (permitted && (!whole || num_threads < 2)) wchan = !task_is_running(task); - scoped_seqlock_read (&sig->stats_lock, ss_lock_irqsave) { - cmin_flt = sig->cmin_flt; - cmaj_flt = sig->cmaj_flt; - cutime = sig->cutime; - cstime = sig->cstime; - cgtime = sig->cgtime; - - if (whole) { - struct task_struct *t; - - min_flt = sig->min_flt; - maj_flt = sig->maj_flt; - gtime = sig->gtime; - - rcu_read_lock(); - __for_each_thread(sig, t) { - min_flt += t->min_flt; - maj_flt += t->maj_flt; - gtime += task_gtime(t); + scoped_guard(rcu) { + scoped_seqlock_read (&sig->stats_lock, ss_lock_irqsave) { + cmin_flt = sig->cmin_flt; + cmaj_flt = sig->cmaj_flt; + cutime = sig->cutime; + cstime = sig->cstime; + cgtime = sig->cgtime; + + if (whole) { + struct task_struct *t; + + min_flt = sig->min_flt; + maj_flt = sig->maj_flt; + gtime = sig->gtime; + + __for_each_thread(sig, t) { + min_flt += t->min_flt; + maj_flt += t->maj_flt; + gtime += task_gtime(t); + } } - rcu_read_unlock(); } } diff --git a/fs/proc/base.c b/fs/proc/base.c index 407b41cb6e7c..4eec684baca9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3578,14 +3578,12 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) return 0; if (pos == TGID_OFFSET - 2) { - struct inode *inode = d_inode(fs_info->proc_self); - if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) + if (!dir_emit(ctx, "self", 4, self_inum, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } if (pos == TGID_OFFSET - 1) { - struct inode *inode = d_inode(fs_info->proc_thread_self); - if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) + if (!dir_emit(ctx, "thread-self", 11, thread_self_inum, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d9b7ef122343..b7634f975d98 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -303,7 +303,7 @@ static ssize_t proc_reg_read_iter(struct kiocb *iocb, struct iov_iter *iter) static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos) { - __auto_type read = pde->proc_ops->proc_read; + const auto read = pde->proc_ops->proc_read; if (read) return read(file, buf, count, ppos); return -EIO; @@ -325,7 +325,7 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, static ssize_t pde_write(struct proc_dir_entry *pde, struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - __auto_type write = pde->proc_ops->proc_write; + const auto write = pde->proc_ops->proc_write; if (write) return write(file, buf, count, ppos); return -EIO; @@ -347,7 +347,7 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t static __poll_t pde_poll(struct proc_dir_entry *pde, struct file *file, struct poll_table_struct *pts) { - __auto_type poll = pde->proc_ops->proc_poll; + const auto poll = pde->proc_ops->proc_poll; if (poll) return poll(file, pts); return DEFAULT_POLLMASK; @@ -369,7 +369,7 @@ static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts) static long pde_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg) { - __auto_type ioctl = pde->proc_ops->proc_ioctl; + const auto ioctl = pde->proc_ops->proc_ioctl; if (ioctl) return ioctl(file, cmd, arg); return -ENOTTY; @@ -392,7 +392,7 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne #ifdef CONFIG_COMPAT static long pde_compat_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg) { - __auto_type compat_ioctl = pde->proc_ops->proc_compat_ioctl; + const auto compat_ioctl = pde->proc_ops->proc_compat_ioctl; if (compat_ioctl) return compat_ioctl(file, cmd, arg); return -ENOTTY; @@ -414,7 +414,7 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned static int pde_mmap(struct proc_dir_entry *pde, struct file *file, struct vm_area_struct *vma) { - __auto_type mmap = pde->proc_ops->proc_mmap; + const auto mmap = pde->proc_ops->proc_mmap; if (mmap) return mmap(file, vma); return -EIO; @@ -443,7 +443,7 @@ pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned lo return pde->proc_ops->proc_get_unmapped_area(file, orig_addr, len, pgoff, flags); #ifdef CONFIG_MMU - return mm_get_unmapped_area(current->mm, file, orig_addr, len, pgoff, flags); + return mm_get_unmapped_area(file, orig_addr, len, pgoff, flags); #endif return orig_addr; @@ -497,7 +497,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) if (!use_pde(pde)) return -ENOENT; - __auto_type release = pde->proc_ops->proc_release; + const auto release = pde->proc_ops->proc_release; if (release) { pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL); if (!pdeo) { @@ -534,10 +534,9 @@ static int proc_reg_release(struct inode *inode, struct file *file) struct pde_opener *pdeo; if (pde_is_permanent(pde)) { - __auto_type release = pde->proc_ops->proc_release; - if (release) { + const auto release = pde->proc_ops->proc_release; + if (release) return release(inode, file); - } return 0; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index d1598576506c..c1e8eb984da8 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -373,6 +373,7 @@ static inline void proc_tty_init(void) {} extern struct proc_dir_entry proc_root; extern void proc_self_init(void); +extern unsigned self_inum, thread_self_inum; /* * task_[no]mmu.c diff --git a/fs/proc/page.c b/fs/proc/page.c index fc64f23e05e5..f9b2c2c906cd 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -20,7 +20,6 @@ #define KPMSIZE sizeof(u64) #define KPMMASK (KPMSIZE - 1) -#define KPMBITS (KPMSIZE * BITS_PER_BYTE) enum kpage_operation { KPAGE_FLAGS, diff --git a/fs/proc/root.c b/fs/proc/root.c index 1e24e085c7d5..d8ca41d823e4 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -347,17 +347,11 @@ static void proc_kill_sb(struct super_block *sb) { struct proc_fs_info *fs_info = proc_sb_info(sb); - if (!fs_info) { - kill_anon_super(sb); - return; - } - - dput(fs_info->proc_self); - dput(fs_info->proc_thread_self); - kill_anon_super(sb); - put_pid_ns(fs_info->pid_ns); - kfree_rcu(fs_info, rcu); + if (fs_info) { + put_pid_ns(fs_info->pid_ns); + kfree_rcu(fs_info, rcu); + } } static struct file_system_type proc_fs_type = { diff --git a/fs/proc/self.c b/fs/proc/self.c index b46fbfd22681..62d2c0cfe35c 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -31,12 +31,11 @@ static const struct inode_operations proc_self_inode_operations = { .get_link = proc_self_get_link, }; -static unsigned self_inum __ro_after_init; +unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct proc_fs_info *fs_info = proc_sb_info(s); struct dentry *self; int ret = -ENOMEM; @@ -51,18 +50,15 @@ int proc_setup_self(struct super_block *s) inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_op = &proc_self_inode_operations; - d_add(self, inode); + d_make_persistent(self, inode); ret = 0; - } else { - dput(self); } + dput(self); } inode_unlock(root_inode); if (ret) pr_err("proc_fill_super: can't allocate /proc/self\n"); - else - fs_info->proc_self = self; return ret; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index fc35a0543f01..81dfc26bfae8 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -14,7 +14,7 @@ #include <linux/rmap.h> #include <linux/swap.h> #include <linux/sched/mm.h> -#include <linux/swapops.h> +#include <linux/leafops.h> #include <linux/mmu_notifier.h> #include <linux/page_idle.h> #include <linux/shmem_fs.h> @@ -1017,14 +1017,16 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, young = pte_young(ptent); dirty = pte_dirty(ptent); present = true; - } else if (is_swap_pte(ptent)) { - swp_entry_t swpent = pte_to_swp_entry(ptent); + } else if (pte_none(ptent)) { + smaps_pte_hole_lookup(addr, walk); + } else { + const softleaf_t entry = softleaf_from_pte(ptent); - if (!non_swap_entry(swpent)) { + if (softleaf_is_swap(entry)) { int mapcount; mss->swap += PAGE_SIZE; - mapcount = swp_swapcount(swpent); + mapcount = swp_swapcount(entry); if (mapcount >= 2) { u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT; @@ -1033,14 +1035,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } else { mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } - } else if (is_pfn_swap_entry(swpent)) { - if (is_device_private_entry(swpent)) + } else if (softleaf_has_pfn(entry)) { + if (softleaf_is_device_private(entry)) present = true; - page = pfn_swap_entry_to_page(swpent); + page = softleaf_to_page(entry); } - } else { - smaps_pte_hole_lookup(addr, walk); - return; } if (!page) @@ -1060,14 +1059,16 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, bool present = false; struct folio *folio; + if (pmd_none(*pmd)) + return; if (pmd_present(*pmd)) { page = vm_normal_page_pmd(vma, addr, *pmd); present = true; - } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { - swp_entry_t entry = pmd_to_swp_entry(*pmd); + } else if (unlikely(thp_migration_supported())) { + const softleaf_t entry = softleaf_from_pmd(*pmd); - if (is_pfn_swap_entry(entry)) - page = pfn_swap_entry_to_page(entry); + if (softleaf_has_pfn(entry)) + page = softleaf_to_page(entry); } if (IS_ERR_OR_NULL(page)) return; @@ -1146,6 +1147,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_MAYSHARE)] = "ms", [ilog2(VM_GROWSDOWN)] = "gd", [ilog2(VM_PFNMAP)] = "pf", + [ilog2(VM_MAYBE_GUARD)] = "gu", [ilog2(VM_LOCKED)] = "lo", [ilog2(VM_IO)] = "io", [ilog2(VM_SEQ_READ)] = "sr", @@ -1181,10 +1183,10 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_PKEY_BIT0)] = "", [ilog2(VM_PKEY_BIT1)] = "", [ilog2(VM_PKEY_BIT2)] = "", -#if VM_PKEY_BIT3 +#if CONFIG_ARCH_PKEY_BITS > 3 [ilog2(VM_PKEY_BIT3)] = "", #endif -#if VM_PKEY_BIT4 +#if CONFIG_ARCH_PKEY_BITS > 4 [ilog2(VM_PKEY_BIT4)] = "", #endif #endif /* CONFIG_ARCH_HAS_PKEYS */ @@ -1230,11 +1232,11 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); present = true; - } else if (is_swap_pte(ptent)) { - swp_entry_t swpent = pte_to_swp_entry(ptent); + } else { + const softleaf_t entry = softleaf_from_pte(ptent); - if (is_pfn_swap_entry(swpent)) - folio = pfn_swap_entry_folio(swpent); + if (softleaf_has_pfn(entry)) + folio = softleaf_to_folio(entry); } if (folio) { @@ -1582,8 +1584,6 @@ struct clear_refs_private { enum clear_refs_types type; }; -#ifdef CONFIG_MEM_SOFT_DIRTY - static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { struct folio *folio; @@ -1603,6 +1603,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { + if (!pgtable_supports_soft_dirty()) + return; /* * The soft-dirty tracker uses #PF-s to catch writes * to pages, so write-protect the pte as well. See the @@ -1611,6 +1613,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, */ pte_t ptent = ptep_get(pte); + if (pte_none(ptent)) + return; + if (pte_present(ptent)) { pte_t old_pte; @@ -1620,24 +1625,21 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); - } else if (is_swap_pte(ptent)) { + } else { ptent = pte_swp_clear_soft_dirty(ptent); set_pte_at(vma->vm_mm, addr, pte, ptent); } } -#else -static inline void clear_soft_dirty(struct vm_area_struct *vma, - unsigned long addr, pte_t *pte) -{ -} -#endif -#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { pmd_t old, pmd = *pmdp; + if (!pgtable_supports_soft_dirty()) + return; + if (pmd_present(pmd)) { /* See comment in change_huge_pmd() */ old = pmdp_invalidate(vma, addr, pmdp); @@ -1650,7 +1652,7 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, pmd = pmd_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); - } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { + } else if (pmd_is_migration_entry(pmd)) { pmd = pmd_swp_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } @@ -1923,6 +1925,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, struct page *page = NULL; struct folio *folio; + if (pte_none(pte)) + goto out; + if (pte_present(pte)) { if (pm->show_pfn) frame = pte_pfn(pte); @@ -1932,32 +1937,34 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, flags |= PM_SOFT_DIRTY; if (pte_uffd_wp(pte)) flags |= PM_UFFD_WP; - } else if (is_swap_pte(pte)) { - swp_entry_t entry; + } else { + softleaf_t entry; + if (pte_swp_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; if (pte_swp_uffd_wp(pte)) flags |= PM_UFFD_WP; - entry = pte_to_swp_entry(pte); + entry = softleaf_from_pte(pte); if (pm->show_pfn) { pgoff_t offset; + /* * For PFN swap offsets, keeping the offset field * to be PFN only to be compatible with old smaps. */ - if (is_pfn_swap_entry(entry)) - offset = swp_offset_pfn(entry); + if (softleaf_has_pfn(entry)) + offset = softleaf_to_pfn(entry); else offset = swp_offset(entry); frame = swp_type(entry) | (offset << MAX_SWAPFILES_SHIFT); } flags |= PM_SWAP; - if (is_pfn_swap_entry(entry)) - page = pfn_swap_entry_to_page(entry); - if (pte_marker_entry_uffd_wp(entry)) + if (softleaf_has_pfn(entry)) + page = softleaf_to_page(entry); + if (softleaf_is_uffd_wp_marker(entry)) flags |= PM_UFFD_WP; - if (is_guard_swp_entry(entry)) + if (softleaf_is_guard_marker(entry)) flags |= PM_GUARD_REGION; } @@ -1969,96 +1976,110 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, __folio_page_mapped_exclusively(folio, page)) flags |= PM_MMAP_EXCLUSIVE; } + +out: if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; return make_pme(frame, flags); } -static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, - struct mm_walk *walk) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr, + unsigned long end, struct vm_area_struct *vma, + struct pagemapread *pm) { - struct vm_area_struct *vma = walk->vma; - struct pagemapread *pm = walk->private; - spinlock_t *ptl; - pte_t *pte, *orig_pte; + unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT; + u64 flags = 0, frame = 0; + pmd_t pmd = *pmdp; + struct page *page = NULL; + struct folio *folio = NULL; int err = 0; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - ptl = pmd_trans_huge_lock(pmdp, vma); - if (ptl) { - unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT; - u64 flags = 0, frame = 0; - pmd_t pmd = *pmdp; - struct page *page = NULL; - struct folio *folio = NULL; + if (vma->vm_flags & VM_SOFTDIRTY) + flags |= PM_SOFT_DIRTY; - if (vma->vm_flags & VM_SOFTDIRTY) - flags |= PM_SOFT_DIRTY; + if (pmd_none(pmd)) + goto populate_pagemap; - if (pmd_present(pmd)) { - page = pmd_page(pmd); + if (pmd_present(pmd)) { + page = pmd_page(pmd); - flags |= PM_PRESENT; - if (pmd_soft_dirty(pmd)) - flags |= PM_SOFT_DIRTY; - if (pmd_uffd_wp(pmd)) - flags |= PM_UFFD_WP; - if (pm->show_pfn) - frame = pmd_pfn(pmd) + idx; - } -#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION - else if (is_swap_pmd(pmd)) { - swp_entry_t entry = pmd_to_swp_entry(pmd); - unsigned long offset; - - if (pm->show_pfn) { - if (is_pfn_swap_entry(entry)) - offset = swp_offset_pfn(entry) + idx; - else - offset = swp_offset(entry) + idx; - frame = swp_type(entry) | - (offset << MAX_SWAPFILES_SHIFT); - } - flags |= PM_SWAP; - if (pmd_swp_soft_dirty(pmd)) - flags |= PM_SOFT_DIRTY; - if (pmd_swp_uffd_wp(pmd)) - flags |= PM_UFFD_WP; - VM_BUG_ON(!is_pmd_migration_entry(pmd)); - page = pfn_swap_entry_to_page(entry); - } -#endif + flags |= PM_PRESENT; + if (pmd_soft_dirty(pmd)) + flags |= PM_SOFT_DIRTY; + if (pmd_uffd_wp(pmd)) + flags |= PM_UFFD_WP; + if (pm->show_pfn) + frame = pmd_pfn(pmd) + idx; + } else if (thp_migration_supported()) { + const softleaf_t entry = softleaf_from_pmd(pmd); + unsigned long offset; - if (page) { - folio = page_folio(page); - if (!folio_test_anon(folio)) - flags |= PM_FILE; + if (pm->show_pfn) { + if (softleaf_has_pfn(entry)) + offset = softleaf_to_pfn(entry) + idx; + else + offset = swp_offset(entry) + idx; + frame = swp_type(entry) | + (offset << MAX_SWAPFILES_SHIFT); } + flags |= PM_SWAP; + if (pmd_swp_soft_dirty(pmd)) + flags |= PM_SOFT_DIRTY; + if (pmd_swp_uffd_wp(pmd)) + flags |= PM_UFFD_WP; + VM_WARN_ON_ONCE(!pmd_is_migration_entry(pmd)); + page = softleaf_to_page(entry); + } + + if (page) { + folio = page_folio(page); + if (!folio_test_anon(folio)) + flags |= PM_FILE; + } - for (; addr != end; addr += PAGE_SIZE, idx++) { - u64 cur_flags = flags; - pagemap_entry_t pme; +populate_pagemap: + for (; addr != end; addr += PAGE_SIZE, idx++) { + u64 cur_flags = flags; + pagemap_entry_t pme; - if (folio && (flags & PM_PRESENT) && - __folio_page_mapped_exclusively(folio, page)) - cur_flags |= PM_MMAP_EXCLUSIVE; + if (folio && (flags & PM_PRESENT) && + __folio_page_mapped_exclusively(folio, page)) + cur_flags |= PM_MMAP_EXCLUSIVE; - pme = make_pme(frame, cur_flags); - err = add_to_pagemap(&pme, pm); - if (err) - break; - if (pm->show_pfn) { - if (flags & PM_PRESENT) - frame++; - else if (flags & PM_SWAP) - frame += (1 << MAX_SWAPFILES_SHIFT); - } + pme = make_pme(frame, cur_flags); + err = add_to_pagemap(&pme, pm); + if (err) + break; + if (pm->show_pfn) { + if (flags & PM_PRESENT) + frame++; + else if (flags & PM_SWAP) + frame += (1 << MAX_SWAPFILES_SHIFT); } + } + return err; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + struct pagemapread *pm = walk->private; + spinlock_t *ptl; + pte_t *pte, *orig_pte; + int err = 0; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + ptl = pmd_trans_huge_lock(pmdp, vma); + if (ptl) { + err = pagemap_pmd_range_thp(pmdp, addr, end, vma, pm); spin_unlock(ptl); return err; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* * We can assume that @vma always points to a valid one and @end never @@ -2310,12 +2331,16 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, struct vm_area_struct *vma, unsigned long addr, pte_t pte) { - unsigned long categories = 0; + unsigned long categories; + + if (pte_none(pte)) + return 0; if (pte_present(pte)) { struct page *page; - categories |= PAGE_IS_PRESENT; + categories = PAGE_IS_PRESENT; + if (!pte_uffd_wp(pte)) categories |= PAGE_IS_WRITTEN; @@ -2329,19 +2354,20 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, categories |= PAGE_IS_PFNZERO; if (pte_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; - } else if (is_swap_pte(pte)) { - swp_entry_t swp; + } else { + softleaf_t entry; + + categories = PAGE_IS_SWAPPED; - categories |= PAGE_IS_SWAPPED; if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; - swp = pte_to_swp_entry(pte); - if (is_guard_swp_entry(swp)) + entry = softleaf_from_pte(pte); + if (softleaf_is_guard_marker(entry)) categories |= PAGE_IS_GUARD; else if ((p->masks_of_interest & PAGE_IS_FILE) && - is_pfn_swap_entry(swp) && - !folio_test_anon(pfn_swap_entry_folio(swp))) + softleaf_has_pfn(entry) && + !folio_test_anon(softleaf_to_folio(entry))) categories |= PAGE_IS_FILE; if (pte_swp_soft_dirty(pte)) @@ -2360,12 +2386,12 @@ static void make_uffd_wp_pte(struct vm_area_struct *vma, old_pte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_mkuffd_wp(old_pte); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); - } else if (is_swap_pte(ptent)) { - ptent = pte_swp_mkuffd_wp(ptent); - set_pte_at(vma->vm_mm, addr, pte, ptent); - } else { + } else if (pte_none(ptent)) { set_pte_at(vma->vm_mm, addr, pte, make_pte_marker(PTE_MARKER_UFFD_WP)); + } else { + ptent = pte_swp_mkuffd_wp(ptent); + set_pte_at(vma->vm_mm, addr, pte, ptent); } } @@ -2376,6 +2402,9 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, { unsigned long categories = PAGE_IS_HUGE; + if (pmd_none(pmd)) + return categories; + if (pmd_present(pmd)) { struct page *page; @@ -2393,9 +2422,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, categories |= PAGE_IS_PFNZERO; if (pmd_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; - } else if (is_swap_pmd(pmd)) { - swp_entry_t swp; - + } else { categories |= PAGE_IS_SWAPPED; if (!pmd_swp_uffd_wp(pmd)) categories |= PAGE_IS_WRITTEN; @@ -2403,9 +2430,10 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, categories |= PAGE_IS_SOFT_DIRTY; if (p->masks_of_interest & PAGE_IS_FILE) { - swp = pmd_to_swp_entry(pmd); - if (is_pfn_swap_entry(swp) && - !folio_test_anon(pfn_swap_entry_folio(swp))) + const softleaf_t entry = softleaf_from_pmd(pmd); + + if (softleaf_has_pfn(entry) && + !folio_test_anon(softleaf_to_folio(entry))) categories |= PAGE_IS_FILE; } } @@ -2422,7 +2450,7 @@ static void make_uffd_wp_pmd(struct vm_area_struct *vma, old = pmdp_invalidate_ad(vma, addr, pmdp); pmd = pmd_mkuffd_wp(old); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); - } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { + } else if (pmd_is_migration_entry(pmd)) { pmd = pmd_swp_mkuffd_wp(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } @@ -2434,6 +2462,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte) { unsigned long categories = PAGE_IS_HUGE; + if (pte_none(pte)) + return categories; + /* * According to pagemap_hugetlb_range(), file-backed HugeTLB * page cannot be swapped. So PAGE_IS_FILE is not checked for @@ -2441,6 +2472,7 @@ static unsigned long pagemap_hugetlb_category(pte_t pte) */ if (pte_present(pte)) { categories |= PAGE_IS_PRESENT; + if (!huge_pte_uffd_wp(pte)) categories |= PAGE_IS_WRITTEN; if (!PageAnon(pte_page(pte))) @@ -2449,8 +2481,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte) categories |= PAGE_IS_PFNZERO; if (pte_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; - } else if (is_swap_pte(pte)) { + } else { categories |= PAGE_IS_SWAPPED; + if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; if (pte_swp_soft_dirty(pte)) @@ -2464,22 +2497,25 @@ static void make_uffd_wp_huge_pte(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t ptent) { - unsigned long psize; + const unsigned long psize = huge_page_size(hstate_vma(vma)); + softleaf_t entry; - if (is_hugetlb_entry_hwpoisoned(ptent) || is_pte_marker(ptent)) + if (huge_pte_none(ptent)) { + set_huge_pte_at(vma->vm_mm, addr, ptep, + make_pte_marker(PTE_MARKER_UFFD_WP), psize); return; + } - psize = huge_page_size(hstate_vma(vma)); + entry = softleaf_from_pte(ptent); + if (softleaf_is_hwpoison(entry) || softleaf_is_marker(entry)) + return; - if (is_hugetlb_entry_migration(ptent)) + if (softleaf_is_migration(entry)) set_huge_pte_at(vma->vm_mm, addr, ptep, pte_swp_mkuffd_wp(ptent), psize); - else if (!huge_pte_none(ptent)) + else huge_ptep_modify_prot_commit(vma, addr, ptep, ptent, huge_pte_mkuffd_wp(ptent)); - else - set_huge_pte_at(vma->vm_mm, addr, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP), psize); } #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index 0e5050d6ab64..d6113dbe58e0 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -31,12 +31,11 @@ static const struct inode_operations proc_thread_self_inode_operations = { .get_link = proc_thread_self_get_link, }; -static unsigned thread_self_inum __ro_after_init; +unsigned thread_self_inum __ro_after_init; int proc_setup_thread_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct proc_fs_info *fs_info = proc_sb_info(s); struct dentry *thread_self; int ret = -ENOMEM; @@ -51,19 +50,15 @@ int proc_setup_thread_self(struct super_block *s) inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_op = &proc_thread_self_inode_operations; - d_add(thread_self, inode); + d_make_persistent(thread_self, inode); ret = 0; - } else { - dput(thread_self); } + dput(thread_self); } inode_unlock(root_inode); if (ret) pr_err("proc_fill_super: can't allocate /proc/thread-self\n"); - else - fs_info->proc_thread_self = thread_self; - return ret; } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index b4e55c90f8dc..71deffcc3356 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -373,7 +373,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) if (!dentry) return -ENOMEM; - private->dentry = dentry; + private->dentry = dentry; // borrowed private->record = record; inode->i_size = private->total_size = size; inode->i_private = private; @@ -382,7 +382,8 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) inode_set_mtime_to_ts(inode, inode_set_ctime_to_ts(inode, record->time)); - d_add(dentry, no_free_ptr(inode)); + d_make_persistent(dentry, no_free_ptr(inode)); + dput(dentry); list_add(&(no_free_ptr(private))->list, &records_list); @@ -465,7 +466,7 @@ static void pstore_kill_sb(struct super_block *sb) guard(mutex)(&pstore_sb_lock); WARN_ON(pstore_sb && pstore_sb != sb); - kill_litter_super(sb); + kill_anon_super(sb); pstore_sb = NULL; guard(mutex)(&records_list_lock); diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index bc68b4de5287..39936d6da0dd 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -864,6 +864,8 @@ static int ramoops_probe(struct platform_device *pdev) ramoops_console_size = pdata->console_size; ramoops_pmsg_size = pdata->pmsg_size; ramoops_ftrace_size = pdata->ftrace_size; + mem_type = pdata->mem_type; + ramoops_ecc = pdata->ecc_info.ecc_size; pr_info("using 0x%lx@0x%llx, ecc: %d\n", cxt->size, (unsigned long long)cxt->phys_addr, diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index b11f5b20b78b..c3ed1c5117b2 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -35,7 +35,7 @@ static unsigned long ramfs_mmu_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags); + return mm_get_unmapped_area(file, addr, len, pgoff, flags); } const struct file_operations ramfs_file_operations = { diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 41f9995da7ca..505d10a0cb36 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -110,8 +110,7 @@ ramfs_mknod(struct mnt_idmap *idmap, struct inode *dir, goto out; } - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ + d_make_persistent(dentry, inode); error = 0; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } @@ -154,8 +153,7 @@ static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir, error = page_symlink(inode, symname, l); if (!error) { - d_instantiate(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } else @@ -313,7 +311,7 @@ int ramfs_init_fs_context(struct fs_context *fc) void ramfs_kill_sb(struct super_block *sb) { kfree(sb->s_fs_info); - kill_litter_super(sb); + kill_anon_super(sb); } static struct file_system_type ramfs_fs_type = { diff --git a/fs/resctrl/pseudo_lock.c b/fs/resctrl/pseudo_lock.c index 87bbc2605de1..0bfc13c5b96d 100644 --- a/fs/resctrl/pseudo_lock.c +++ b/fs/resctrl/pseudo_lock.c @@ -995,10 +995,11 @@ static const struct vm_operations_struct pseudo_mmap_ops = { .mremap = pseudo_lock_dev_mremap, }; -static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) +static int pseudo_lock_dev_mmap_prepare(struct vm_area_desc *desc) { - unsigned long vsize = vma->vm_end - vma->vm_start; - unsigned long off = vma->vm_pgoff << PAGE_SHIFT; + unsigned long off = desc->pgoff << PAGE_SHIFT; + unsigned long vsize = vma_desc_size(desc); + struct file *filp = desc->file; struct pseudo_lock_region *plr; struct rdtgroup *rdtgrp; unsigned long physical; @@ -1043,7 +1044,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) * Ensure changes are carried directly to the memory being mapped, * do not allow copy-on-write mapping. */ - if (!(vma->vm_flags & VM_SHARED)) { + if (!(desc->vm_flags & VM_SHARED)) { mutex_unlock(&rdtgroup_mutex); return -EINVAL; } @@ -1055,12 +1056,9 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) memset(plr->kmem + off, 0, vsize); - if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff, - vsize, vma->vm_page_prot)) { - mutex_unlock(&rdtgroup_mutex); - return -EAGAIN; - } - vma->vm_ops = &pseudo_mmap_ops; + desc->vm_ops = &pseudo_mmap_ops; + mmap_action_remap_full(desc, physical + desc->pgoff); + mutex_unlock(&rdtgroup_mutex); return 0; } @@ -1071,7 +1069,7 @@ static const struct file_operations pseudo_lock_dev_fops = { .write = NULL, .open = pseudo_lock_dev_open, .release = pseudo_lock_dev_release, - .mmap = pseudo_lock_dev_mmap, + .mmap_prepare = pseudo_lock_dev_mmap_prepare, }; int rdt_pseudo_lock_init(void) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index e3ea6fe7edb4..1db7ab6c2529 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -176,7 +176,7 @@ replay_again: server = cifs_pick_channel(ses); if (!server->ops->new_lease_key) - return -EIO; + return smb_EIO(smb_eio_trace_no_lease_key); utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 7fdcaf9feb16..2cb234d4bd2f 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -37,7 +37,7 @@ cifs_dump_mem(char *label, void *data, int length) data, length, true); } -void cifs_dump_detail(void *buf, struct TCP_Server_Info *server) +void cifs_dump_detail(void *buf, size_t buf_len, struct TCP_Server_Info *server) { #ifdef CONFIG_CIFS_DEBUG2 struct smb_hdr *smb = buf; @@ -45,7 +45,7 @@ void cifs_dump_detail(void *buf, struct TCP_Server_Info *server) cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d Wct: %d\n", smb->Command, smb->Status.CifsError, smb->Flags, smb->Flags2, smb->Mid, smb->Pid, smb->WordCount); - if (!server->ops->check_message(buf, server->total_read, server)) { + if (!server->ops->check_message(buf, buf_len, server->total_read, server)) { cifs_dbg(VFS, "smb buf %p len %u\n", smb, server->ops->calc_smb_size(smb)); } @@ -79,9 +79,9 @@ void cifs_dump_mids(struct TCP_Server_Info *server) cifs_dbg(VFS, "IsMult: %d IsEnd: %d\n", mid_entry->multiRsp, mid_entry->multiEnd); if (mid_entry->resp_buf) { - cifs_dump_detail(mid_entry->resp_buf, server); - cifs_dump_mem("existing buf: ", - mid_entry->resp_buf, 62); + cifs_dump_detail(mid_entry->resp_buf, + mid_entry->response_pdu_len, server); + cifs_dump_mem("existing buf: ", mid_entry->resp_buf, 62); } } spin_unlock(&server->mid_queue_lock); @@ -1318,11 +1318,11 @@ static const struct proc_ops cifs_mount_params_proc_ops = { }; #else -inline void cifs_proc_init(void) +void cifs_proc_init(void) { } -inline void cifs_proc_clean(void) +void cifs_proc_clean(void) { } #endif /* PROC_FS */ diff --git a/fs/smb/client/cifs_debug.h b/fs/smb/client/cifs_debug.h index ce5cfd236fdb..e0035ff42dba 100644 --- a/fs/smb/client/cifs_debug.h +++ b/fs/smb/client/cifs_debug.h @@ -15,10 +15,10 @@ #define pr_fmt(fmt) "CIFS: " fmt void cifs_dump_mem(char *label, void *data, int length); -void cifs_dump_detail(void *buf, struct TCP_Server_Info *ptcp_info); -void cifs_dump_mids(struct TCP_Server_Info *); +void cifs_dump_detail(void *buf, size_t buf_len, struct TCP_Server_Info *server); +void cifs_dump_mids(struct TCP_Server_Info *server); extern bool traceSMB; /* flag which enables the function below */ -void dump_smb(void *, int); +void dump_smb(void *buf, int smb_buf_length); #define CIFS_INFO 0x01 #define CIFS_RC 0x02 #define CIFS_TIMER 0x04 diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index da935bd1ce87..3a41bbada04c 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -159,6 +159,7 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, cifs_dbg(FYI, "key description = %s\n", description); scoped_with_creds(spnego_cred) spnego_key = request_key(&cifs_spnego_key_type, description, ""); + trace_smb3_kerberos_auth(server, sesInfo, PTR_ERR_OR_ZERO(spnego_key)); #ifdef CONFIG_CIFS_DEBUG2 if (cifsFYI && !IS_ERR(spnego_key)) { diff --git a/fs/smb/client/cifs_spnego.h b/fs/smb/client/cifs_spnego.h index e4d751b0c812..e70929db3611 100644 --- a/fs/smb/client/cifs_spnego.h +++ b/fs/smb/client/cifs_spnego.h @@ -27,10 +27,8 @@ struct cifs_spnego_msg { uint8_t data[]; }; -#ifdef __KERNEL__ extern struct key_type cifs_spnego_key_type; extern struct key *cifs_get_spnego_key(struct cifs_ses *sesInfo, struct TCP_Server_Info *server); -#endif /* KERNEL */ #endif /* _CIFS_SPNEGO_H */ diff --git a/fs/smb/client/cifs_unicode.h b/fs/smb/client/cifs_unicode.h index e137a0dfbbe9..6e4b99786498 100644 --- a/fs/smb/client/cifs_unicode.h +++ b/fs/smb/client/cifs_unicode.h @@ -54,7 +54,6 @@ #define SFM_MAP_UNI_RSVD 1 #define SFU_MAP_UNI_RSVD 2 -#ifdef __KERNEL__ int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, const struct nls_table *cp, int map_type); int cifs_utf16_bytes(const __le16 *from, int maxbytes, @@ -69,8 +68,6 @@ extern int cifs_remap(struct cifs_sb_info *cifs_sb); extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, const struct nls_table *cp, int remap); -#endif - wchar_t cifs_toupper(wchar_t in); #endif /* _CIFS_UNICODE_H */ diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index ce2ebc213a1d..7e6e473bd4a0 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -300,7 +300,7 @@ id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid) __func__, sidtype == SIDOWNER ? 'u' : 'g', cid); goto out_revert_creds; } else if (sidkey->datalen < CIFS_SID_BASE_SIZE) { - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_malformed_sid_key, sidkey->datalen); cifs_dbg(FYI, "%s: Downcall contained malformed key (datalen=%hu)\n", __func__, sidkey->datalen); goto invalidate_key; @@ -317,7 +317,8 @@ id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid) ksid_size = CIFS_SID_BASE_SIZE + (ksid->num_subauth * sizeof(__le32)); if (ksid_size > sidkey->datalen) { - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_malformed_ksid_key, + ksid_size, sidkey->datalen); cifs_dbg(FYI, "%s: Downcall contained malformed key (datalen=%hu, ksid_size=%u)\n", __func__, sidkey->datalen, ksid_size); goto invalidate_key; @@ -352,7 +353,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid, if (unlikely(psid->num_subauth > SID_MAX_SUB_AUTHORITIES)) { cifs_dbg(FYI, "%s: %u subauthorities is too many!\n", __func__, psid->num_subauth); - return -EIO; + return smb_EIO2(smb_eio_trace_sid_too_many_auth, + psid->num_subauth, SID_MAX_SUB_AUTHORITIES); } if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) || @@ -1227,7 +1229,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, __u32 dacloffset; if (pntsd == NULL) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); owner_sid_ptr = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 801824825ecf..ca2a84e8673e 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -75,48 +75,35 @@ static int cifs_sig_iter(const struct iov_iter *iter, size_t maxsize, struct cifs_calc_sig_ctx *ctx) { struct iov_iter tmp_iter = *iter; - int err = -EIO; + size_t did; + int err; - if (iterate_and_advance_kernel(&tmp_iter, maxsize, ctx, &err, - cifs_sig_step) != maxsize) - return err; + did = iterate_and_advance_kernel(&tmp_iter, maxsize, ctx, &err, + cifs_sig_step); + if (did != maxsize) + return smb_EIO2(smb_eio_trace_sig_iter, did, maxsize); return 0; } int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, char *signature, struct cifs_calc_sig_ctx *ctx) { - int i; + struct iov_iter iter; ssize_t rc; - struct kvec *iov = rqst->rq_iov; - int n_vec = rqst->rq_nvec; - - /* iov[0] is actual data and not the rfc1002 length for SMB2+ */ - if (!is_smb1(server)) { - if (iov[0].iov_len <= 4) - return -EIO; - i = 0; - } else { - if (n_vec < 2 || iov[0].iov_len != 4) - return -EIO; - i = 1; /* skip rfc1002 length */ - } + size_t size = 0; - for (; i < n_vec; i++) { - if (iov[i].iov_len == 0) - continue; - if (iov[i].iov_base == NULL) { - cifs_dbg(VFS, "null iovec entry\n"); - return -EIO; - } + for (int i = 0; i < rqst->rq_nvec; i++) + size += rqst->rq_iov[i].iov_len; - rc = cifs_sig_update(ctx, iov[i].iov_base, iov[i].iov_len); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with payload\n", - __func__); - return rc; - } - } + iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, size); + + if (iov_iter_count(&iter) <= 4) + return smb_EIO2(smb_eio_trace_sig_data_too_small, + iov_iter_count(&iter), 4); + + rc = cifs_sig_iter(&iter, iov_iter_count(&iter), ctx); + if (rc < 0) + return rc; rc = cifs_sig_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), ctx); if (rc < 0) @@ -165,10 +152,6 @@ int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, char smb_signature[20]; struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base; - if (rqst->rq_iov[0].iov_len != 4 || - rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) - return -EIO; - if ((cifs_pdu == NULL) || (server == NULL)) return -EINVAL; @@ -201,30 +184,6 @@ int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, return rc; } -int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, - __u32 *pexpected_response_sequence) -{ - struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = n_vec }; - - return cifs_sign_rqst(&rqst, server, pexpected_response_sequence); -} - -/* must be called with server->srv_mutex held */ -int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, - __u32 *pexpected_response_sequence_number) -{ - struct kvec iov[2]; - - iov[0].iov_base = cifs_pdu; - iov[0].iov_len = 4; - iov[1].iov_base = (char *)cifs_pdu + 4; - iov[1].iov_len = be32_to_cpu(cifs_pdu->smb_buf_length); - - return cifs_sign_smbv(iov, 2, server, - pexpected_response_sequence_number); -} - int cifs_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 expected_sequence_number) @@ -234,10 +193,6 @@ int cifs_verify_signature(struct smb_rqst *rqst, char what_we_think_sig_should_be[20]; struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base; - if (rqst->rq_iov[0].iov_len != 4 || - rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) - return -EIO; - if (cifs_pdu == NULL || server == NULL) return -EINVAL; diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 6eccb9ed9daa..d9664634144d 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -28,6 +28,8 @@ #include <linux/splice.h> #include <linux/uuid.h> #include <linux/xattr.h> +#include <linux/mm.h> +#include <linux/key-type.h> #include <uapi/linux/magic.h> #include <net/ipv6.h> #include "cifsfs.h" @@ -35,10 +37,9 @@ #define DECLARE_GLOBALS_HERE #include "cifsglob.h" #include "cifsproto.h" +#include "smb2proto.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" -#include <linux/mm.h> -#include <linux/key-type.h> #include "cifs_spnego.h" #include "fscache.h" #ifdef CONFIG_CIFS_DFS_UPCALL @@ -442,7 +443,7 @@ static struct kmem_cache *cifs_io_request_cachep; static struct kmem_cache *cifs_io_subrequest_cachep; mempool_t *cifs_sm_req_poolp; mempool_t *cifs_req_poolp; -mempool_t *cifs_mid_poolp; +mempool_t cifs_mid_pool; mempool_t cifs_io_request_pool; mempool_t cifs_io_subrequest_pool; @@ -1016,7 +1017,6 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, } else { cifs_info("Attempting to mount %s\n", old_ctx->source); } - cifs_sb = kzalloc(sizeof(*cifs_sb), GFP_KERNEL); if (!cifs_sb) return ERR_PTR(-ENOMEM); @@ -1847,8 +1847,7 @@ static int init_mids(void) return -ENOMEM; /* 3 is a reasonable minimum number of simultaneous operations */ - cifs_mid_poolp = mempool_create_slab_pool(3, cifs_mid_cachep); - if (cifs_mid_poolp == NULL) { + if (mempool_init_slab_pool(&cifs_mid_pool, 3, cifs_mid_cachep) < 0) { kmem_cache_destroy(cifs_mid_cachep); return -ENOMEM; } @@ -1858,7 +1857,7 @@ static int init_mids(void) static void destroy_mids(void) { - mempool_destroy(cifs_mid_poolp); + mempool_exit(&cifs_mid_pool); kmem_cache_destroy(cifs_mid_cachep); } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 0c76e0a31386..3eca5bfb7030 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -311,8 +311,9 @@ struct cifs_open_parms; struct cifs_credits; struct smb_version_operations { - int (*send_cancel)(struct TCP_Server_Info *, struct smb_rqst *, - struct mid_q_entry *); + int (*send_cancel)(struct cifs_ses *ses, struct TCP_Server_Info *server, + struct smb_rqst *rqst, struct mid_q_entry *mid, + unsigned int xid); bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *); /* setup request: allocate mid, sign message */ struct mid_q_entry *(*setup_request)(struct cifs_ses *, @@ -346,13 +347,14 @@ struct smb_version_operations { /* map smb to linux error */ int (*map_error)(char *, bool); /* find mid corresponding to the response message */ - struct mid_q_entry * (*find_mid)(struct TCP_Server_Info *, char *); - void (*dump_detail)(void *buf, struct TCP_Server_Info *ptcp_info); + struct mid_q_entry *(*find_mid)(struct TCP_Server_Info *server, char *buf); + void (*dump_detail)(void *buf, size_t buf_len, struct TCP_Server_Info *ptcp_info); void (*clear_stats)(struct cifs_tcon *); void (*print_stats)(struct seq_file *m, struct cifs_tcon *); void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *); /* verify the message */ - int (*check_message)(char *, unsigned int, struct TCP_Server_Info *); + int (*check_message)(char *buf, unsigned int pdu_len, unsigned int len, + struct TCP_Server_Info *server); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *server, @@ -636,8 +638,7 @@ struct smb_version_operations { #define HEADER_SIZE(server) (server->vals->header_size) #define MAX_HEADER_SIZE(server) (server->vals->max_header_size) -#define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size) -#define MID_HEADER_SIZE(server) (HEADER_SIZE(server) - 1 - HEADER_PREAMBLE_SIZE(server)) +#define MID_HEADER_SIZE(server) (HEADER_SIZE(server) - 1) /** * CIFS superblock mount flags (mnt_cifs_flags) to consider when @@ -744,6 +745,7 @@ struct TCP_Server_Info { struct session_key session_key; unsigned long lstrp; /* when we got last response from this server */ unsigned long neg_start; /* when negotiate started (jiffies) */ + unsigned long reconn_delay; /* when resched session and tcon reconnect */ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ #define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */ #define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */ @@ -832,9 +834,9 @@ struct TCP_Server_Info { char dns_dom[CIFS_MAX_DOMAINNAME_LEN + 1]; }; -static inline bool is_smb1(struct TCP_Server_Info *server) +static inline bool is_smb1(const struct TCP_Server_Info *server) { - return HEADER_PREAMBLE_SIZE(server) != 0; + return server->vals->protocol_id == SMB10_PROT_ID; } static inline void cifs_server_lock(struct TCP_Server_Info *server) @@ -973,16 +975,16 @@ compare_mid(__u16 mid, const struct smb_hdr *smb) * of kvecs to handle the receive, though that should only need to be done * once. */ -#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) -#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) +#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ)) +#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP)) /* * When the server doesn't allow large posix writes, only allow a rsize/wsize * of 2^17-1 minus the size of the call header. That allows for a read or * write up to the maximum size described by RFC1002. */ -#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) -#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) +#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ)) +#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP)) /* * Windows only supports a max of 60kb reads and 65535 byte writes. Default to @@ -1659,7 +1661,7 @@ static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon, * Returns zero on a successful receive, or an error. The receive state in * the TCP_Server_Info will also be updated. */ -typedef int (mid_receive_t)(struct TCP_Server_Info *server, +typedef int (*mid_receive_t)(struct TCP_Server_Info *server, struct mid_q_entry *mid); /* @@ -1670,37 +1672,38 @@ typedef int (mid_receive_t)(struct TCP_Server_Info *server, * - it will be called by cifsd, with no locks held * - the mid will be removed from any lists */ -typedef void (mid_callback_t)(struct mid_q_entry *mid); +typedef void (*mid_callback_t)(struct TCP_Server_Info *srv, struct mid_q_entry *mid); /* * This is the protopyte for mid handle function. This is called once the mid * has been recognized after decryption of the message. */ -typedef int (mid_handle_t)(struct TCP_Server_Info *server, +typedef int (*mid_handle_t)(struct TCP_Server_Info *server, struct mid_q_entry *mid); /* one of these for every pending CIFS request to the server */ struct mid_q_entry { struct list_head qhead; /* mids waiting on reply from this server */ - struct kref refcount; - struct TCP_Server_Info *server; /* server corresponding to this mid */ + refcount_t refcount; __u64 mid; /* multiplex id */ __u16 credits; /* number of credits consumed by this mid */ __u16 credits_received; /* number of credits from the response */ __u32 pid; /* process id */ __u32 sequence_number; /* for CIFS signing */ + unsigned int sr_flags; /* Flags passed to send_recv() */ unsigned long when_alloc; /* when mid was created */ #ifdef CONFIG_CIFS_STATS2 unsigned long when_sent; /* time when smb send finished */ unsigned long when_received; /* when demux complete (taken off wire) */ #endif - mid_receive_t *receive; /* call receive callback */ - mid_callback_t *callback; /* call completion callback */ - mid_handle_t *handle; /* call handle mid callback */ + mid_receive_t receive; /* call receive callback */ + mid_callback_t callback; /* call completion callback */ + mid_handle_t handle; /* call handle mid callback */ void *callback_data; /* general purpose pointer for callback */ struct task_struct *creator; void *resp_buf; /* pointer to received SMB header */ unsigned int resp_buf_size; + u32 response_pdu_len; int mid_state; /* wish this were enum but can not pass to wait_event */ int mid_rc; /* rc for MID_RC */ __le16 command; /* smb command code */ @@ -1899,6 +1902,8 @@ enum cifs_writable_file_flags { #define CIFS_TRANSFORM_REQ 0x0800 /* transform request before sending */ #define CIFS_NO_SRV_RSP 0x1000 /* there is no server response */ #define CIFS_COMPRESS_REQ 0x4000 /* compress request before sending */ +#define CIFS_INTERRUPTIBLE_WAIT 0x8000 /* Interruptible wait (e.g. lock request) */ +#define CIFS_WINDOWS_LOCK 0x10000 /* We're trying to get a Windows lock */ /* Security Flags: indicate type of session setup needed */ #define CIFSSEC_MAY_SIGN 0x00001 @@ -2104,7 +2109,7 @@ extern __u32 cifs_lock_secret; extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; -extern mempool_t *cifs_mid_poolp; +extern mempool_t cifs_mid_pool; extern mempool_t cifs_io_request_pool; extern mempool_t cifs_io_subrequest_pool; @@ -2114,7 +2119,7 @@ extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; -#endif /* CIFS_ALLOW_INSECURE_LEGACY */ +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; extern struct smb_version_values smbdefault_values; @@ -2202,94 +2207,6 @@ static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const dst->FileNameLength = src->FileNameLength; } -static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, - int num_rqst, - const u8 *sig) -{ - unsigned int len, skip; - unsigned int nents = 0; - unsigned long addr; - size_t data_size; - int i, j; - - /* - * The first rqst has a transform header where the first 20 bytes are - * not part of the encrypted blob. - */ - skip = 20; - - /* Assumes the first rqst has a transform header as the first iov. - * I.e. - * rqst[0].rq_iov[0] is transform header - * rqst[0].rq_iov[1+] data to be encrypted/decrypted - * rqst[1+].rq_iov[0+] data to be encrypted/decrypted - */ - for (i = 0; i < num_rqst; i++) { - data_size = iov_iter_count(&rqst[i].rq_iter); - - /* We really don't want a mixture of pinned and unpinned pages - * in the sglist. It's hard to keep track of which is what. - * Instead, we convert to a BVEC-type iterator higher up. - */ - if (data_size && - WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) - return -EIO; - - /* We also don't want to have any extra refs or pins to clean - * up in the sglist. - */ - if (data_size && - WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) - return -EIO; - - for (j = 0; j < rqst[i].rq_nvec; j++) { - struct kvec *iov = &rqst[i].rq_iov[j]; - - addr = (unsigned long)iov->iov_base + skip; - if (is_vmalloc_or_module_addr((void *)addr)) { - len = iov->iov_len - skip; - nents += DIV_ROUND_UP(offset_in_page(addr) + len, - PAGE_SIZE); - } else { - nents++; - } - skip = 0; - } - if (data_size) - nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); - } - nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); - return nents; -} - -/* We can not use the normal sg_set_buf() as we will sometimes pass a - * stack object as buf. - */ -static inline void cifs_sg_set_buf(struct sg_table *sgtable, - const void *buf, - unsigned int buflen) -{ - unsigned long addr = (unsigned long)buf; - unsigned int off = offset_in_page(addr); - - addr &= PAGE_MASK; - if (is_vmalloc_or_module_addr((void *)addr)) { - do { - unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off); - - sg_set_page(&sgtable->sgl[sgtable->nents++], - vmalloc_to_page((void *)addr), len, off); - - off = 0; - addr += PAGE_SIZE; - buflen -= len; - } while (buflen); - } else { - sg_set_page(&sgtable->sgl[sgtable->nents++], - virt_to_page((void *)addr), buflen, off); - } -} - #define CIFS_OPARMS(_cifs_sb, _tcon, _path, _da, _cd, _co, _mode) \ ((struct cifs_open_parms) { \ .tcon = _tcon, \ @@ -2351,9 +2268,10 @@ static inline bool cifs_netbios_name(const char *name, size_t namelen) * Execute mid callback atomically - ensures callback runs exactly once * and prevents sleeping in atomic context. */ -static inline void mid_execute_callback(struct mid_q_entry *mid) +static inline void mid_execute_callback(struct TCP_Server_Info *server, + struct mid_q_entry *mid) { - void (*callback)(struct mid_q_entry *mid); + mid_callback_t callback; spin_lock(&mid->mid_lock); callback = mid->callback; @@ -2361,7 +2279,7 @@ static inline void mid_execute_callback(struct mid_q_entry *mid) spin_unlock(&mid->mid_lock); if (callback) - callback(mid); + callback(server, mid); } #define CIFS_REPARSE_SUPPORT(tcon) \ @@ -2369,4 +2287,30 @@ static inline void mid_execute_callback(struct mid_q_entry *mid) (le32_to_cpu((tcon)->fsAttrInfo.Attributes) & \ FILE_SUPPORTS_REPARSE_POINTS)) +struct cifs_calc_sig_ctx { + struct md5_ctx *md5; + struct hmac_sha256_ctx *hmac; + struct shash_desc *shash; +}; + +#define CIFS_RECONN_DELAY_SECS 30 +#define CIFS_MAX_RECONN_DELAY (4 * CIFS_RECONN_DELAY_SECS) + +static inline void cifs_queue_server_reconn(struct TCP_Server_Info *server) +{ + if (!delayed_work_pending(&server->reconnect)) { + WRITE_ONCE(server->reconn_delay, 0); + mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + } +} + +static inline void cifs_requeue_server_reconn(struct TCP_Server_Info *server) +{ + unsigned long delay = READ_ONCE(server->reconn_delay); + + delay = umin(delay + CIFS_RECONN_DELAY_SECS, CIFS_MAX_RECONN_DELAY); + WRITE_ONCE(server->reconn_delay, delay); + queue_delayed_work(cifsiod_wq, &server->reconnect, delay * HZ); +} + #endif /* _CIFS_GLOB_H */ diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 49f35cb3cf2e..37b23664ddf3 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -90,7 +90,7 @@ /* future chained NTCreateXReadX bigger, but for time being NTCreateX biggest */ /* among the requests (NTCreateX response is bigger with wct of 34) */ -#define MAX_CIFS_HDR_SIZE 0x58 /* 4 len + 32 hdr + (2*24 wct) + 2 bct + 2 pad */ +#define MAX_CIFS_HDR_SIZE 0x54 /* 32 hdr + (2*24 wct) + 2 bct + 2 pad */ #define CIFS_SMALL_PATH 120 /* allows for (448-88)/3 */ /* internal cifs vfs structures */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 3528c365a452..f8c0615d4ee4 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -30,8 +30,6 @@ extern void cifs_buf_release(void *); extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); extern void free_rsp_buf(int, void *); -extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, - unsigned int /* length */); extern int smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *msg, size_t *sent); @@ -82,11 +80,10 @@ extern char *cifs_build_path_to_root(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, int add_treename); -extern char *build_wildcard_path_from_dentry(struct dentry *direntry); char *cifs_build_devname(char *nodename, const char *prepath); -extern void delete_mid(struct mid_q_entry *mid); -void __release_mid(struct kref *refcount); -extern void cifs_wake_up_task(struct mid_q_entry *mid); +void delete_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid); +void __release_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid); +void cifs_wake_up_task(struct TCP_Server_Info *server, struct mid_q_entry *mid); extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); extern char *smb3_fs_context_fullpath(const struct smb3_fs_context *ctx, @@ -97,10 +94,10 @@ extern int cifs_ipaddr_cmp(struct sockaddr *srcaddr, struct sockaddr *rhs); extern bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs); extern int cifs_discard_remaining_data(struct TCP_Server_Info *server); extern int cifs_call_async(struct TCP_Server_Info *server, - struct smb_rqst *rqst, - mid_receive_t *receive, mid_callback_t *callback, - mid_handle_t *handle, void *cbdata, const int flags, - const struct cifs_credits *exist_credits); + struct smb_rqst *rqst, + mid_receive_t receive, mid_callback_t callback, + mid_handle_t handle, void *cbdata, const int flags, + const struct cifs_credits *exist_credits); extern struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses); extern int cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, struct TCP_Server_Info *server, @@ -111,18 +108,16 @@ extern int compound_send_recv(const unsigned int xid, struct cifs_ses *ses, const int flags, const int num_rqst, struct smb_rqst *rqst, int *resp_buf_type, struct kvec *resp_iov); -extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, - struct smb_hdr * /* input */ , - struct smb_hdr * /* out */ , - int * /* bytes returned */ , const int); -extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, - char *in_buf, int flags); +int SendReceive(const unsigned int xid, struct cifs_ses *ses, + struct smb_hdr *in_buf, unsigned int in_len, + struct smb_hdr *out_buf, int *pbytes_returned, const int flags); +int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, + char *in_buf, unsigned int in_len, int flags); int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server); -extern struct mid_q_entry *cifs_setup_request(struct cifs_ses *, - struct TCP_Server_Info *, - struct smb_rqst *); -extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *, - struct smb_rqst *); +struct mid_q_entry *cifs_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *ignored, + struct smb_rqst *rqst); +struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *server, + struct smb_rqst *rqst); int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst); extern int cifs_check_receive(struct mid_q_entry *mid, @@ -134,11 +129,12 @@ extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server, struct cifs_credits *credits); static inline int -send_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst, - struct mid_q_entry *mid) +send_cancel(struct cifs_ses *ses, struct TCP_Server_Info *server, + struct smb_rqst *rqst, struct mid_q_entry *mid, + unsigned int xid) { return server->ops->send_cancel ? - server->ops->send_cancel(server, rqst, mid) : 0; + server->ops->send_cancel(ses, server, rqst, mid, xid) : 0; } int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ); @@ -146,11 +142,6 @@ extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, struct kvec *, int /* nvec to send */, int * /* type of buf returned */, const int flags, struct kvec * /* resp vec */); -extern int SendReceiveBlockingLock(const unsigned int xid, - struct cifs_tcon *ptcon, - struct smb_hdr *in_buf, - struct smb_hdr *out_buf, - int *bytes_returned); void smb2_query_server_interfaces(struct work_struct *work); void @@ -161,13 +152,12 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, bool mark_smb_session); extern int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session); -extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr); +int checkSMB(char *buf, unsigned int pdu_len, unsigned int len, + struct TCP_Server_Info *srvr); extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *); extern bool backup_cred(struct cifs_sb_info *); extern bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 eof, bool from_readdir); -extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, - unsigned int bytes_written); void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int); extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, @@ -187,15 +177,14 @@ extern int decode_negTokenInit(unsigned char *security_blob, int length, extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); extern void cifs_set_port(struct sockaddr *addr, const unsigned short int port); extern int map_smb_to_linux_error(char *buf, bool logErr); -extern int map_and_check_smb_error(struct mid_q_entry *mid, bool logErr); -extern void header_assemble(struct smb_hdr *, char /* command */ , - const struct cifs_tcon *, int /* length of - fixed section (word count) in two byte units */); +extern int map_and_check_smb_error(struct TCP_Server_Info *server, + struct mid_q_entry *mid, bool logErr); +unsigned int header_assemble(struct smb_hdr *buffer, char smb_command, + const struct cifs_tcon *treeCon, int word_count + /* length of fixed section word count in two byte units */); extern int small_smb_init_no_tc(const int smb_cmd, const int wct, struct cifs_ses *ses, void **request_buf); -extern enum securityEnum select_sectype(struct TCP_Server_Info *server, - enum securityEnum requested); extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, struct TCP_Server_Info *server, const struct nls_table *nls_cp); @@ -270,7 +259,7 @@ extern unsigned int setup_special_mode_ACE(struct smb_ace *pace, __u64 nmode); extern unsigned int setup_special_user_owner_ACE(struct smb_ace *pace); -extern void dequeue_mid(struct mid_q_entry *mid, bool malformed); +void dequeue_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid, bool malformed); extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, unsigned int to_read); extern ssize_t cifs_discard_from_socket(struct TCP_Server_Info *server, @@ -565,12 +554,9 @@ extern void tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace) extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number); -extern int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *, - __u32 *); -extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); -extern int cifs_verify_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, - __u32 expected_sequence_number); +int cifs_verify_signature(struct smb_rqst *rqst, + struct TCP_Server_Info *server, + __u32 expected_sequence_number); extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); extern void cifs_crypto_secmech_release(struct TCP_Server_Info *server); extern int calc_seckey(struct cifs_ses *); @@ -603,7 +589,7 @@ extern int cifs_do_set_acl(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nls_codepage, int remap); extern int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, const int netfid, __u64 *pExtAttrBits, __u64 *pMask); -#endif /* CIFS_ALLOW_INSECURE_LEGACY */ +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); extern bool couldbe_mf_symlink(const struct cifs_fattr *fattr); extern int check_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, @@ -635,11 +621,6 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const unsigned char *path, char *pbuf, unsigned int *pbytes_written); -struct cifs_calc_sig_ctx { - struct md5_ctx *md5; - struct hmac_sha256_ctx *hmac; - struct shash_desc *shash; -}; int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, char *signature, struct cifs_calc_sig_ctx *ctx); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, @@ -649,8 +630,9 @@ int cifs_alloc_hash(const char *name, struct shash_desc **sdesc); void cifs_free_hash(struct shash_desc **sdesc); int cifs_try_adding_channels(struct cifs_ses *ses); +int smb3_update_ses_channels(struct cifs_ses *ses, struct TCP_Server_Info *server, + bool from_reconnect, bool disable_mchan); bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface); -void cifs_ses_mark_for_reconnect(struct cifs_ses *ses); int cifs_ses_get_chan_index(struct cifs_ses *ses, @@ -674,7 +656,7 @@ bool cifs_chan_is_iface_active(struct cifs_ses *ses, struct TCP_Server_Info *server); void -cifs_disable_secondary_channels(struct cifs_ses *ses); +cifs_decrease_secondary_channels(struct cifs_ses *ses, bool disable_mchan); void cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server); int @@ -777,9 +759,15 @@ static inline bool dfs_src_pathname_equal(const char *s1, const char *s2) return true; } -static inline void release_mid(struct mid_q_entry *mid) +static inline void smb_get_mid(struct mid_q_entry *mid) +{ + refcount_inc(&mid->refcount); +} + +static inline void release_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid) { - kref_put(&mid->refcount, __release_mid); + if (refcount_dec_and_test(&mid->refcount)) + __release_mid(server, mid); } static inline void cifs_free_open_info(struct cifs_open_info_data *data) @@ -789,4 +777,110 @@ static inline void cifs_free_open_info(struct cifs_open_info_data *data) memset(data, 0, sizeof(*data)); } +static inline int smb_EIO(enum smb_eio_trace trace) +{ + trace_smb3_eio(trace, 0, 0); + return -EIO; +} + +static inline int smb_EIO1(enum smb_eio_trace trace, unsigned long info) +{ + trace_smb3_eio(trace, info, 0); + return -EIO; +} + +static inline int smb_EIO2(enum smb_eio_trace trace, unsigned long info, unsigned long info2) +{ + trace_smb3_eio(trace, info, info2); + return -EIO; +} + +static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, + int num_rqst, + const u8 *sig) +{ + unsigned int len, skip; + unsigned int nents = 0; + unsigned long addr; + size_t data_size; + int i, j; + + /* + * The first rqst has a transform header where the first 20 bytes are + * not part of the encrypted blob. + */ + skip = 20; + + /* Assumes the first rqst has a transform header as the first iov. + * I.e. + * rqst[0].rq_iov[0] is transform header + * rqst[0].rq_iov[1+] data to be encrypted/decrypted + * rqst[1+].rq_iov[0+] data to be encrypted/decrypted + */ + for (i = 0; i < num_rqst; i++) { + data_size = iov_iter_count(&rqst[i].rq_iter); + + /* We really don't want a mixture of pinned and unpinned pages + * in the sglist. It's hard to keep track of which is what. + * Instead, we convert to a BVEC-type iterator higher up. + */ + if (data_size && + WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) + return smb_EIO(smb_eio_trace_user_iter); + + /* We also don't want to have any extra refs or pins to clean + * up in the sglist. + */ + if (data_size && + WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) + return smb_EIO(smb_eio_trace_extract_will_pin); + + for (j = 0; j < rqst[i].rq_nvec; j++) { + struct kvec *iov = &rqst[i].rq_iov[j]; + + addr = (unsigned long)iov->iov_base + skip; + if (is_vmalloc_or_module_addr((void *)addr)) { + len = iov->iov_len - skip; + nents += DIV_ROUND_UP(offset_in_page(addr) + len, + PAGE_SIZE); + } else { + nents++; + } + skip = 0; + } + if (data_size) + nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); + } + nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); + return nents; +} + +/* We can not use the normal sg_set_buf() as we will sometimes pass a + * stack object as buf. + */ +static inline void cifs_sg_set_buf(struct sg_table *sgtable, + const void *buf, + unsigned int buflen) +{ + unsigned long addr = (unsigned long)buf; + unsigned int off = offset_in_page(addr); + + addr &= PAGE_MASK; + if (is_vmalloc_or_module_addr((void *)addr)) { + do { + unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off); + + sg_set_page(&sgtable->sgl[sgtable->nents++], + vmalloc_to_page((void *)addr), len, off); + + off = 0; + addr += PAGE_SIZE; + buflen -= len; + } while (buflen); + } else { + sg_set_page(&sgtable->sgl[sgtable->nents++], + virt_to_page((void *)addr), buflen, off); + } +} + #endif /* _CIFSPROTO_H */ diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 428e582e0414..3db1a892c526 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -226,6 +226,7 @@ static int small_smb_init(int smb_command, int wct, struct cifs_tcon *tcon, void **request_buf) { + unsigned int in_len; int rc; rc = cifs_reconnect_tcon(tcon, smb_command); @@ -238,13 +239,13 @@ small_smb_init(int smb_command, int wct, struct cifs_tcon *tcon, return -ENOMEM; } - header_assemble((struct smb_hdr *) *request_buf, smb_command, - tcon, wct); + in_len = header_assemble((struct smb_hdr *) *request_buf, smb_command, + tcon, wct); if (tcon != NULL) cifs_stats_inc(&tcon->num_smbs_sent); - return 0; + return in_len; } int @@ -255,7 +256,7 @@ small_smb_init_no_tc(const int smb_command, const int wct, struct smb_hdr *buffer; rc = small_smb_init(smb_command, wct, NULL, request_buf); - if (rc) + if (rc < 0) return rc; buffer = (struct smb_hdr *)*request_buf; @@ -278,6 +279,8 @@ static int __smb_init(int smb_command, int wct, struct cifs_tcon *tcon, void **request_buf, void **response_buf) { + unsigned int in_len; + *request_buf = cifs_buf_get(); if (*request_buf == NULL) { /* BB should we add a retry in here if not a writepage? */ @@ -290,13 +293,13 @@ __smb_init(int smb_command, int wct, struct cifs_tcon *tcon, if (response_buf) *response_buf = *request_buf; - header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon, - wct); + in_len = header_assemble((struct smb_hdr *)*request_buf, smb_command, tcon, + wct); if (tcon != NULL) cifs_stats_inc(&tcon->num_smbs_sent); - return 0; + return in_len; } /* If the return code is zero, this function must fill in request_buf pointer */ @@ -370,7 +373,8 @@ decode_ext_sec_blob(struct cifs_ses *ses, SMB_NEGOTIATE_RSP *pSMBr) count = get_bcc(&pSMBr->hdr); if (count < SMB1_CLIENT_GUID_SIZE) - return -EIO; + return smb_EIO2(smb_eio_trace_neg_sec_blob_too_small, + count, SMB1_CLIENT_GUID_SIZE); spin_lock(&cifs_tcp_ses_lock); if (server->srv_count > 1) { @@ -421,6 +425,7 @@ CIFSSMBNegotiate(const unsigned int xid, { SMB_NEGOTIATE_REQ *pSMB; SMB_NEGOTIATE_RSP *pSMBr; + unsigned int in_len; int rc = 0; int bytes_returned; int i; @@ -428,13 +433,14 @@ CIFSSMBNegotiate(const unsigned int xid, if (!server) { WARN(1, "%s: server is NULL!\n", __func__); - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); } rc = smb_init(SMB_COM_NEGOTIATE, 0, NULL /* no tcon yet */ , (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->hdr.Mid = get_next_mid(server); pSMB->hdr.Flags2 |= SMBFLG2_ERR_STATUS; @@ -458,10 +464,10 @@ CIFSSMBNegotiate(const unsigned int xid, memcpy(&pSMB->DialectsArray[count], protocols[i].name, len); count += len; } - inc_rfc1001_len(pSMB, count); + in_len += count; pSMB->ByteCount = cpu_to_le16(count); - rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc != 0) goto neg_err_exit; @@ -511,7 +517,8 @@ CIFSSMBNegotiate(const unsigned int xid, server->negflavor = CIFS_NEGFLAVOR_EXTENDED; rc = decode_ext_sec_blob(ses, pSMBr); } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { - rc = -EIO; /* no crypt key only if plain text pwd */ + /* no crypt key only if plain text pwd */ + rc = smb_EIO(smb_eio_trace_neg_no_crypt_key); } else { server->negflavor = CIFS_NEGFLAVOR_UNENCAP; server->capabilities &= ~CAP_EXTENDED_SECURITY; @@ -530,13 +537,14 @@ int CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon) { struct smb_hdr *smb_buffer; + unsigned int in_len; int rc = 0; cifs_dbg(FYI, "In tree disconnect\n"); /* BB: do we need to check this? These should never be NULL. */ if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); /* * No need to return error on this operation if tid invalidated and @@ -547,16 +555,17 @@ CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon) spin_lock(&tcon->ses->chan_lock); if ((tcon->need_reconnect) || CIFS_ALL_CHANS_NEED_RECONNECT(tcon->ses)) { spin_unlock(&tcon->ses->chan_lock); - return -EIO; + return smb_EIO(smb_eio_trace_tdis_in_reconnect); } spin_unlock(&tcon->ses->chan_lock); rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, (void **)&smb_buffer); - if (rc) + if (rc < 0) return rc; + in_len = rc; - rc = SendReceiveNoRsp(xid, tcon->ses, (char *)smb_buffer, 0); + rc = SendReceiveNoRsp(xid, tcon->ses, (char *)smb_buffer, in_len, 0); cifs_small_buf_release(smb_buffer); if (rc) cifs_dbg(FYI, "Tree disconnect failed %d\n", rc); @@ -577,12 +586,11 @@ CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon) * FIXME: maybe we should consider checking that the reply matches request? */ static void -cifs_echo_callback(struct mid_q_entry *mid) +cifs_echo_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) { - struct TCP_Server_Info *server = mid->callback_data; struct cifs_credits credits = { .value = 1, .instance = 0 }; - release_mid(mid); + release_mid(server, mid); add_credits(server, &credits, CIFS_ECHO_OP); } @@ -591,15 +599,19 @@ CIFSSMBEcho(struct TCP_Server_Info *server) { ECHO_REQ *smb; int rc = 0; - struct kvec iov[2]; - struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = 2 }; + struct kvec iov[1]; + struct smb_rqst rqst = { + .rq_iov = iov, + .rq_nvec = ARRAY_SIZE(iov), + }; + unsigned int in_len; cifs_dbg(FYI, "In echo request\n"); rc = small_smb_init(SMB_COM_ECHO, 0, NULL, (void **)&smb); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (server->capabilities & CAP_UNICODE) smb->hdr.Flags2 |= SMBFLG2_UNICODE; @@ -610,12 +622,10 @@ CIFSSMBEcho(struct TCP_Server_Info *server) put_unaligned_le16(1, &smb->EchoCount); put_bcc(1, &smb->hdr); smb->Data[0] = 'a'; - inc_rfc1001_len(smb, 3); + in_len += 3; - iov[0].iov_len = 4; + iov[0].iov_len = in_len; iov[0].iov_base = smb; - iov[1].iov_len = get_rfc1002_len(smb); - iov[1].iov_base = (char *)smb + 4; rc = cifs_call_async(server, &rqst, NULL, cifs_echo_callback, NULL, server, CIFS_NON_BLOCKING | CIFS_ECHO_OP, NULL); @@ -631,6 +641,7 @@ int CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses) { LOGOFF_ANDX_REQ *pSMB; + unsigned int in_len; int rc = 0; cifs_dbg(FYI, "In SMBLogoff for session disconnect\n"); @@ -641,7 +652,7 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses) * should probably be a BUG() */ if (!ses || !ses->server) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); mutex_lock(&ses->session_mutex); spin_lock(&ses->chan_lock); @@ -653,10 +664,11 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses) spin_unlock(&ses->chan_lock); rc = small_smb_init(SMB_COM_LOGOFF_ANDX, 2, NULL, (void **)&pSMB); - if (rc) { + if (rc < 0) { mutex_unlock(&ses->session_mutex); return rc; } + in_len = rc; pSMB->hdr.Mid = get_next_mid(ses->server); @@ -666,7 +678,7 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses) pSMB->hdr.Uid = ses->Suid; pSMB->AndXCommand = 0xFF; - rc = SendReceiveNoRsp(xid, ses, (char *) pSMB, 0); + rc = SendReceiveNoRsp(xid, ses, (char *) pSMB, in_len, 0); cifs_small_buf_release(pSMB); session_already_dead: mutex_unlock(&ses->session_mutex); @@ -687,6 +699,7 @@ CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; struct unlink_psx_rq *pRqD; + unsigned int in_len; int name_len; int rc = 0; int bytes_returned = 0; @@ -696,8 +709,9 @@ CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, PsxDelete: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -718,14 +732,11 @@ PsxDelete: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel); offset = param_offset + params; - /* Setup pointer to Request Data (inode type). - * Note that SMB offsets are from the beginning of SMB which is 4 bytes - * in, after RFC1001 field - */ - pRqD = (struct unlink_psx_rq *)((char *)(pSMB) + offset + 4); + /* Setup pointer to Request Data (inode type). */ + pRqD = (struct unlink_psx_rq *)((char *)(pSMB) + offset); pRqD->type = cpu_to_le16(type); pSMB->ParameterOffset = cpu_to_le16(param_offset); pSMB->DataOffset = cpu_to_le16(offset); @@ -740,9 +751,9 @@ PsxDelete: pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) cifs_dbg(FYI, "Posix delete returned %d\n", rc); @@ -762,6 +773,7 @@ CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, { DELETE_FILE_REQ *pSMB = NULL; DELETE_FILE_RSP *pSMBr = NULL; + unsigned int in_len; int rc = 0; int bytes_returned; int name_len; @@ -770,8 +782,9 @@ CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, DelFileRetry: rc = smb_init(SMB_COM_DELETE, 1, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->fileName, name, @@ -785,9 +798,9 @@ DelFileRetry: pSMB->SearchAttributes = cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); pSMB->BufferFormat = 0x04; - inc_rfc1001_len(pSMB, name_len + 1); + in_len += name_len + 1; pSMB->ByteCount = cpu_to_le16(name_len + 1); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_deletes); if (rc) @@ -806,6 +819,7 @@ CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, { DELETE_DIRECTORY_REQ *pSMB = NULL; DELETE_DIRECTORY_RSP *pSMBr = NULL; + unsigned int in_len; int rc = 0; int bytes_returned; int name_len; @@ -815,8 +829,9 @@ CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, RmDirRetry: rc = smb_init(SMB_COM_DELETE_DIRECTORY, 0, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, name, @@ -829,9 +844,9 @@ RmDirRetry: } pSMB->BufferFormat = 0x04; - inc_rfc1001_len(pSMB, name_len + 1); + in_len += name_len + 1; pSMB->ByteCount = cpu_to_le16(name_len + 1); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_rmdirs); if (rc) @@ -851,6 +866,7 @@ CIFSSMBMkDir(const unsigned int xid, struct inode *inode, umode_t mode, int rc = 0; CREATE_DIRECTORY_REQ *pSMB = NULL; CREATE_DIRECTORY_RSP *pSMBr = NULL; + unsigned int in_len; int bytes_returned; int name_len; int remap = cifs_remap(cifs_sb); @@ -859,8 +875,9 @@ CIFSSMBMkDir(const unsigned int xid, struct inode *inode, umode_t mode, MkDirRetry: rc = smb_init(SMB_COM_CREATE_DIRECTORY, 0, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, name, @@ -873,9 +890,9 @@ MkDirRetry: } pSMB->BufferFormat = 0x04; - inc_rfc1001_len(pSMB, name_len + 1); + in_len += name_len + 1; pSMB->ByteCount = cpu_to_le16(name_len + 1); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_mkdirs); if (rc) @@ -896,6 +913,7 @@ CIFSPOSIXCreate(const unsigned int xid, struct cifs_tcon *tcon, { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; + unsigned int in_len; int name_len; int rc = 0; int bytes_returned = 0; @@ -907,8 +925,9 @@ CIFSPOSIXCreate(const unsigned int xid, struct cifs_tcon *tcon, PsxCreat: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -930,10 +949,9 @@ PsxCreat: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel); offset = param_offset + params; - /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ - pdata = (OPEN_PSX_REQ *)((char *)(pSMB) + offset + 4); + pdata = (OPEN_PSX_REQ *)((char *)(pSMB) + offset); pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); pdata->Permissions = cpu_to_le64(mode); pdata->PosixOpenFlags = cpu_to_le32(posix_flags); @@ -951,9 +969,9 @@ PsxCreat: pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Posix create returned %d\n", rc); @@ -964,13 +982,14 @@ PsxCreat: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) { - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_create_rsp_too_small, + get_bcc(&pSMBr->hdr), sizeof(OPEN_PSX_RSP)); goto psx_create_err; } /* copy return information to pRetData */ - psx_rsp = (OPEN_PSX_RSP *)((char *) &pSMBr->hdr.Protocol - + le16_to_cpu(pSMBr->t2.DataOffset)); + psx_rsp = (OPEN_PSX_RSP *) + ((char *)pSMBr + le16_to_cpu(pSMBr->t2.DataOffset)); *pOplock = le16_to_cpu(psx_rsp->OplockFlags); if (netfid) @@ -990,9 +1009,9 @@ PsxCreat: pRetData->Type = cpu_to_le32(-1); goto psx_create_err; } - memcpy((char *) pRetData, - (char *)psx_rsp + sizeof(OPEN_PSX_RSP), - sizeof(FILE_UNIX_BASIC_INFO)); + memcpy(pRetData, + (char *)psx_rsp + sizeof(OPEN_PSX_RSP), + sizeof(*pRetData)); } psx_create_err: @@ -1079,6 +1098,7 @@ SMBLegacyOpen(const unsigned int xid, struct cifs_tcon *tcon, int rc; OPENX_REQ *pSMB = NULL; OPENX_RSP *pSMBr = NULL; + unsigned int in_len; int bytes_returned; int name_len; __u16 count; @@ -1086,8 +1106,9 @@ SMBLegacyOpen(const unsigned int xid, struct cifs_tcon *tcon, OldOpenRetry: rc = smb_init(SMB_COM_OPEN_ANDX, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->AndXCommand = 0xFF; /* none */ @@ -1130,10 +1151,10 @@ OldOpenRetry: pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY); pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition)); count += name_len; - inc_rfc1001_len(pSMB, count); + in_len += count; pSMB->ByteCount = cpu_to_le16(count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *)pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_opens); if (rc) { @@ -1191,12 +1212,14 @@ CIFS_open(const unsigned int xid, struct cifs_open_parms *oparms, int *oplock, int desired_access = oparms->desired_access; int disposition = oparms->disposition; const char *path = oparms->path; + unsigned int in_len; openRetry: rc = smb_init(SMB_COM_NT_CREATE_ANDX, 24, tcon, (void **)&req, (void **)&rsp); - if (rc) + if (rc < 0) return rc; + in_len = rc; /* no commands go after this */ req->AndXCommand = 0xFF; @@ -1254,10 +1277,10 @@ openRetry: req->SecurityFlags = SECURITY_CONTEXT_TRACKING|SECURITY_EFFECTIVE_ONLY; count += name_len; - inc_rfc1001_len(req, count); + in_len += count; req->ByteCount = cpu_to_le16(count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *)req, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *)req, in_len, (struct smb_hdr *)rsp, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_opens); if (rc) { @@ -1296,14 +1319,13 @@ openRetry: } static void -cifs_readv_callback(struct mid_q_entry *mid) +cifs_readv_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) { struct cifs_io_subrequest *rdata = mid->callback_data; struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode); struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); - struct TCP_Server_Info *server = tcon->ses->server; struct smb_rqst rqst = { .rq_iov = rdata->iov, - .rq_nvec = 2, + .rq_nvec = 1, .rq_iter = rdata->subreq.io_iter }; struct cifs_credits credits = { .value = 1, @@ -1352,11 +1374,12 @@ do_retry: break; case MID_RESPONSE_MALFORMED: trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); - rdata->result = -EIO; + rdata->result = smb_EIO(smb_eio_trace_read_rsp_malformed); break; default: trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); - rdata->result = -EIO; + rdata->result = smb_EIO1(smb_eio_trace_read_mid_state_unknown, + mid->mid_state); break; } @@ -1374,7 +1397,7 @@ do_retry: } else { size_t trans = rdata->subreq.transferred + rdata->got_bytes; if (trans < rdata->subreq.len && - rdata->subreq.start + trans == ictx->remote_i_size) { + rdata->subreq.start + trans >= ictx->remote_i_size) { rdata->result = 0; __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); } else if (rdata->got_bytes > 0) { @@ -1399,7 +1422,7 @@ do_retry: rdata->subreq.transferred += rdata->got_bytes; trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_progress); netfs_read_subreq_terminated(&rdata->subreq); - release_mid(mid); + release_mid(server, mid); add_credits(server, &credits, 0); trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, server->credits, server->in_flight, @@ -1415,7 +1438,8 @@ cifs_async_readv(struct cifs_io_subrequest *rdata) int wct; struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); struct smb_rqst rqst = { .rq_iov = rdata->iov, - .rq_nvec = 2 }; + .rq_nvec = 1 }; + unsigned int in_len; cifs_dbg(FYI, "%s: offset=%llu bytes=%zu\n", __func__, rdata->subreq.start, rdata->subreq.len); @@ -1426,13 +1450,14 @@ cifs_async_readv(struct cifs_io_subrequest *rdata) wct = 10; /* old style read */ if ((rdata->subreq.start >> 32) > 0) { /* can not handle this big offset for old */ - return -EIO; + return smb_EIO(smb_eio_trace_read_too_far); } } rc = small_smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **)&smb); - if (rc) + if (rc < 0) return rc; + in_len = rc; smb->hdr.Pid = cpu_to_le16((__u16)rdata->req->pid); smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->req->pid >> 16)); @@ -1456,9 +1481,7 @@ cifs_async_readv(struct cifs_io_subrequest *rdata) /* 4 for RFC1001 length + 1 for BCC */ rdata->iov[0].iov_base = smb; - rdata->iov[0].iov_len = 4; - rdata->iov[1].iov_base = (char *)smb + 4; - rdata->iov[1].iov_len = get_rfc1002_len(smb); + rdata->iov[0].iov_len = in_len; trace_smb3_read_enter(rdata->rreq->debug_id, rdata->subreq.debug_index, @@ -1492,6 +1515,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, __u16 netfid = io_parms->netfid; __u64 offset = io_parms->offset; struct cifs_tcon *tcon = io_parms->tcon; + unsigned int in_len; unsigned int count = io_parms->length; cifs_dbg(FYI, "Reading %d bytes on fid %d\n", count, netfid); @@ -1501,14 +1525,15 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, wct = 10; /* old style read */ if ((offset >> 32) > 0) { /* can not handle this big offset for old */ - return -EIO; + return smb_EIO(smb_eio_trace_read_too_far); } } *nbytes = 0; rc = small_smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **) &pSMB); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->hdr.Pid = cpu_to_le16((__u16)pid); pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid >> 16)); @@ -1536,7 +1561,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, } iov[0].iov_base = (char *)pSMB; - iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4; + iov[0].iov_len = in_len; rc = SendReceive2(xid, tcon->ses, iov, 1, &resp_buf_type, CIFS_LOG_ERROR, &rsp_iov); cifs_small_buf_release(pSMB); @@ -1555,7 +1580,8 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, || (data_length > count)) { cifs_dbg(FYI, "bad length %d for count %d\n", data_length, count); - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_read_overlarge, + data_length, count); *nbytes = 0; } else { pReadData = (char *) (&pSMBr->hdr.Protocol) + @@ -1600,7 +1626,7 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, __u16 netfid = io_parms->netfid; __u64 offset = io_parms->offset; struct cifs_tcon *tcon = io_parms->tcon; - unsigned int count = io_parms->length; + unsigned int count = io_parms->length, in_len; *nbytes = 0; @@ -1614,14 +1640,15 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, wct = 12; if ((offset >> 32) > 0) { /* can not handle big offset for old srv */ - return -EIO; + return smb_EIO(smb_eio_trace_write_too_far); } } rc = smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->hdr.Pid = cpu_to_le16((__u16)pid); pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid >> 16)); @@ -1654,7 +1681,7 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, if (bytes_sent > count) bytes_sent = count; pSMB->DataOffset = - cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); + cpu_to_le16(offsetof(struct smb_com_write_req, Data)); if (buf) memcpy(pSMB->Data, buf, bytes_sent); else if (count != 0) { @@ -1669,7 +1696,7 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF); pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16); - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; if (wct == 14) pSMB->ByteCount = cpu_to_le16(byte_count); @@ -1680,7 +1707,7 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, pSMBW->ByteCount = cpu_to_le16(byte_count); } - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); if (rc) { @@ -1712,10 +1739,9 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, * workqueue completion task. */ static void -cifs_writev_callback(struct mid_q_entry *mid) +cifs_writev_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) { struct cifs_io_subrequest *wdata = mid->callback_data; - struct TCP_Server_Info *server = wdata->server; struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf; struct cifs_credits credits = { @@ -1765,11 +1791,12 @@ cifs_writev_callback(struct mid_q_entry *mid) break; case MID_RESPONSE_MALFORMED: trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); - result = -EIO; + result = smb_EIO(smb_eio_trace_write_rsp_malformed); break; default: trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); - result = -EIO; + result = smb_EIO1(smb_eio_trace_write_mid_state_unknown, + mid->mid_state); break; } @@ -1779,7 +1806,7 @@ cifs_writev_callback(struct mid_q_entry *mid) 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; cifs_write_subrequest_terminated(wdata, result); - release_mid(mid); + release_mid(server, mid); trace_smb3_rw_credits(credits.rreq_debug_id, credits.rreq_debug_index, 0, server->credits, server->in_flight, credits.value, cifs_trace_rw_credits_write_response_add); @@ -1791,11 +1818,12 @@ void cifs_async_writev(struct cifs_io_subrequest *wdata) { int rc = -EACCES; - WRITE_REQ *smb = NULL; + WRITE_REQ *req = NULL; int wct; struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); - struct kvec iov[2]; + struct kvec iov[1]; struct smb_rqst rqst = { }; + unsigned int in_len; if (tcon->ses->capabilities & CAP_LARGE_FILES) { wct = 14; @@ -1803,56 +1831,54 @@ cifs_async_writev(struct cifs_io_subrequest *wdata) wct = 12; if (wdata->subreq.start >> 32 > 0) { /* can not handle big offset for old srv */ - rc = -EIO; + rc = smb_EIO(smb_eio_trace_write_too_far); goto out; } } - rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **)&smb); - if (rc) + rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **)&req); + if (rc < 0) goto async_writev_out; + in_len = rc; - smb->hdr.Pid = cpu_to_le16((__u16)wdata->req->pid); - smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->req->pid >> 16)); + req->hdr.Pid = cpu_to_le16((__u16)wdata->req->pid); + req->hdr.PidHigh = cpu_to_le16((__u16)(wdata->req->pid >> 16)); - smb->AndXCommand = 0xFF; /* none */ - smb->Fid = wdata->req->cfile->fid.netfid; - smb->OffsetLow = cpu_to_le32(wdata->subreq.start & 0xFFFFFFFF); + req->AndXCommand = 0xFF; /* none */ + req->Fid = wdata->req->cfile->fid.netfid; + req->OffsetLow = cpu_to_le32(wdata->subreq.start & 0xFFFFFFFF); if (wct == 14) - smb->OffsetHigh = cpu_to_le32(wdata->subreq.start >> 32); - smb->Reserved = 0xFFFFFFFF; - smb->WriteMode = 0; - smb->Remaining = 0; + req->OffsetHigh = cpu_to_le32(wdata->subreq.start >> 32); + req->Reserved = 0xFFFFFFFF; + req->WriteMode = 0; + req->Remaining = 0; - smb->DataOffset = - cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); + req->DataOffset = + cpu_to_le16(offsetof(struct smb_com_write_req, Data)); - /* 4 for RFC1001 length + 1 for BCC */ - iov[0].iov_len = 4; - iov[0].iov_base = smb; - iov[1].iov_len = get_rfc1002_len(smb) + 1; - iov[1].iov_base = (char *)smb + 4; + iov[0].iov_base = req; + iov[0].iov_len = in_len + 1; /* +1 for BCC */ rqst.rq_iov = iov; - rqst.rq_nvec = 2; + rqst.rq_nvec = 1; rqst.rq_iter = wdata->subreq.io_iter; cifs_dbg(FYI, "async write at %llu %zu bytes\n", wdata->subreq.start, wdata->subreq.len); - smb->DataLengthLow = cpu_to_le16(wdata->subreq.len & 0xFFFF); - smb->DataLengthHigh = cpu_to_le16(wdata->subreq.len >> 16); + req->DataLengthLow = cpu_to_le16(wdata->subreq.len & 0xFFFF); + req->DataLengthHigh = cpu_to_le16(wdata->subreq.len >> 16); if (wct == 14) { - inc_rfc1001_len(&smb->hdr, wdata->subreq.len + 1); - put_bcc(wdata->subreq.len + 1, &smb->hdr); + in_len += wdata->subreq.len + 1; + put_bcc(wdata->subreq.len + 1, &req->hdr); } else { /* wct == 12 */ - struct smb_com_writex_req *smbw = - (struct smb_com_writex_req *)smb; - inc_rfc1001_len(&smbw->hdr, wdata->subreq.len + 5); - put_bcc(wdata->subreq.len + 5, &smbw->hdr); - iov[1].iov_len += 4; /* pad bigger by four bytes */ + struct smb_com_writex_req *reqw = + (struct smb_com_writex_req *)req; + in_len += wdata->subreq.len + 5; + put_bcc(wdata->subreq.len + 5, &reqw->hdr); + iov[0].iov_len += 4; /* pad bigger by four bytes */ } rc = cifs_call_async(tcon->ses->server, &rqst, NULL, @@ -1862,7 +1888,7 @@ cifs_async_writev(struct cifs_io_subrequest *wdata) cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); async_writev_out: - cifs_small_buf_release(smb); + cifs_small_buf_release(req); out: if (rc) { add_credits_and_wake_if(wdata->server, &wdata->credits, 0); @@ -1885,6 +1911,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, struct cifs_tcon *tcon = io_parms->tcon; unsigned int count = io_parms->length; struct kvec rsp_iov; + unsigned int in_len; *nbytes = 0; @@ -1896,12 +1923,13 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, wct = 12; if ((offset >> 32) > 0) { /* can not handle big offset for old srv */ - return -EIO; + return smb_EIO(smb_eio_trace_write_too_far); } } rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->hdr.Pid = cpu_to_le16((__u16)pid); pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid >> 16)); @@ -1920,16 +1948,16 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, pSMB->Remaining = 0; pSMB->DataOffset = - cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); + cpu_to_le16(offsetof(struct smb_com_write_req, Data)); pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF); pSMB->DataLengthHigh = cpu_to_le16(count >> 16); /* header + 1 byte pad */ - smb_hdr_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 1; + smb_hdr_len = in_len + 1; if (wct == 14) - inc_rfc1001_len(pSMB, count + 1); + in_len += count + 1; else /* wct == 12 */ - inc_rfc1001_len(pSMB, count + 5); /* smb data starts later */ + in_len += count + 5; /* smb data starts later */ if (wct == 14) pSMB->ByteCount = cpu_to_le16(count + 1); else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ { @@ -1951,7 +1979,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, cifs_dbg(FYI, "Send error Write2 = %d\n", rc); } else if (resp_buf_type == 0) { /* presumably this can not happen, but best to be safe */ - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_write_bad_buf_type, resp_buf_type); } else { WRITE_RSP *pSMBr = (WRITE_RSP *)rsp_iov.iov_base; *nbytes = le16_to_cpu(pSMBr->CountHigh); @@ -1983,6 +2011,7 @@ int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon, LOCK_REQ *pSMB = NULL; struct kvec iov[2]; struct kvec rsp_iov; + unsigned int in_len; int resp_buf_type; __u16 count; @@ -1990,8 +2019,9 @@ int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon, num_lock, num_unlock); rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->Timeout = 0; pSMB->NumberOfLocks = cpu_to_le16(num_lock); @@ -2001,11 +2031,11 @@ int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Fid = netfid; /* netfid stays le */ count = (num_unlock + num_lock) * sizeof(LOCKING_ANDX_RANGE); - inc_rfc1001_len(pSMB, count); + in_len += count; pSMB->ByteCount = cpu_to_le16(count); iov[0].iov_base = (char *)pSMB; - iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4 - + iov[0].iov_len = in_len - (num_unlock + num_lock) * sizeof(LOCKING_ANDX_RANGE); iov[1].iov_base = (char *)buf; iov[1].iov_len = (num_unlock + num_lock) * sizeof(LOCKING_ANDX_RANGE); @@ -2030,16 +2060,18 @@ CIFSSMBLock(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; LOCK_REQ *pSMB = NULL; /* LOCK_RSP *pSMBr = NULL; */ /* No response data other than rc to parse */ + unsigned int in_len; int bytes_returned; - int flags = 0; + int flags = CIFS_WINDOWS_LOCK | CIFS_INTERRUPTIBLE_WAIT; __u16 count; cifs_dbg(FYI, "CIFSSMBLock timeout %d numLock %d\n", (int)waitFlag, numLock); rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { /* no response expected */ @@ -2071,14 +2103,15 @@ CIFSSMBLock(const unsigned int xid, struct cifs_tcon *tcon, /* oplock break */ count = 0; } - inc_rfc1001_len(pSMB, count); + in_len += count; pSMB->ByteCount = cpu_to_le16(count); if (waitFlag) - rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMB, &bytes_returned); + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, + (struct smb_hdr *) pSMB, &bytes_returned, + flags); else - rc = SendReceiveNoRsp(xid, tcon->ses, (char *)pSMB, flags); + rc = SendReceiveNoRsp(xid, tcon->ses, (char *)pSMB, in_len, flags); cifs_small_buf_release(pSMB); cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); if (rc) @@ -2099,8 +2132,9 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, struct smb_com_transaction2_sfi_req *pSMB = NULL; struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; struct cifs_posix_lock *parm_data; + unsigned int in_len; int rc = 0; - int timeout = 0; + int sr_flags = CIFS_INTERRUPTIBLE_WAIT; int bytes_returned = 0; int resp_buf_type = 0; __u16 params, param_offset, offset, byte_count, count; @@ -2110,9 +2144,9 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, cifs_dbg(FYI, "Posix Lock\n"); rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); - - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMBr = (struct smb_com_transaction2_sfi_rsp *)pSMB; @@ -2121,7 +2155,7 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Reserved = 0; pSMB->Flags = 0; pSMB->Reserved2 = 0; - param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; + param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid); offset = param_offset + params; count = sizeof(struct cifs_posix_lock); @@ -2139,13 +2173,11 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, pSMB->TotalDataCount = pSMB->DataCount; pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->ParameterOffset = cpu_to_le16(param_offset); - /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ - parm_data = (struct cifs_posix_lock *) - (((char *)pSMB) + offset + 4); + parm_data = (struct cifs_posix_lock *)(((char *)pSMB) + offset); parm_data->lock_type = cpu_to_le16(lock_type); if (waitFlag) { - timeout = CIFS_BLOCKING_OP; /* blocking operation, no timeout */ + sr_flags |= CIFS_BLOCKING_OP; /* blocking operation, no timeout */ parm_data->lock_flags = cpu_to_le16(1); pSMB->Timeout = cpu_to_le32(-1); } else @@ -2159,16 +2191,17 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Fid = smb_file_id; pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); if (waitFlag) { - rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned); + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, + (struct smb_hdr *) pSMBr, &bytes_returned, + sr_flags); } else { iov[0].iov_base = (char *)pSMB; - iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4; + iov[0].iov_len = in_len; rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, - &resp_buf_type, timeout, &rsp_iov); + &resp_buf_type, sr_flags, &rsp_iov); pSMBr = (struct smb_com_transaction2_sfi_rsp *)rsp_iov.iov_base; } cifs_small_buf_release(pSMB); @@ -2182,13 +2215,15 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < sizeof(*parm_data)) { - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_lock_bcc_too_small, + get_bcc(&pSMBr->hdr), sizeof(*parm_data)); goto plk_err_exit; } data_offset = le16_to_cpu(pSMBr->t2.DataOffset); data_count = le16_to_cpu(pSMBr->t2.DataCount); if (data_count < sizeof(struct cifs_posix_lock)) { - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_lock_data_too_small, + data_count, sizeof(struct cifs_posix_lock)); goto plk_err_exit; } parm_data = (struct cifs_posix_lock *) @@ -2226,19 +2261,22 @@ CIFSSMBClose(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) { int rc = 0; CLOSE_REQ *pSMB = NULL; + unsigned int in_len; + cifs_dbg(FYI, "In CIFSSMBClose\n"); /* do not retry on dead session on close */ rc = small_smb_init(SMB_COM_CLOSE, 3, tcon, (void **) &pSMB); if (rc == -EAGAIN) return 0; - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->FileID = (__u16) smb_file_id; pSMB->LastWriteTime = 0xFFFFFFFF; pSMB->ByteCount = 0; - rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, in_len, 0); cifs_small_buf_release(pSMB); cifs_stats_inc(&tcon->stats.cifs_stats.num_closes); if (rc) { @@ -2260,15 +2298,18 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) { int rc = 0; FLUSH_REQ *pSMB = NULL; + unsigned int in_len; + cifs_dbg(FYI, "In CIFSSMBFlush\n"); rc = small_smb_init(SMB_COM_FLUSH, 1, tcon, (void **) &pSMB); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->FileID = (__u16) smb_file_id; pSMB->ByteCount = 0; - rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, in_len, 0); cifs_small_buf_release(pSMB); cifs_stats_inc(&tcon->stats.cifs_stats.num_flushes); if (rc) @@ -2285,6 +2326,7 @@ int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; RENAME_REQ *pSMB = NULL; RENAME_RSP *pSMBr = NULL; + unsigned int in_len; int bytes_returned; int name_len, name_len2; __u16 count; @@ -2294,8 +2336,9 @@ int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, renameRetry: rc = smb_init(SMB_COM_RENAME, 1, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->BufferFormat = 0x04; pSMB->SearchAttributes = @@ -2325,10 +2368,10 @@ renameRetry: } count = 1 /* 1st signature byte */ + name_len + name_len2; - inc_rfc1001_len(pSMB, count); + in_len += count; pSMB->ByteCount = cpu_to_le16(count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_renames); if (rc) @@ -2349,6 +2392,7 @@ int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *pTcon, struct smb_com_transaction2_sfi_req *pSMB = NULL; struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; struct set_file_rename *rename_info; + unsigned int in_len; char *data_offset; char dummy_string[30]; int rc = 0; @@ -2359,8 +2403,9 @@ int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *pTcon, cifs_dbg(FYI, "Rename to File by handle\n"); rc = smb_init(SMB_COM_TRANSACTION2, 15, pTcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 6; pSMB->MaxSetupCount = 0; @@ -2368,11 +2413,10 @@ int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *pTcon, pSMB->Flags = 0; pSMB->Timeout = 0; pSMB->Reserved2 = 0; - param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; + param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid); offset = param_offset + params; - /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ - data_offset = (char *)(pSMB) + offset + 4; + data_offset = (char *)(pSMB) + offset; rename_info = (struct set_file_rename *) data_offset; pSMB->MaxParameterCount = cpu_to_le16(2); pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB from sess */ @@ -2408,9 +2452,9 @@ int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *pTcon, pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&pTcon->stats.cifs_stats.num_t2renames); if (rc) @@ -2432,6 +2476,7 @@ CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; + unsigned int in_len; char *data_offset; int name_len; int name_len_target; @@ -2443,8 +2488,9 @@ CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, createSymLinkRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -2464,11 +2510,10 @@ createSymLinkRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel); offset = param_offset + params; - /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ - data_offset = (char *)pSMB + offset + 4; + data_offset = (char *)pSMB + offset; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len_target = cifsConvertToUTF16((__le16 *) data_offset, toName, @@ -2495,9 +2540,9 @@ createSymLinkRetry: pSMB->DataOffset = cpu_to_le16(offset); pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_symlinks); if (rc) @@ -2519,6 +2564,7 @@ CIFSUnixCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; + unsigned int in_len; char *data_offset; int name_len; int name_len_target; @@ -2530,8 +2576,9 @@ CIFSUnixCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, createHardLinkRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->FileName, toName, @@ -2549,11 +2596,10 @@ createHardLinkRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel); offset = param_offset + params; - /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ - data_offset = (char *)pSMB + offset + 4; + data_offset = (char *)pSMB + offset; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len_target = cifsConvertToUTF16((__le16 *) data_offset, fromName, @@ -2579,9 +2625,9 @@ createHardLinkRetry: pSMB->DataOffset = cpu_to_le16(offset); pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_hardlinks); if (rc) @@ -2604,6 +2650,7 @@ int CIFSCreateHardLink(const unsigned int xid, int rc = 0; NT_RENAME_REQ *pSMB = NULL; RENAME_RSP *pSMBr = NULL; + unsigned int in_len; int bytes_returned; int name_len, name_len2; __u16 count; @@ -2614,8 +2661,9 @@ winCreateHardLinkRetry: rc = smb_init(SMB_COM_NT_RENAME, 4, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->SearchAttributes = cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | @@ -2649,10 +2697,10 @@ winCreateHardLinkRetry: } count = 1 /* string type byte */ + name_len + name_len2; - inc_rfc1001_len(pSMB, count); + in_len += count; pSMB->ByteCount = cpu_to_le16(count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_hardlinks); if (rc) @@ -2673,6 +2721,7 @@ CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, /* SMB_QUERY_FILE_UNIX_LINK */ TRANSACTION2_QPI_REQ *pSMB = NULL; TRANSACTION2_QPI_RSP *pSMBr = NULL; + unsigned int in_len; int rc = 0; int bytes_returned; int name_len; @@ -2684,8 +2733,9 @@ CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, querySymLinkRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -2708,7 +2758,7 @@ querySymLinkRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req, InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -2719,10 +2769,10 @@ querySymLinkRetry: pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QuerySymLinkInfo = %d\n", rc); @@ -2732,7 +2782,8 @@ querySymLinkRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); /* BB also check enough total bytes returned */ if (rc || get_bcc(&pSMBr->hdr) < 2) - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_qsym_bcc_too_small, + get_bcc(&pSMBr->hdr), 2); else { bool is_unicode; u16 count = le16_to_cpu(pSMBr->t2.DataCount); @@ -2770,6 +2821,7 @@ int cifs_query_reparse_point(const unsigned int xid, TRANSACT_IOCTL_REQ *io_req = NULL; TRANSACT_IOCTL_RSP *io_rsp = NULL; struct cifs_fid fid; + unsigned int in_len; __u32 data_offset, data_count, len; __u8 *start, *end; int io_rsp_len; @@ -2801,8 +2853,9 @@ int cifs_query_reparse_point(const unsigned int xid, rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **)&io_req, (void **)&io_rsp); - if (rc) + if (rc < 0) goto error; + in_len = rc; io_req->TotalParameterCount = 0; io_req->TotalDataCount = 0; @@ -2823,7 +2876,7 @@ int cifs_query_reparse_point(const unsigned int xid, io_req->Fid = fid.netfid; io_req->ByteCount = 0; - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *)io_req, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *)io_req, in_len, (struct smb_hdr *)io_rsp, &io_rsp_len, 0); if (rc) goto error; @@ -2832,13 +2885,15 @@ int cifs_query_reparse_point(const unsigned int xid, data_count = le32_to_cpu(io_rsp->DataCount); if (get_bcc(&io_rsp->hdr) < 2 || data_offset > 512 || !data_count || data_count > 2048) { - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_qreparse_sizes_wrong, + get_bcc(&io_rsp->hdr), data_count); goto error; } /* SetupCount must be 1, otherwise offset to ByteCount is incorrect. */ if (io_rsp->SetupCount != 1) { - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_qreparse_setup_count, + io_rsp->SetupCount, 1); goto error; } @@ -2848,14 +2903,17 @@ int cifs_query_reparse_point(const unsigned int xid, * Check that we have full FSCTL_GET_REPARSE_POINT buffer. */ if (data_count != le16_to_cpu(io_rsp->ReturnedDataLen)) { - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_qreparse_ret_datalen, + data_count, le16_to_cpu(io_rsp->ReturnedDataLen)); goto error; } end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount; start = (__u8 *)&io_rsp->hdr.Protocol + data_offset; if (start >= end) { - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_qreparse_data_area, + (unsigned long)start - (unsigned long)io_rsp, + (unsigned long)end - (unsigned long)io_rsp); goto error; } @@ -2864,7 +2922,8 @@ int cifs_query_reparse_point(const unsigned int xid, len = sizeof(*buf); if (data_count < len || data_count < le16_to_cpu(buf->ReparseDataLength) + len) { - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_qreparse_rep_datalen, + data_count, le16_to_cpu(buf->ReparseDataLength) + len); goto error; } @@ -2897,7 +2956,7 @@ struct inode *cifs_create_reparse_inode(struct cifs_open_info_data *data, struct kvec in_iov[2]; struct kvec out_iov; struct cifs_fid fid; - int io_req_len; + unsigned int in_len; int oplock = 0; int buf_type = 0; int rc; @@ -2953,12 +3012,10 @@ struct inode *cifs_create_reparse_inode(struct cifs_open_info_data *data, #endif rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **)&io_req, NULL); - if (rc) + if (rc < 0) goto out_close; - - inc_rfc1001_len(io_req, sizeof(io_req->Pad)); - - io_req_len = be32_to_cpu(io_req->hdr.smb_buf_length) + sizeof(io_req->hdr.smb_buf_length); + in_len = rc; + in_len += sizeof(io_req->Pad); /* NT IOCTL response contains one-word long output setup buffer with size of output data. */ io_req->MaxSetupCount = 1; @@ -2972,8 +3029,7 @@ struct inode *cifs_create_reparse_inode(struct cifs_open_info_data *data, io_req->ParameterCount = io_req->TotalParameterCount; io_req->ParameterOffset = cpu_to_le32(0); io_req->DataCount = io_req->TotalDataCount; - io_req->DataOffset = cpu_to_le32(offsetof(typeof(*io_req), Data) - - sizeof(io_req->hdr.smb_buf_length)); + io_req->DataOffset = cpu_to_le32(offsetof(typeof(*io_req), Data)); io_req->SetupCount = 4; io_req->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); io_req->FunctionCode = cpu_to_le32(FSCTL_SET_REPARSE_POINT); @@ -2982,10 +3038,8 @@ struct inode *cifs_create_reparse_inode(struct cifs_open_info_data *data, io_req->IsRootFlag = 0; io_req->ByteCount = cpu_to_le16(le32_to_cpu(io_req->DataCount) + sizeof(io_req->Pad)); - inc_rfc1001_len(io_req, reparse_iov->iov_len); - in_iov[0].iov_base = (char *)io_req; - in_iov[0].iov_len = io_req_len; + in_iov[0].iov_len = in_len; in_iov[1] = *reparse_iov; rc = SendReceive2(xid, tcon->ses, in_iov, ARRAY_SIZE(in_iov), &buf_type, CIFS_NO_RSP_BUF, &out_iov); @@ -3017,12 +3071,14 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned; struct smb_com_transaction_compr_ioctl_req *pSMB; struct smb_com_transaction_ioctl_rsp *pSMBr; + unsigned int in_len; cifs_dbg(FYI, "Set compression for %u\n", fid); rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT); @@ -3036,7 +3092,7 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon, pSMB->DataCount = cpu_to_le32(2); pSMB->DataOffset = cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req, - compression_state) - 4); /* 84 */ + compression_state)); /* 84 */ pSMB->SetupCount = 4; pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); pSMB->ParameterCount = 0; @@ -3046,9 +3102,9 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Fid = fid; /* file handle always le */ /* 3 byte pad, followed by 2 byte compress state */ pSMB->ByteCount = cpu_to_le16(5); - inc_rfc1001_len(pSMB, 5); + in_len += 5; - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) cifs_dbg(FYI, "Send error in SetCompression = %d\n", rc); @@ -3246,6 +3302,7 @@ int cifs_do_get_acl(const unsigned int xid, struct cifs_tcon *tcon, /* SMB_QUERY_POSIX_ACL */ TRANSACTION2_QPI_REQ *pSMB = NULL; TRANSACTION2_QPI_RSP *pSMBr = NULL; + unsigned int in_len; int rc = 0; int bytes_returned; int name_len; @@ -3256,8 +3313,9 @@ int cifs_do_get_acl(const unsigned int xid, struct cifs_tcon *tcon, queryAclRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -3284,7 +3342,7 @@ queryAclRetry: pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16( offsetof(struct smb_com_transaction2_qpi_req, - InformationLevel) - 4); + InformationLevel)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -3295,10 +3353,10 @@ queryAclRetry: pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_acl_get); if (rc) { @@ -3309,7 +3367,8 @@ queryAclRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); /* BB also check enough total bytes returned */ if (rc || get_bcc(&pSMBr->hdr) < 2) - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_getacl_bcc_too_small, + get_bcc(&pSMBr->hdr), 2); else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); __u16 count = le16_to_cpu(pSMBr->t2.DataCount); @@ -3336,6 +3395,7 @@ int cifs_do_set_acl(const unsigned int xid, struct cifs_tcon *tcon, { struct smb_com_transaction2_spi_req *pSMB = NULL; struct smb_com_transaction2_spi_rsp *pSMBr = NULL; + unsigned int in_len; char *parm_data; int name_len; int rc = 0; @@ -3346,8 +3406,9 @@ int cifs_do_set_acl(const unsigned int xid, struct cifs_tcon *tcon, setAclRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, @@ -3367,9 +3428,9 @@ setAclRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel); offset = param_offset + params; - parm_data = ((char *)pSMB) + sizeof(pSMB->hdr.smb_buf_length) + offset; + parm_data = ((char *)pSMB) + offset; pSMB->ParameterOffset = cpu_to_le16(param_offset); /* convert to on the wire format for POSIX ACL */ @@ -3390,9 +3451,9 @@ setAclRetry: pSMB->ParameterCount = cpu_to_le16(params); pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) cifs_dbg(FYI, "Set POSIX ACL returned %d\n", rc); @@ -3428,6 +3489,7 @@ CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; struct smb_t2_qfi_req *pSMB = NULL; struct smb_t2_qfi_rsp *pSMBr = NULL; + unsigned int in_len; int bytes_returned; __u16 params, byte_count; @@ -3438,8 +3500,9 @@ CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, GetExtAttrRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 2 /* level */ + 2 /* fid */; pSMB->t2.TotalDataCount = 0; @@ -3452,7 +3515,7 @@ GetExtAttrRetry: pSMB->t2.Timeout = 0; pSMB->t2.Reserved2 = 0; pSMB->t2.ParameterOffset = cpu_to_le16(offsetof(struct smb_t2_qfi_req, - Fid) - 4); + Fid)); pSMB->t2.DataCount = 0; pSMB->t2.DataOffset = 0; pSMB->t2.SetupCount = 1; @@ -3464,10 +3527,10 @@ GetExtAttrRetry: pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS); pSMB->Pad = 0; pSMB->Fid = netfid; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->t2.ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "error %d in GetExtAttr\n", rc); @@ -3478,7 +3541,8 @@ GetExtAttrRetry: if (rc || get_bcc(&pSMBr->hdr) < 2) /* If rc should we check for EOPNOSUPP and disable the srvino flag? or in caller? */ - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_getextattr_bcc_too_small, + get_bcc(&pSMBr->hdr), 2); else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); __u16 count = le16_to_cpu(pSMBr->t2.DataCount); @@ -3486,7 +3550,8 @@ GetExtAttrRetry: if (count != 16) { cifs_dbg(FYI, "Invalid size ret in GetExtAttr\n"); - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_getextattr_inv_size, + count, 16); goto GetExtAttrOut; } pfinfo = (struct file_chattr_info *) @@ -3520,11 +3585,13 @@ smb_init_nttransact(const __u16 sub_command, const int setup_count, int rc; __u32 temp_offset; struct smb_com_ntransact_req *pSMB; + unsigned int in_len; rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon, (void **)&pSMB); - if (rc) + if (rc < 0) return rc; + in_len = rc; *ret_buf = (void *)pSMB; pSMB->Reserved = 0; pSMB->TotalParameterCount = cpu_to_le32(parm_len); @@ -3533,12 +3600,12 @@ smb_init_nttransact(const __u16 sub_command, const int setup_count, pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->DataCount = pSMB->TotalDataCount; temp_offset = offsetof(struct smb_com_ntransact_req, Parms) + - (setup_count * 2) - 4 /* for rfc1001 length itself */; + (setup_count * 2); pSMB->ParameterOffset = cpu_to_le32(temp_offset); pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len); pSMB->SetupCount = setup_count; /* no need to le convert byte fields */ pSMB->SubCommand = cpu_to_le16(sub_command); - return 0; + return in_len; } static int @@ -3604,6 +3671,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, QUERY_SEC_DESC_REQ *pSMB; struct kvec iov[1]; struct kvec rsp_iov; + unsigned int in_len; cifs_dbg(FYI, "GetCifsACL\n"); @@ -3612,8 +3680,9 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, rc = smb_init_nttransact(NT_TRANSACT_QUERY_SECURITY_DESC, 0, 8 /* parm len */, tcon, (void **) &pSMB); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->MaxParameterCount = cpu_to_le32(4); /* BB TEST with big acls that might need to be e.g. larger than 16K */ @@ -3621,9 +3690,9 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, pSMB->Fid = fid; /* file handle always le */ pSMB->AclFlags = cpu_to_le32(info); pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */ - inc_rfc1001_len(pSMB, 11); + in_len += 11; iov[0].iov_base = (char *)pSMB; - iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4; + iov[0].iov_len = in_len; rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 0, &rsp_iov); @@ -3649,7 +3718,8 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, pSMBr, parm, *acl_inf); if (le32_to_cpu(pSMBr->ParameterCount) != 4) { - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_getcifsacl_param_count, + le32_to_cpu(pSMBr->ParameterCount), 4); *pbuflen = 0; goto qsec_out; } @@ -3692,18 +3762,20 @@ CIFSSMBSetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, int rc = 0; int bytes_returned = 0; SET_SEC_DESC_REQ *pSMB = NULL; + unsigned int in_len; void *pSMBr; setCifsAclRetry: rc = smb_init(SMB_COM_NT_TRANSACT, 19, tcon, (void **) &pSMB, &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; param_count = 8; - param_offset = offsetof(struct smb_com_transaction_ssec_req, Fid) - 4; + param_offset = offsetof(struct smb_com_transaction_ssec_req, Fid); data_count = acllen; data_offset = param_offset + param_count; byte_count = 3 /* pad */ + param_count; @@ -3725,13 +3797,12 @@ setCifsAclRetry: pSMB->AclFlags = cpu_to_le32(aclflag); if (pntsd && acllen) { - memcpy((char *)pSMBr + offsetof(struct smb_hdr, Protocol) + - data_offset, pntsd, acllen); - inc_rfc1001_len(pSMB, byte_count + data_count); + memcpy((char *)pSMBr + data_offset, pntsd, acllen); + in_len += byte_count + data_count; } else - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_dbg(FYI, "SetCIFSACL bytes_returned: %d, rc: %d\n", @@ -3756,6 +3827,7 @@ SMBQueryInformation(const unsigned int xid, struct cifs_tcon *tcon, { QUERY_INFORMATION_REQ *pSMB; QUERY_INFORMATION_RSP *pSMBr; + unsigned int in_len; int rc = 0; int bytes_returned; int name_len; @@ -3764,8 +3836,9 @@ SMBQueryInformation(const unsigned int xid, struct cifs_tcon *tcon, QInfRetry: rc = smb_init(SMB_COM_QUERY_INFORMATION, 0, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -3779,10 +3852,10 @@ QInfRetry: } pSMB->BufferFormat = 0x04; name_len++; /* account for buffer type byte */ - inc_rfc1001_len(pSMB, (__u16)name_len); + in_len += name_len; pSMB->ByteCount = cpu_to_le16(name_len); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QueryInfo = %d\n", rc); @@ -3804,8 +3877,10 @@ QInfRetry: data->EndOfFile = data->AllocationSize; data->Attributes = cpu_to_le32(le16_to_cpu(pSMBr->attr)); - } else - rc = -EIO; /* bad buffer passed in */ + } else { + /* bad buffer passed in */ + rc = smb_EIO(smb_eio_trace_null_pointers); + } cifs_buf_release(pSMB); @@ -3821,6 +3896,7 @@ CIFSSMBQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, { struct smb_t2_qfi_req *pSMB = NULL; struct smb_t2_qfi_rsp *pSMBr = NULL; + unsigned int in_len; int rc = 0; int bytes_returned; __u16 params, byte_count; @@ -3828,8 +3904,9 @@ CIFSSMBQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, QFileInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 2 /* level */ + 2 /* fid */; pSMB->t2.TotalDataCount = 0; @@ -3842,7 +3919,7 @@ QFileInfoRetry: pSMB->t2.Timeout = 0; pSMB->t2.Reserved2 = 0; pSMB->t2.ParameterOffset = cpu_to_le16(offsetof(struct smb_t2_qfi_req, - Fid) - 4); + Fid)); pSMB->t2.DataCount = 0; pSMB->t2.DataOffset = 0; pSMB->t2.SetupCount = 1; @@ -3854,10 +3931,10 @@ QFileInfoRetry: pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); pSMB->Pad = 0; pSMB->Fid = netfid; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->t2.ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QFileInfo = %d\n", rc); @@ -3865,9 +3942,11 @@ QFileInfoRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc) /* BB add auto retry on EOPNOTSUPP? */ - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_qfileinfo_invalid, + get_bcc(&pSMBr->hdr), 40); else if (get_bcc(&pSMBr->hdr) < 40) - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_qfileinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), 40); else if (pFindData) { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); memcpy((char *) pFindData, @@ -3892,6 +3971,7 @@ CIFSSMBQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, /* level 263 SMB_QUERY_FILE_ALL_INFO */ TRANSACTION2_QPI_REQ *pSMB = NULL; TRANSACTION2_QPI_RSP *pSMBr = NULL; + unsigned int in_len; int rc = 0; int bytes_returned; int name_len; @@ -3901,8 +3981,9 @@ CIFSSMBQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, QPathInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -3925,7 +4006,7 @@ QPathInfoRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req, InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -3939,10 +4020,10 @@ QPathInfoRetry: else pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); @@ -3950,12 +4031,15 @@ QPathInfoRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc) /* BB add auto retry on EOPNOTSUPP? */ - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_qpathinfo_invalid, + get_bcc(&pSMBr->hdr), 40); else if (!legacy && get_bcc(&pSMBr->hdr) < 40) - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_qpathinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), 40); else if (legacy && get_bcc(&pSMBr->hdr) < 24) - rc = -EIO; /* 24 or 26 expected but we do not read - last field */ + /* 24 or 26 expected but we do not read last field */ + rc = smb_EIO2(smb_eio_trace_qpathinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), 24); else if (data) { int size; __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); @@ -3988,6 +4072,7 @@ CIFSSMBUnixQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, { struct smb_t2_qfi_req *pSMB = NULL; struct smb_t2_qfi_rsp *pSMBr = NULL; + unsigned int in_len; int rc = 0; int bytes_returned; __u16 params, byte_count; @@ -3995,8 +4080,9 @@ CIFSSMBUnixQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, UnixQFileInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 2 /* level */ + 2 /* fid */; pSMB->t2.TotalDataCount = 0; @@ -4009,7 +4095,7 @@ UnixQFileInfoRetry: pSMB->t2.Timeout = 0; pSMB->t2.Reserved2 = 0; pSMB->t2.ParameterOffset = cpu_to_le16(offsetof(struct smb_t2_qfi_req, - Fid) - 4); + Fid)); pSMB->t2.DataCount = 0; pSMB->t2.DataOffset = 0; pSMB->t2.SetupCount = 1; @@ -4021,10 +4107,10 @@ UnixQFileInfoRetry: pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); pSMB->Pad = 0; pSMB->Fid = netfid; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->t2.ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in UnixQFileInfo = %d\n", rc); @@ -4033,7 +4119,8 @@ UnixQFileInfoRetry: if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) { cifs_dbg(VFS, "Malformed FILE_UNIX_BASIC_INFO response. Unix Extensions can be disabled on mount by specifying the nosfu mount option.\n"); - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_unixqfileinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), sizeof(FILE_UNIX_BASIC_INFO)); } else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); memcpy((char *) pFindData, @@ -4059,6 +4146,7 @@ CIFSSMBUnixQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, /* SMB_QUERY_FILE_UNIX_BASIC */ TRANSACTION2_QPI_REQ *pSMB = NULL; TRANSACTION2_QPI_RSP *pSMBr = NULL; + unsigned int in_len; int rc = 0; int bytes_returned = 0; int name_len; @@ -4068,8 +4156,9 @@ CIFSSMBUnixQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, UnixQPathInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -4092,7 +4181,7 @@ UnixQPathInfoRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req, InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -4103,10 +4192,10 @@ UnixQPathInfoRetry: pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in UnixQPathInfo = %d\n", rc); @@ -4115,7 +4204,8 @@ UnixQPathInfoRetry: if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) { cifs_dbg(VFS, "Malformed FILE_UNIX_BASIC_INFO response. Unix Extensions can be disabled on mount by specifying the nosfu mount option.\n"); - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_unixqpathinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), sizeof(FILE_UNIX_BASIC_INFO)); } else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); memcpy((char *) pFindData, @@ -4143,7 +4233,7 @@ CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, TRANSACTION2_FFIRST_RSP *pSMBr = NULL; T2_FFIRST_RSP_PARMS *parms; struct nls_table *nls_codepage; - unsigned int lnoff; + unsigned int in_len, lnoff; __u16 params, byte_count; int bytes_returned = 0; int name_len, remap; @@ -4154,8 +4244,9 @@ CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, findFirstRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; nls_codepage = cifs_sb->local_nls; remap = cifs_remap(cifs_sb); @@ -4215,8 +4306,7 @@ findFirstRetry: pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->ParameterOffset = cpu_to_le16( - offsetof(struct smb_com_transaction2_ffirst_req, SearchAttributes) - - 4); + offsetof(struct smb_com_transaction2_ffirst_req, SearchAttributes)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; /* one byte, no need to make endian neutral */ @@ -4231,10 +4321,10 @@ findFirstRetry: /* BB what should we set StorageType to? Does it matter? BB */ pSMB->SearchStorageType = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_ffirst); @@ -4293,7 +4383,7 @@ int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, TRANSACTION2_FNEXT_REQ *pSMB = NULL; TRANSACTION2_FNEXT_RSP *pSMBr = NULL; T2_FNEXT_RSP_PARMS *parms; - unsigned int name_len; + unsigned int name_len, in_len; unsigned int lnoff; __u16 params, byte_count; char *response_data; @@ -4307,8 +4397,9 @@ int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 14; /* includes 2 bytes of null string, converted to LE below*/ byte_count = 0; @@ -4321,7 +4412,7 @@ int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16( - offsetof(struct smb_com_transaction2_fnext_req,SearchHandle) - 4); + offsetof(struct smb_com_transaction2_fnext_req, SearchHandle)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -4349,10 +4440,10 @@ int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, byte_count = params + 1 /* pad */ ; pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_fnext); @@ -4418,6 +4509,7 @@ CIFSFindClose(const unsigned int xid, struct cifs_tcon *tcon, { int rc = 0; FINDCLOSE_REQ *pSMB = NULL; + unsigned int in_len; cifs_dbg(FYI, "In CIFSSMBFindClose\n"); rc = small_smb_init(SMB_COM_FIND_CLOSE2, 1, tcon, (void **)&pSMB); @@ -4426,12 +4518,13 @@ CIFSFindClose(const unsigned int xid, struct cifs_tcon *tcon, as file handle has been closed */ if (rc == -EAGAIN) return 0; - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->FileID = searchHandle; pSMB->ByteCount = 0; - rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, in_len, 0); cifs_small_buf_release(pSMB); if (rc) cifs_dbg(VFS, "Send error in FindClose = %d\n", rc); @@ -4453,6 +4546,7 @@ CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; TRANSACTION2_QPI_REQ *pSMB = NULL; TRANSACTION2_QPI_RSP *pSMBr = NULL; + unsigned int in_len; int name_len, bytes_returned; __u16 params, byte_count; @@ -4463,8 +4557,9 @@ CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, GetInodeNumberRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -4488,7 +4583,7 @@ GetInodeNumberRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req, InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -4499,10 +4594,10 @@ GetInodeNumberRetry: pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "error %d in QueryInternalInfo\n", rc); @@ -4513,7 +4608,8 @@ GetInodeNumberRetry: if (rc || get_bcc(&pSMBr->hdr) < 2) /* If rc should we check for EOPNOSUPP and disable the srvino flag? or in caller? */ - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_getsrvinonum_bcc_too_small, + get_bcc(&pSMBr->hdr), 2); else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); __u16 count = le16_to_cpu(pSMBr->t2.DataCount); @@ -4521,7 +4617,8 @@ GetInodeNumberRetry: /* BB Do we need a cast or hash here ? */ if (count < 8) { cifs_dbg(FYI, "Invalid size ret in QryIntrnlInf\n"); - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_getsrvinonum_size, + count, 8); goto GetInodeNumOut; } pfinfo = (struct file_internal_info *) @@ -4545,6 +4642,7 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, /* TRANS2_GET_DFS_REFERRAL */ TRANSACTION2_GET_DFS_REFER_REQ *pSMB = NULL; TRANSACTION2_GET_DFS_REFER_RSP *pSMBr = NULL; + unsigned int in_len; int rc = 0; int bytes_returned; int name_len; @@ -4564,8 +4662,9 @@ getDFSRetry: */ rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **)&pSMB, (void **)&pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; /* server pointer checked in called function, but should never be null here anyway */ @@ -4607,7 +4706,7 @@ getDFSRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_get_dfs_refer_req, MaxReferralLevel) - 4); + struct smb_com_transaction2_get_dfs_refer_req, MaxReferralLevel)); pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_GET_DFS_REFERRAL); @@ -4615,10 +4714,10 @@ getDFSRetry: pSMB->ParameterCount = cpu_to_le16(params); pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->MaxReferralLevel = cpu_to_le16(3); - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in GetDFSRefer = %d\n", rc); @@ -4628,7 +4727,8 @@ getDFSRetry: /* BB Also check if enough total bytes returned? */ if (rc || get_bcc(&pSMBr->hdr) < 17) { - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_getdfsrefer_bcc_too_small, + get_bcc(&pSMBr->hdr), 17); goto GetDFSRefExit; } @@ -4660,6 +4760,7 @@ SMBOldQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, TRANSACTION2_QFSI_REQ *pSMB = NULL; TRANSACTION2_QFSI_RSP *pSMBr = NULL; FILE_SYSTEM_ALLOC_INFO *response_data; + unsigned int in_len; int rc = 0; int bytes_returned = 0; __u16 params, byte_count; @@ -4668,8 +4769,9 @@ SMBOldQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, oldQFSInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 2; /* level */ pSMB->TotalDataCount = 0; @@ -4684,17 +4786,17 @@ oldQFSInfoRetry: pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qfsi_req, InformationLevel) - 4); + struct smb_com_transaction2_qfsi_req, InformationLevel)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION); - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QFSInfo = %d\n", rc); @@ -4702,7 +4804,8 @@ oldQFSInfoRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < 18) - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_oldqfsinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), 18); else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); cifs_dbg(FYI, "qfsinf resp BCC: %d Offset %d\n", @@ -4747,6 +4850,7 @@ CIFSSMBQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, TRANSACTION2_QFSI_REQ *pSMB = NULL; TRANSACTION2_QFSI_RSP *pSMBr = NULL; FILE_SYSTEM_SIZE_INFO *response_data; + unsigned int in_len; int rc = 0; int bytes_returned = 0; __u16 params, byte_count; @@ -4755,8 +4859,9 @@ CIFSSMBQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, QFSInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 2; /* level */ pSMB->TotalDataCount = 0; @@ -4771,17 +4876,17 @@ QFSInfoRetry: pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qfsi_req, InformationLevel) - 4); + struct smb_com_transaction2_qfsi_req, InformationLevel)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO); - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QFSInfo = %d\n", rc); @@ -4789,7 +4894,8 @@ QFSInfoRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < 24) - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_qfsinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), 24); else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); @@ -4833,6 +4939,7 @@ CIFSSMBQFSAttributeInfo(const unsigned int xid, struct cifs_tcon *tcon) TRANSACTION2_QFSI_REQ *pSMB = NULL; TRANSACTION2_QFSI_RSP *pSMBr = NULL; FILE_SYSTEM_ATTRIBUTE_INFO *response_data; + unsigned int in_len; int rc = 0; int bytes_returned = 0; __u16 params, byte_count; @@ -4841,8 +4948,9 @@ CIFSSMBQFSAttributeInfo(const unsigned int xid, struct cifs_tcon *tcon) QFSAttributeRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 2; /* level */ pSMB->TotalDataCount = 0; @@ -4858,17 +4966,17 @@ QFSAttributeRetry: pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qfsi_req, InformationLevel) - 4); + struct smb_com_transaction2_qfsi_req, InformationLevel)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO); - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(VFS, "Send error in QFSAttributeInfo = %d\n", rc); @@ -4877,7 +4985,8 @@ QFSAttributeRetry: if (rc || get_bcc(&pSMBr->hdr) < 13) { /* BB also check if enough bytes returned */ - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_qfsattrinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), 13); } else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); response_data = @@ -4903,6 +5012,7 @@ CIFSSMBQFSDeviceInfo(const unsigned int xid, struct cifs_tcon *tcon) TRANSACTION2_QFSI_REQ *pSMB = NULL; TRANSACTION2_QFSI_RSP *pSMBr = NULL; FILE_SYSTEM_DEVICE_INFO *response_data; + unsigned int in_len; int rc = 0; int bytes_returned = 0; __u16 params, byte_count; @@ -4911,8 +5021,9 @@ CIFSSMBQFSDeviceInfo(const unsigned int xid, struct cifs_tcon *tcon) QFSDeviceRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 2; /* level */ pSMB->TotalDataCount = 0; @@ -4928,7 +5039,7 @@ QFSDeviceRetry: pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qfsi_req, InformationLevel) - 4); + struct smb_com_transaction2_qfsi_req, InformationLevel)); pSMB->DataCount = 0; pSMB->DataOffset = 0; @@ -4936,10 +5047,10 @@ QFSDeviceRetry: pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO); - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QFSDeviceInfo = %d\n", rc); @@ -4948,7 +5059,9 @@ QFSDeviceRetry: if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_SYSTEM_DEVICE_INFO)) - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_qfsdevinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), + sizeof(FILE_SYSTEM_DEVICE_INFO)); else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); response_data = @@ -4974,6 +5087,7 @@ CIFSSMBQFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon) TRANSACTION2_QFSI_REQ *pSMB = NULL; TRANSACTION2_QFSI_RSP *pSMBr = NULL; FILE_SYSTEM_UNIX_INFO *response_data; + unsigned int in_len; int rc = 0; int bytes_returned = 0; __u16 params, byte_count; @@ -4982,8 +5096,9 @@ CIFSSMBQFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon) QFSUnixRetry: rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 2; /* level */ pSMB->TotalDataCount = 0; @@ -5001,15 +5116,15 @@ QFSUnixRetry: pSMB->ParameterCount = cpu_to_le16(params); pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->ParameterOffset = cpu_to_le16(offsetof(struct - smb_com_transaction2_qfsi_req, InformationLevel) - 4); + smb_com_transaction2_qfsi_req, InformationLevel)); pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO); - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(VFS, "Send error in QFSUnixInfo = %d\n", rc); @@ -5017,7 +5132,8 @@ QFSUnixRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < 13) { - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_qfsunixinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), 13); } else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); response_data = @@ -5043,6 +5159,7 @@ CIFSSMBSetFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon, __u64 cap) /* level 0x200 SMB_SET_CIFS_UNIX_INFO */ TRANSACTION2_SETFSI_REQ *pSMB = NULL; TRANSACTION2_SETFSI_RSP *pSMBr = NULL; + unsigned int in_len; int rc = 0; int bytes_returned = 0; __u16 params, param_offset, offset, byte_count; @@ -5052,8 +5169,9 @@ SETFSUnixRetry: /* BB switch to small buf init to save memory */ rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 4; /* 2 bytes zero followed by info level. */ pSMB->MaxSetupCount = 0; @@ -5061,8 +5179,7 @@ SETFSUnixRetry: pSMB->Flags = 0; pSMB->Timeout = 0; pSMB->Reserved2 = 0; - param_offset = offsetof(struct smb_com_transaction2_setfsi_req, FileNum) - - 4; + param_offset = offsetof(struct smb_com_transaction2_setfsi_req, FileNum); offset = param_offset + params; pSMB->MaxParameterCount = cpu_to_le16(4); @@ -5089,10 +5206,10 @@ SETFSUnixRetry: pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION); pSMB->ClientUnixCap = cpu_to_le64(cap); - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(VFS, "Send error in SETFSUnixInfo = %d\n", rc); @@ -5119,6 +5236,7 @@ CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon, TRANSACTION2_QFSI_REQ *pSMB = NULL; TRANSACTION2_QFSI_RSP *pSMBr = NULL; FILE_SYSTEM_POSIX_INFO *response_data; + unsigned int in_len; int rc = 0; int bytes_returned = 0; __u16 params, byte_count; @@ -5127,8 +5245,9 @@ CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon, QFSPosixRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; params = 2; /* level */ pSMB->TotalDataCount = 0; @@ -5146,15 +5265,15 @@ QFSPosixRetry: pSMB->ParameterCount = cpu_to_le16(params); pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->ParameterOffset = cpu_to_le16(offsetof(struct - smb_com_transaction2_qfsi_req, InformationLevel) - 4); + smb_com_transaction2_qfsi_req, InformationLevel)); pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO); - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QFSUnixInfo = %d\n", rc); @@ -5162,7 +5281,8 @@ QFSPosixRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < 13) { - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_qfsposixinfo_bcc_too_small, + get_bcc(&pSMBr->hdr), 13); } else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); response_data = @@ -5219,6 +5339,7 @@ CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, struct smb_com_transaction2_spi_req *pSMB = NULL; struct smb_com_transaction2_spi_rsp *pSMBr = NULL; struct file_end_of_file_info *parm_data; + unsigned int in_len; int name_len; int rc = 0; int bytes_returned = 0; @@ -5230,8 +5351,9 @@ CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, SetEOFRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -5252,7 +5374,7 @@ SetEOFRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel); offset = param_offset + params; if (set_allocation) { if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) @@ -5284,10 +5406,10 @@ SetEOFRetry: pSMB->ParameterCount = cpu_to_le16(params); pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; parm_data->FileSize = cpu_to_le64(size); pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) cifs_dbg(FYI, "SetPathInfo (file size) returned %d\n", rc); @@ -5306,15 +5428,16 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct file_end_of_file_info *parm_data; + unsigned int in_len; int rc = 0; __u16 params, param_offset, offset, byte_count, count; cifs_dbg(FYI, "SetFileSize (via SetFileInfo) %lld\n", (long long)size); rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); - - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->hdr.Pid = cpu_to_le16((__u16)cfile->pid); pSMB->hdr.PidHigh = cpu_to_le16((__u16)(cfile->pid >> 16)); @@ -5325,7 +5448,7 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Flags = 0; pSMB->Timeout = 0; pSMB->Reserved2 = 0; - param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; + param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid); offset = param_offset + params; count = sizeof(struct file_end_of_file_info); @@ -5341,9 +5464,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, pSMB->TotalDataCount = pSMB->DataCount; pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->ParameterOffset = cpu_to_le16(param_offset); - /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ parm_data = - (struct file_end_of_file_info *)(((char *)pSMB) + offset + 4); + (struct file_end_of_file_info *)(((char *)pSMB) + offset); pSMB->DataOffset = cpu_to_le16(offset); parm_data->FileSize = cpu_to_le64(size); pSMB->Fid = cfile->fid.netfid; @@ -5363,9 +5485,9 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO); } pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, in_len, 0); cifs_small_buf_release(pSMB); if (rc) { cifs_dbg(FYI, "Send error in SetFileInfo (SetFileSize) = %d\n", @@ -5387,6 +5509,7 @@ SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, SETATTR_REQ *pSMB; SETATTR_RSP *pSMBr; struct timespec64 ts; + unsigned int in_len; int bytes_returned; int name_len; int rc; @@ -5396,8 +5519,9 @@ SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, retry: rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -5419,10 +5543,10 @@ retry: } pSMB->BufferFormat = 0x04; name_len++; /* account for buffer type byte */ - inc_rfc1001_len(pSMB, (__u16)name_len); + in_len += name_len; pSMB->ByteCount = cpu_to_le16(name_len); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) cifs_dbg(FYI, "Send error in %s = %d\n", __func__, rc); @@ -5446,15 +5570,16 @@ CIFSSMBSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener) { struct smb_com_transaction2_sfi_req *pSMB = NULL; + unsigned int in_len; char *data_offset; int rc = 0; __u16 params, param_offset, offset, byte_count, count; cifs_dbg(FYI, "Set Times (via SetFileInfo)\n"); rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); - - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); @@ -5465,11 +5590,10 @@ CIFSSMBSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Flags = 0; pSMB->Timeout = 0; pSMB->Reserved2 = 0; - param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; + param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid); offset = param_offset + params; - data_offset = (char *)pSMB + - offsetof(struct smb_hdr, Protocol) + offset; + data_offset = (char *)pSMB + offset; count = sizeof(FILE_BASIC_INFO); pSMB->MaxParameterCount = cpu_to_le16(2); @@ -5491,10 +5615,10 @@ CIFSSMBSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, else pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); - rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, in_len, 0); cifs_small_buf_release(pSMB); if (rc) cifs_dbg(FYI, "Send error in Set Time (SetFileInfo) = %d\n", @@ -5511,15 +5635,16 @@ CIFSSMBSetFileDisposition(const unsigned int xid, struct cifs_tcon *tcon, bool delete_file, __u16 fid, __u32 pid_of_opener) { struct smb_com_transaction2_sfi_req *pSMB = NULL; + unsigned int in_len; char *data_offset; int rc = 0; __u16 params, param_offset, offset, byte_count, count; cifs_dbg(FYI, "Set File Disposition (via SetFileInfo)\n"); rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); - - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); @@ -5530,11 +5655,9 @@ CIFSSMBSetFileDisposition(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Flags = 0; pSMB->Timeout = 0; pSMB->Reserved2 = 0; - param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; + param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid); offset = param_offset + params; - - /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ - data_offset = (char *)(pSMB) + offset + 4; + data_offset = (char *)(pSMB) + offset; count = 1; pSMB->MaxParameterCount = cpu_to_le16(2); @@ -5553,10 +5676,10 @@ CIFSSMBSetFileDisposition(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Fid = fid; pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); *data_offset = delete_file ? 1 : 0; - rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, in_len, 0); cifs_small_buf_release(pSMB); if (rc) cifs_dbg(FYI, "Send error in SetFileDisposition = %d\n", rc); @@ -5604,6 +5727,7 @@ CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; + unsigned int in_len; int name_len; int rc = 0; int bytes_returned = 0; @@ -5616,8 +5740,9 @@ CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, SetTimesRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -5640,7 +5765,7 @@ SetTimesRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel); offset = param_offset + params; data_offset = (char *)pSMB + offsetof(typeof(*pSMB), hdr.Protocol) + offset; pSMB->ParameterOffset = cpu_to_le16(param_offset); @@ -5659,10 +5784,10 @@ SetTimesRetry: else pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) cifs_dbg(FYI, "SetPathInfo (times) returned %d\n", rc); @@ -5732,15 +5857,16 @@ CIFSSMBUnixSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, u16 fid, u32 pid_of_opener) { struct smb_com_transaction2_sfi_req *pSMB = NULL; + unsigned int in_len; char *data_offset; int rc = 0; u16 params, param_offset, offset, byte_count, count; cifs_dbg(FYI, "Set Unix Info (via SetFileInfo)\n"); rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); - - if (rc) + if (rc < 0) return rc; + in_len = rc; pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); @@ -5751,11 +5877,10 @@ CIFSSMBUnixSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Flags = 0; pSMB->Timeout = 0; pSMB->Reserved2 = 0; - param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; + param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid); offset = param_offset + params; - data_offset = (char *)pSMB + - offsetof(struct smb_hdr, Protocol) + offset; + data_offset = (char *)pSMB + offset; count = sizeof(FILE_UNIX_BASIC_INFO); @@ -5775,12 +5900,12 @@ CIFSSMBUnixSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, pSMB->Fid = fid; pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); cifs_fill_unix_set_info((FILE_UNIX_BASIC_INFO *)data_offset, args); - rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, in_len, 0); cifs_small_buf_release(pSMB); if (rc) cifs_dbg(FYI, "Send error in Set Time (SetFileInfo) = %d\n", @@ -5800,6 +5925,7 @@ CIFSSMBUnixSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; + unsigned int in_len; int name_len; int rc = 0; int bytes_returned = 0; @@ -5810,8 +5936,9 @@ CIFSSMBUnixSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, setPermsRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -5834,10 +5961,9 @@ setPermsRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel); offset = param_offset + params; - /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ - data_offset = (FILE_UNIX_BASIC_INFO *)((char *) pSMB + offset + 4); + data_offset = (FILE_UNIX_BASIC_INFO *)((char *) pSMB + offset); memset(data_offset, 0, count); pSMB->DataOffset = cpu_to_le16(offset); pSMB->ParameterOffset = cpu_to_le16(param_offset); @@ -5851,12 +5977,12 @@ setPermsRetry: pSMB->TotalDataCount = pSMB->DataCount; pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; cifs_fill_unix_set_info(data_offset, args); pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) cifs_dbg(FYI, "SetPathInfo (perms) returned %d\n", rc); @@ -5888,6 +6014,7 @@ CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon, TRANSACTION2_QPI_RSP *pSMBr = NULL; int remap = cifs_remap(cifs_sb); struct nls_table *nls_codepage = cifs_sb->local_nls; + unsigned int in_len; int rc = 0; int bytes_returned; int list_len; @@ -5902,8 +6029,9 @@ CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon, QAllEAsRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { list_len = @@ -5926,7 +6054,7 @@ QAllEAsRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req, InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel)); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -5937,10 +6065,10 @@ QAllEAsRetry: pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS); pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QueryAllEAs = %d\n", rc); @@ -5954,7 +6082,8 @@ QAllEAsRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < 4) { - rc = -EIO; /* bad smb */ + rc = smb_EIO2(smb_eio_trace_qalleas_bcc_too_small, + get_bcc(&pSMBr->hdr), 4); goto QAllEAsOut; } @@ -5984,7 +6113,9 @@ QAllEAsRetry: end_of_smb = (char *)pByteArea(&pSMBr->hdr) + get_bcc(&pSMBr->hdr); if ((char *)ea_response_data + list_len > end_of_smb) { cifs_dbg(FYI, "EA list appears to go beyond SMB\n"); - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_qalleas_overlong, + (unsigned long)ea_response_data + list_len - (unsigned long)pSMBr, + (unsigned long)end_of_smb - (unsigned long)pSMBr); goto QAllEAsOut; } @@ -6001,7 +6132,7 @@ QAllEAsRetry: /* make sure we can read name_len and value_len */ if (list_len < 0) { cifs_dbg(FYI, "EA entry goes beyond length of list\n"); - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_qalleas_ea_overlong, list_len); goto QAllEAsOut; } @@ -6010,7 +6141,7 @@ QAllEAsRetry: list_len -= name_len + 1 + value_len; if (list_len < 0) { cifs_dbg(FYI, "EA entry goes beyond length of list\n"); - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_qalleas_ea_overlong, list_len); goto QAllEAsOut; } @@ -6072,6 +6203,7 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon, struct smb_com_transaction2_spi_req *pSMB = NULL; struct smb_com_transaction2_spi_rsp *pSMBr = NULL; struct fealist *parm_data; + unsigned int in_len; int name_len; int rc = 0; int bytes_returned = 0; @@ -6082,8 +6214,9 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon, SetEARetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); - if (rc) + if (rc < 0) return rc; + in_len = rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = @@ -6115,12 +6248,12 @@ SetEARetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel); offset = param_offset + params; pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_EA); - parm_data = (void *)pSMB + offsetof(struct smb_hdr, Protocol) + offset; + parm_data = (void *)pSMB + offset; pSMB->ParameterOffset = cpu_to_le16(param_offset); pSMB->DataOffset = cpu_to_le16(offset); pSMB->SetupCount = 1; @@ -6149,9 +6282,9 @@ SetEARetry: pSMB->ParameterCount = cpu_to_le16(params); pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->Reserved4 = 0; - inc_rfc1001_len(pSMB, byte_count); + in_len += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, in_len, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) cifs_dbg(FYI, "SetPathInfo (EA) returned %d\n", rc); diff --git a/fs/smb/client/cifstransport.c b/fs/smb/client/cifstransport.c index 4c4f5befb6d3..28d1cee90625 100644 --- a/fs/smb/client/cifstransport.c +++ b/fs/smb/client/cifstransport.c @@ -43,9 +43,9 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) return NULL; } - temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); + temp = mempool_alloc(&cifs_mid_pool, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); - kref_init(&temp->refcount); + refcount_set(&temp->refcount, 1); spin_lock_init(&temp->mid_lock); temp->mid = get_mid(smb_buffer); temp->pid = current->pid; @@ -54,7 +54,6 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) /* easier to use jiffies */ /* when mid allocated can be before when sent */ temp->when_alloc = jiffies; - temp->server = server; /* * The default is for the mid to be synchronous, so the @@ -70,22 +69,6 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) return temp; } -int -smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, - unsigned int smb_buf_length) -{ - struct kvec iov[2]; - struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = 2 }; - - iov[0].iov_base = smb_buffer; - iov[0].iov_len = 4; - iov[1].iov_base = (char *)smb_buffer + 4; - iov[1].iov_len = smb_buf_length; - - return __smb_send_rqst(server, 1, &rqst); -} - static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, struct mid_q_entry **ppmidQ) { @@ -125,10 +108,6 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; - if (rqst->rq_iov[0].iov_len != 4 || - rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) - return ERR_PTR(-EIO); - /* enable signing if server requires it */ if (server->sign) hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; @@ -139,7 +118,7 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) rc = cifs_sign_rqst(rqst, server, &mid->sequence_number); if (rc) { - release_mid(mid); + release_mid(server, mid); return ERR_PTR(rc); } @@ -157,7 +136,7 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) */ int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, - char *in_buf, int flags) + char *in_buf, unsigned int in_len, int flags) { int rc; struct kvec iov[1]; @@ -165,7 +144,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, int resp_buf_type; iov[0].iov_base = in_buf; - iov[0].iov_len = get_rfc1002_len(in_buf) + 4; + iov[0].iov_len = in_len; flags |= CIFS_NO_RSP_BUF; rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc); @@ -177,21 +156,19 @@ int cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error) { - unsigned int len = get_rfc1002_len(mid->resp_buf) + 4; + unsigned int len = mid->response_pdu_len; dump_smb(mid->resp_buf, min_t(u32, 92, len)); /* convert the length into a more usable form */ if (server->sign) { - struct kvec iov[2]; + struct kvec iov[1]; int rc = 0; struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = 2 }; + .rq_nvec = ARRAY_SIZE(iov) }; iov[0].iov_base = mid->resp_buf; - iov[0].iov_len = 4; - iov[1].iov_base = (char *)mid->resp_buf + 4; - iov[1].iov_len = len - 4; + iov[0].iov_len = len; /* FIXME: add code to kill session */ rc = cifs_verify_signature(&rqst, server, mid->sequence_number); @@ -201,27 +178,23 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, } /* BB special case reconnect tid and uid here? */ - return map_and_check_smb_error(mid, log_error); + return map_and_check_smb_error(server, mid, log_error); } struct mid_q_entry * -cifs_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *ignored, +cifs_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *server, struct smb_rqst *rqst) { int rc; struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; - if (rqst->rq_iov[0].iov_len != 4 || - rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) - return ERR_PTR(-EIO); - rc = allocate_mid(ses, hdr, &mid); if (rc) return ERR_PTR(rc); - rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number); + rc = cifs_sign_rqst(rqst, server, &mid->sequence_number); if (rc) { - delete_mid(mid); + delete_mid(server, mid); return ERR_PTR(rc); } return mid; @@ -232,334 +205,59 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, struct kvec *iov, int n_vec, int *resp_buf_type /* ret */, const int flags, struct kvec *resp_iov) { - struct smb_rqst rqst; - struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov; - int rc; - - if (n_vec + 1 > CIFS_MAX_IOV_SIZE) { - new_iov = kmalloc_array(n_vec + 1, sizeof(struct kvec), - GFP_KERNEL); - if (!new_iov) { - /* otherwise cifs_send_recv below sets resp_buf_type */ - *resp_buf_type = CIFS_NO_BUFFER; - return -ENOMEM; - } - } else - new_iov = s_iov; - - /* 1st iov is a RFC1001 length followed by the rest of the packet */ - memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec)); - - new_iov[0].iov_base = new_iov[1].iov_base; - new_iov[0].iov_len = 4; - new_iov[1].iov_base += 4; - new_iov[1].iov_len -= 4; + struct smb_rqst rqst = { + .rq_iov = iov, + .rq_nvec = n_vec, + }; - memset(&rqst, 0, sizeof(struct smb_rqst)); - rqst.rq_iov = new_iov; - rqst.rq_nvec = n_vec + 1; - - rc = cifs_send_recv(xid, ses, ses->server, - &rqst, resp_buf_type, flags, resp_iov); - if (n_vec + 1 > CIFS_MAX_IOV_SIZE) - kfree(new_iov); - return rc; + return cifs_send_recv(xid, ses, ses->server, + &rqst, resp_buf_type, flags, resp_iov); } int SendReceive(const unsigned int xid, struct cifs_ses *ses, - struct smb_hdr *in_buf, struct smb_hdr *out_buf, - int *pbytes_returned, const int flags) + struct smb_hdr *in_buf, unsigned int in_len, + struct smb_hdr *out_buf, int *pbytes_returned, const int flags) { - int rc = 0; - struct mid_q_entry *midQ; - unsigned int len = be32_to_cpu(in_buf->smb_buf_length); - struct kvec iov = { .iov_base = in_buf, .iov_len = len }; - struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; - struct cifs_credits credits = { .value = 1, .instance = 0 }; struct TCP_Server_Info *server; + struct kvec resp_iov = {}; + struct kvec iov = { .iov_base = in_buf, .iov_len = in_len }; + struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; + int resp_buf_type; + int rc = 0; + if (WARN_ON_ONCE(in_len > 0xffffff)) + return smb_EIO1(smb_eio_trace_tx_too_long, in_len); if (ses == NULL) { cifs_dbg(VFS, "Null smb session\n"); - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); } server = ses->server; if (server == NULL) { cifs_dbg(VFS, "Null tcp session\n"); - return -EIO; - } - - spin_lock(&server->srv_lock); - if (server->tcpStatus == CifsExiting) { - spin_unlock(&server->srv_lock); - return -ENOENT; + return smb_EIO(smb_eio_trace_null_pointers); } - spin_unlock(&server->srv_lock); /* Ensure that we do not send more than 50 overlapping requests to the same server. We may make this configurable later or use ses->maxReq */ - if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + if (in_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { cifs_server_dbg(VFS, "Invalid length, greater than maximum frame, %d\n", - len); - return -EIO; - } - - rc = wait_for_free_request(server, flags, &credits.instance); - if (rc) - return rc; - - /* make sure that we sign in the same order that we send on this socket - and avoid races inside tcp sendmsg code that could cause corruption - of smb data */ - - cifs_server_lock(server); - - rc = allocate_mid(ses, in_buf, &midQ); - if (rc) { - cifs_server_unlock(server); - /* Update # of requests on wire to server */ - add_credits(server, &credits, 0); - return rc; - } - - rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); - if (rc) { - cifs_server_unlock(server); - goto out; - } - - midQ->mid_state = MID_REQUEST_SUBMITTED; - - rc = smb_send(server, in_buf, len); - cifs_save_when_sent(midQ); - - if (rc < 0) - server->sequence_number -= 2; - - cifs_server_unlock(server); - - if (rc < 0) - goto out; - - rc = wait_for_response(server, midQ); - if (rc != 0) { - send_cancel(server, &rqst, midQ); - spin_lock(&midQ->mid_lock); - if (midQ->callback) { - /* no longer considered to be "in-flight" */ - midQ->callback = release_mid; - spin_unlock(&midQ->mid_lock); - add_credits(server, &credits, 0); - return rc; - } - spin_unlock(&midQ->mid_lock); - } - - rc = cifs_sync_mid_result(midQ, server); - if (rc != 0) { - add_credits(server, &credits, 0); - return rc; - } - - if (!midQ->resp_buf || !out_buf || - midQ->mid_state != MID_RESPONSE_READY) { - rc = -EIO; - cifs_server_dbg(VFS, "Bad MID state?\n"); - goto out; - } - - *pbytes_returned = get_rfc1002_len(midQ->resp_buf); - memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); - rc = cifs_check_receive(midQ, server, 0); -out: - delete_mid(midQ); - add_credits(server, &credits, 0); - - return rc; -} - -/* We send a LOCKINGX_CANCEL_LOCK to cause the Windows - blocking lock to return. */ - -static int -send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon, - struct smb_hdr *in_buf, - struct smb_hdr *out_buf) -{ - int bytes_returned; - struct cifs_ses *ses = tcon->ses; - LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; - - /* We just modify the current in_buf to change - the type of lock from LOCKING_ANDX_SHARED_LOCK - or LOCKING_ANDX_EXCLUSIVE_LOCK to - LOCKING_ANDX_CANCEL_LOCK. */ - - pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; - pSMB->Timeout = 0; - pSMB->hdr.Mid = get_next_mid(ses->server); - - return SendReceive(xid, ses, in_buf, out_buf, - &bytes_returned, 0); -} - -int -SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, - struct smb_hdr *in_buf, struct smb_hdr *out_buf, - int *pbytes_returned) -{ - int rc = 0; - int rstart = 0; - struct mid_q_entry *midQ; - struct cifs_ses *ses; - unsigned int len = be32_to_cpu(in_buf->smb_buf_length); - struct kvec iov = { .iov_base = in_buf, .iov_len = len }; - struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; - unsigned int instance; - struct TCP_Server_Info *server; - - if (tcon == NULL || tcon->ses == NULL) { - cifs_dbg(VFS, "Null smb session\n"); - return -EIO; - } - ses = tcon->ses; - server = ses->server; - - if (server == NULL) { - cifs_dbg(VFS, "Null tcp session\n"); - return -EIO; - } - - spin_lock(&server->srv_lock); - if (server->tcpStatus == CifsExiting) { - spin_unlock(&server->srv_lock); - return -ENOENT; - } - spin_unlock(&server->srv_lock); - - /* Ensure that we do not send more than 50 overlapping requests - to the same server. We may make this configurable later or - use ses->maxReq */ - - if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cifs_tcon_dbg(VFS, "Invalid length, greater than maximum frame, %d\n", - len); - return -EIO; + in_len); + return smb_EIO1(smb_eio_trace_tx_too_long, in_len); } - rc = wait_for_free_request(server, CIFS_BLOCKING_OP, &instance); - if (rc) - return rc; - - /* make sure that we sign in the same order that we send on this socket - and avoid races inside tcp sendmsg code that could cause corruption - of smb data */ - - cifs_server_lock(server); - - rc = allocate_mid(ses, in_buf, &midQ); - if (rc) { - cifs_server_unlock(server); - return rc; - } - - rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); - if (rc) { - delete_mid(midQ); - cifs_server_unlock(server); - return rc; - } - - midQ->mid_state = MID_REQUEST_SUBMITTED; - rc = smb_send(server, in_buf, len); - cifs_save_when_sent(midQ); - + rc = cifs_send_recv(xid, ses, ses->server, + &rqst, &resp_buf_type, flags, &resp_iov); if (rc < 0) - server->sequence_number -= 2; - - cifs_server_unlock(server); - - if (rc < 0) { - delete_mid(midQ); return rc; - } - /* Wait for a reply - allow signals to interrupt. */ - rc = wait_event_interruptible(server->response_q, - (!(midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED)) || - ((server->tcpStatus != CifsGood) && - (server->tcpStatus != CifsNew))); - - /* Were we interrupted by a signal ? */ - spin_lock(&server->srv_lock); - if ((rc == -ERESTARTSYS) && - (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) && - ((server->tcpStatus == CifsGood) || - (server->tcpStatus == CifsNew))) { - spin_unlock(&server->srv_lock); - - if (in_buf->Command == SMB_COM_TRANSACTION2) { - /* POSIX lock. We send a NT_CANCEL SMB to cause the - blocking lock to return. */ - rc = send_cancel(server, &rqst, midQ); - if (rc) { - delete_mid(midQ); - return rc; - } - } else { - /* Windows lock. We send a LOCKINGX_CANCEL_LOCK - to cause the blocking lock to return. */ - - rc = send_lock_cancel(xid, tcon, in_buf, out_buf); - - /* If we get -ENOLCK back the lock may have - already been removed. Don't exit in this case. */ - if (rc && rc != -ENOLCK) { - delete_mid(midQ); - return rc; - } - } - - rc = wait_for_response(server, midQ); - if (rc) { - send_cancel(server, &rqst, midQ); - spin_lock(&midQ->mid_lock); - if (midQ->callback) { - /* no longer considered to be "in-flight" */ - midQ->callback = release_mid; - spin_unlock(&midQ->mid_lock); - return rc; - } - spin_unlock(&midQ->mid_lock); - } - - /* We got the response - restart system call. */ - rstart = 1; - spin_lock(&server->srv_lock); - } - spin_unlock(&server->srv_lock); - - rc = cifs_sync_mid_result(midQ, server); - if (rc != 0) - return rc; - - /* rcvd frame is ok */ - if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_READY) { - rc = -EIO; - cifs_tcon_dbg(VFS, "Bad MID state?\n"); - goto out; + if (out_buf) { + *pbytes_returned = resp_iov.iov_len; + if (resp_iov.iov_len) + memcpy(out_buf, resp_iov.iov_base, resp_iov.iov_len); } - - *pbytes_returned = get_rfc1002_len(midQ->resp_buf); - memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); - rc = cifs_check_receive(midQ, server, 0); -out: - delete_mid(midQ); - if (rstart && rc == -EACCES) - return -ERESTARTSYS; + free_rsp_buf(resp_buf_type, resp_iov.iov_base); return rc; } diff --git a/fs/smb/client/compress.c b/fs/smb/client/compress.c index db709f5cd2e1..e0c44b46080e 100644 --- a/fs/smb/client/compress.c +++ b/fs/smb/client/compress.c @@ -44,7 +44,7 @@ struct bucket { unsigned int count; }; -/** +/* * has_low_entropy() - Compute Shannon entropy of the sampled data. * @bkt: Bytes counts of the sample. * @slen: Size of the sample. @@ -82,7 +82,7 @@ static bool has_low_entropy(struct bucket *bkt, size_t slen) #define BYTE_DIST_BAD 0 #define BYTE_DIST_GOOD 1 #define BYTE_DIST_MAYBE 2 -/** +/* * calc_byte_distribution() - Compute byte distribution on the sampled data. * @bkt: Byte counts of the sample. * @slen: Size of the sample. @@ -182,7 +182,7 @@ static int collect_sample(const struct iov_iter *source, ssize_t max, u8 *sample return s; } -/** +/* * is_compressible() - Determines if a chunk of data is compressible. * @data: Iterator containing uncompressed data. * @@ -261,6 +261,21 @@ out: return ret; } +/* + * should_compress() - Determines if a request (write) or the response to a + * request (read) should be compressed. + * @tcon: tcon of the request is being sent to + * @rqst: request to evaluate + * + * Return: true iff: + * - compression was successfully negotiated with server + * - server has enabled compression for the share + * - it's a read or write request + * - (write only) request length is >= SMB_COMPRESS_MIN_LEN + * - (write only) is_compressible() returns 1 + * + * Return false otherwise. + */ bool should_compress(const struct cifs_tcon *tcon, const struct smb_rqst *rq) { const struct smb2_hdr *shdr = rq->rq_iov->iov_base; @@ -310,7 +325,7 @@ int smb_compress(struct TCP_Server_Info *server, struct smb_rqst *rq, compress_s iter = rq->rq_iter; if (!copy_from_iter_full(src, slen, &iter)) { - ret = -EIO; + ret = smb_EIO(smb_eio_trace_compress_copy); goto err_free; } diff --git a/fs/smb/client/compress.h b/fs/smb/client/compress.h index f3ed1d3e52fb..63aea32fbe92 100644 --- a/fs/smb/client/compress.h +++ b/fs/smb/client/compress.h @@ -29,26 +29,11 @@ #ifdef CONFIG_CIFS_COMPRESSION typedef int (*compress_send_fn)(struct TCP_Server_Info *, int, struct smb_rqst *); -int smb_compress(struct TCP_Server_Info *server, struct smb_rqst *rq, compress_send_fn send_fn); -/** - * should_compress() - Determines if a request (write) or the response to a - * request (read) should be compressed. - * @tcon: tcon of the request is being sent to - * @rqst: request to evaluate - * - * Return: true iff: - * - compression was successfully negotiated with server - * - server has enabled compression for the share - * - it's a read or write request - * - (write only) request length is >= SMB_COMPRESS_MIN_LEN - * - (write only) is_compressible() returns 1 - * - * Return false otherwise. - */ +int smb_compress(struct TCP_Server_Info *server, struct smb_rqst *rq, compress_send_fn send_fn); bool should_compress(const struct cifs_tcon *tcon, const struct smb_rqst *rq); -/** +/* * smb_compress_alg_valid() - Validate a compression algorithm. * @alg: Compression algorithm to check. * @valid_none: Conditional check whether NONE algorithm should be diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 0049d65de50b..ce620503e9f7 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -325,7 +325,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) cifs_dbg(FYI, "%s: moving mids to private list\n", __func__); spin_lock(&server->mid_queue_lock); list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) { - kref_get(&mid->refcount); + smb_get_mid(mid); if (mid->mid_state == MID_REQUEST_SUBMITTED) mid->mid_state = MID_RETRY_NEEDED; list_move(&mid->qhead, &retry_list); @@ -337,8 +337,8 @@ cifs_abort_connection(struct TCP_Server_Info *server) cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_entry_safe(mid, nmid, &retry_list, qhead) { list_del_init(&mid->qhead); - mid_execute_callback(mid); - release_mid(mid); + mid_execute_callback(server, mid); + release_mid(server, mid); } } @@ -425,7 +425,7 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, spin_unlock(&server->srv_lock); cifs_swn_reset_server_dstaddr(server); cifs_server_unlock(server); - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + cifs_queue_server_reconn(server); } } while (server->tcpStatus == CifsNeedReconnect); @@ -564,7 +564,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) spin_unlock(&server->srv_lock); cifs_swn_reset_server_dstaddr(server); cifs_server_unlock(server); - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + cifs_queue_server_reconn(server); } while (server->tcpStatus == CifsNeedReconnect); dfs_cache_noreq_update_tgthint(ref_path, target_hint); @@ -882,7 +882,7 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) */ spin_lock(&server->mid_queue_lock); list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) { - kref_get(&mid->refcount); + smb_get_mid(mid); list_move(&mid->qhead, &dispose_list); mid->deleted_from_q = true; } @@ -915,8 +915,8 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) list_del_init(&mid->qhead); mid->mid_rc = mid_rc; mid->mid_state = MID_RC; - mid_execute_callback(mid); - release_mid(mid); + mid_execute_callback(server, mid); + release_mid(server, mid); } /* @@ -948,12 +948,12 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) } void -dequeue_mid(struct mid_q_entry *mid, bool malformed) +dequeue_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid, bool malformed) { #ifdef CONFIG_CIFS_STATS2 mid->when_received = jiffies; #endif - spin_lock(&mid->server->mid_queue_lock); + spin_lock(&server->mid_queue_lock); if (!malformed) mid->mid_state = MID_RESPONSE_RECEIVED; else @@ -963,12 +963,12 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) * function has finished processing it is a bug. */ if (mid->deleted_from_q == true) { - spin_unlock(&mid->server->mid_queue_lock); + spin_unlock(&server->mid_queue_lock); pr_warn_once("trying to dequeue a deleted mid\n"); } else { list_del_init(&mid->qhead); mid->deleted_from_q = true; - spin_unlock(&mid->server->mid_queue_lock); + spin_unlock(&server->mid_queue_lock); } } @@ -1004,7 +1004,7 @@ handle_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server, else server->smallbuf = NULL; } - dequeue_mid(mid, malformed); + dequeue_mid(server, mid, malformed); } int @@ -1101,7 +1101,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); cifs_dbg(FYI, "Clearing mid %llu\n", mid_entry->mid); - kref_get(&mid_entry->refcount); + smb_get_mid(mid_entry); mid_entry->mid_state = MID_SHUTDOWN; list_move(&mid_entry->qhead, &dispose_list); mid_entry->deleted_from_q = true; @@ -1113,8 +1113,8 @@ clean_demultiplex_info(struct TCP_Server_Info *server) mid_entry = list_entry(tmp, struct mid_q_entry, qhead); cifs_dbg(FYI, "Callback mid %llu\n", mid_entry->mid); list_del_init(&mid_entry->qhead); - mid_execute_callback(mid_entry); - release_mid(mid_entry); + mid_execute_callback(server, mid_entry); + release_mid(server, mid_entry); } /* 1/8th of sec is more than enough time for them to exit */ msleep(125); @@ -1155,15 +1155,14 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) unsigned int pdu_length = server->pdu_size; /* make sure this will fit in a large buffer */ - if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) - - HEADER_PREAMBLE_SIZE(server)) { + if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server)) { cifs_server_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length); cifs_reconnect(server, true); return -ECONNABORTED; } /* switch to large buffer if too big for a small one */ - if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { + if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE) { server->large_buf = true; memcpy(server->bigbuf, buf, server->total_read); buf = server->bigbuf; @@ -1196,7 +1195,8 @@ cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid) * 48 bytes is enough to display the header and a little bit * into the payload for debugging purposes. */ - rc = server->ops->check_message(buf, server->total_read, server); + rc = server->ops->check_message(buf, server->pdu_size, + server->total_read, server); if (rc) cifs_dump_mem("Bad SMB: ", buf, min_t(unsigned int, server->total_read, 48)); @@ -1286,16 +1286,13 @@ cifs_demultiplex_thread(void *p) if (length < 0) continue; - if (is_smb1(server)) - server->total_read = length; - else - server->total_read = 0; + server->total_read = 0; /* * The right amount was read from socket - 4 bytes, * so we can now interpret the length field. */ - pdu_length = get_rfc1002_len(buf); + pdu_length = be32_to_cpup(((__be32 *)buf)) & 0xffffff; cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length); if (!is_smb_response(server, buf[0])) @@ -1314,9 +1311,8 @@ next_pdu: } /* read down to the MID */ - length = cifs_read_from_socket(server, - buf + HEADER_PREAMBLE_SIZE(server), - MID_HEADER_SIZE(server)); + length = cifs_read_from_socket(server, buf, + MID_HEADER_SIZE(server)); if (length < 0) continue; server->total_read += length; @@ -1348,6 +1344,8 @@ next_pdu: bufs[0] = buf; num_mids = 1; + if (mids[0]) + mids[0]->response_pdu_len = pdu_length; if (!mids[0] || !mids[0]->receive) length = standard_receive3(server, mids[0]); else @@ -1357,7 +1355,7 @@ next_pdu: if (length < 0) { for (i = 0; i < num_mids; i++) if (mids[i]) - release_mid(mids[i]); + release_mid(server, mids[i]); continue; } @@ -1390,9 +1388,9 @@ next_pdu: } if (!mids[i]->multiRsp || mids[i]->multiEnd) - mid_execute_callback(mids[i]); + mid_execute_callback(server, mids[i]); - release_mid(mids[i]); + release_mid(server, mids[i]); } else if (server->ops->is_oplock_break && server->ops->is_oplock_break(bufs[i], server)) { @@ -1406,7 +1404,7 @@ next_pdu: smb2_add_credits_from_hdr(bufs[i], server); #ifdef CONFIG_CIFS_DEBUG2 if (server->ops->dump_detail) - server->ops->dump_detail(bufs[i], + server->ops->dump_detail(bufs[i], pdu_length, server); cifs_dump_mids(server); #endif /* CIFS_DEBUG2 */ @@ -3242,7 +3240,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server) if (be16_to_cpu(resp.length) != 0) { cifs_dbg(VFS, "RFC 1002 positive session response but with invalid non-zero length %u\n", be16_to_cpu(resp.length)); - return -EIO; + return smb_EIO(smb_eio_trace_rx_pos_sess_resp); } cifs_dbg(FYI, "RFC 1002 positive session response"); break; @@ -3281,17 +3279,18 @@ ip_rfc1001_connect(struct TCP_Server_Info *server) break; case RFC1002_INSUFFICIENT_RESOURCE: /* remote server resource error */ + smb_EIO(smb_eio_trace_rx_insuff_res); rc = -EREMOTEIO; break; case RFC1002_UNSPECIFIED_ERROR: default: /* other/unknown error */ - rc = -EIO; + rc = smb_EIO(smb_eio_trace_rx_unspec_error); break; } } else { cifs_dbg(VFS, "RFC 1002 negative session response\n"); - rc = -EIO; + rc = smb_EIO(smb_eio_trace_rx_neg_sess_resp); } return rc; case RFC1002_RETARGET_SESSION_RESPONSE: @@ -3313,7 +3312,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server) return -EMULTIHOP; default: cifs_dbg(VFS, "RFC 1002 unknown response type 0x%x\n", resp.type); - return -EIO; + return smb_EIO1(smb_eio_trace_rx_unknown_resp, resp.type); } server->with_rfc1001 = true; @@ -3927,7 +3926,9 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) ctx->prepath = NULL; out: - cifs_try_adding_channels(mnt_ctx.ses); + smb3_update_ses_channels(mnt_ctx.ses, mnt_ctx.server, + false /* from_reconnect */, + false /* disable_mchan */); rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); if (rc) goto error; @@ -3999,11 +4000,11 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, TCONX_RSP *pSMBr; unsigned char *bcc_ptr; int rc = 0; - int length; + int length, in_len; __u16 bytes_left, count; if (ses == NULL) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); smb_buffer = cifs_buf_get(); if (smb_buffer == NULL) @@ -4011,8 +4012,8 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, smb_buffer_response = smb_buffer; - header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX, - NULL /*no tid */, 4 /*wct */); + in_len = header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX, + NULL /*no tid */, 4 /*wct */); smb_buffer->Mid = get_next_mid(ses->server); smb_buffer->Uid = ses->Suid; @@ -4053,11 +4054,11 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, bcc_ptr += strlen("?????"); bcc_ptr += 1; count = bcc_ptr - &pSMB->Password[0]; - be32_add_cpu(&pSMB->hdr.smb_buf_length, count); + in_len += count; pSMB->ByteCount = cpu_to_le16(count); - rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, - 0); + rc = SendReceive(xid, ses, smb_buffer, in_len, smb_buffer_response, + &length, 0); /* above now done in SendReceive */ if (rc == 0) { @@ -4237,8 +4238,10 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&pserver->dstaddr; struct sockaddr_in *addr = (struct sockaddr_in *)&pserver->dstaddr; bool is_binding = false; + bool new_ses; spin_lock(&ses->ses_lock); + new_ses = ses->ses_status == SES_NEW; cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n", __func__, ses->chans_need_reconnect); @@ -4324,7 +4327,10 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, } if (rc) { - cifs_server_dbg(VFS, "Send error in SessSetup = %d\n", rc); + if (new_ses) { + cifs_server_dbg(VFS, "failed to create a new SMB session with %s: %d\n", + get_security_type_str(ses->sectype), rc); + } spin_lock(&ses->ses_lock); if (ses->ses_status == SES_IN_SETUP) ses->ses_status = SES_NEED_RECON; diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index da5597dbf5b9..747256025e49 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -457,7 +457,7 @@ out_err: int cifs_atomic_open(struct inode *inode, struct dentry *direntry, - struct file *file, unsigned oflags, umode_t mode) + struct file *file, unsigned int oflags, umode_t mode) { int rc; unsigned int xid; @@ -471,7 +471,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, struct cifs_open_info_data buf = {}; if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); /* * Posix open is only called (at lookup time) for file create now. For @@ -589,7 +589,7 @@ int cifs_create(struct mnt_idmap *idmap, struct inode *inode, inode, direntry, direntry); if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) { - rc = -EIO; + rc = smb_EIO(smb_eio_trace_forced_shutdown); goto out_free_xid; } @@ -631,7 +631,7 @@ int cifs_mknod(struct mnt_idmap *idmap, struct inode *inode, cifs_sb = CIFS_SB(inode->i_sb); if (unlikely(cifs_forced_shutdown(cifs_sb))) - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) diff --git a/fs/smb/client/dns_resolve.h b/fs/smb/client/dns_resolve.h index 0dc706f2c422..36bc4a6a55bf 100644 --- a/fs/smb/client/dns_resolve.h +++ b/fs/smb/client/dns_resolve.h @@ -15,8 +15,6 @@ #include "cifsglob.h" #include "cifsproto.h" -#ifdef __KERNEL__ - int dns_resolve_name(const char *dom, const char *name, size_t namelen, struct sockaddr *ip_addr); @@ -36,6 +34,4 @@ static inline int dns_resolve_unc(const char *dom, const char *unc, return dns_resolve_name(dom, name, namelen, ip_addr); } -#endif /* KERNEL */ - #endif /* _DNS_RESOLVE_H */ diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9dc0a968ec89..7ff5cc9c5c5b 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -118,7 +118,7 @@ static void cifs_issue_write(struct netfs_io_subrequest *subreq) int rc; if (cifs_forced_shutdown(sbi)) { - rc = -EIO; + rc = smb_EIO(smb_eio_trace_forced_shutdown); goto fail; } @@ -286,7 +286,7 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) req->pid = req->cfile->pid; } else if (rreq->origin != NETFS_WRITEBACK) { WARN_ON_ONCE(1); - return -EIO; + return smb_EIO1(smb_eio_trace_not_netfs_writeback, rreq->origin); } return 0; @@ -1036,7 +1036,7 @@ int cifs_open(struct inode *inode, struct file *file) cifs_sb = CIFS_SB(inode->i_sb); if (unlikely(cifs_forced_shutdown(cifs_sb))) { free_xid(xid); - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); } tlink = cifs_sb_tlink(cifs_sb); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 2a0d8b87bd8e..c2de97e4ad59 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -505,7 +505,7 @@ cifs_parse_smb_version(struct fs_context *fc, char *value, struct smb3_fs_contex case Smb_20: cifs_errorf(fc, "vers=2.0 mount not permitted when legacy dialects disabled\n"); return 1; -#endif /* CIFS_ALLOW_INSECURE_LEGACY */ +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ case Smb_21: ctx->ops = &smb21_operations; ctx->vals = &smb21_values; @@ -711,12 +711,54 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) return 0; } +static int smb3_handle_conflicting_options(struct fs_context *fc) +{ + struct smb3_fs_context *ctx = smb3_fc2context(fc); + + if (ctx->multichannel_specified) { + if (ctx->multichannel) { + if (!ctx->max_channels_specified) { + ctx->max_channels = 2; + } else if (ctx->max_channels == 1) { + cifs_errorf(fc, + "max_channels must be greater than 1 when multichannel is enabled\n"); + return -EINVAL; + } + } else { + if (!ctx->max_channels_specified) { + ctx->max_channels = 1; + } else if (ctx->max_channels > 1) { + cifs_errorf(fc, + "max_channels must be equal to 1 when multichannel is disabled\n"); + return -EINVAL; + } + } + } else { + if (ctx->max_channels_specified) { + if (ctx->max_channels > 1) + ctx->multichannel = true; + else + ctx->multichannel = false; + } else { + ctx->multichannel = false; + ctx->max_channels = 1; + } + } + + //resetting default values as remount doesn't initialize fs_context again + ctx->multichannel_specified = false; + ctx->max_channels_specified = false; + + return 0; +} + static void smb3_fs_context_free(struct fs_context *fc); static int smb3_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param); static int smb3_fs_context_parse_monolithic(struct fs_context *fc, void *data); static int smb3_get_tree(struct fs_context *fc); +static void smb3_sync_ses_chan_max(struct cifs_ses *ses, unsigned int max_channels); static int smb3_reconfigure(struct fs_context *fc); static const struct fs_context_operations smb3_fs_context_ops = { @@ -784,6 +826,7 @@ static int smb3_fs_context_parse_monolithic(struct fs_context *fc, if (ret < 0) break; } + ret = smb3_handle_conflicting_options(fc); return ret; } @@ -1013,6 +1056,22 @@ int smb3_sync_session_ctx_passwords(struct cifs_sb_info *cifs_sb, struct cifs_se return 0; } +/* + * smb3_sync_ses_chan_max - Synchronize the session's maximum channel count + * @ses: pointer to the old CIFS session structure + * @max_channels: new maximum number of channels to allow + * + * Updates the session's chan_max field to the new value, protecting the update + * with the session's channel lock. This should be called whenever the maximum + * allowed channels for a session changes (e.g., after a remount or reconfigure). + */ +static void smb3_sync_ses_chan_max(struct cifs_ses *ses, unsigned int max_channels) +{ + spin_lock(&ses->chan_lock); + ses->chan_max = max_channels; + spin_unlock(&ses->chan_lock); +} + static int smb3_reconfigure(struct fs_context *fc) { struct smb3_fs_context *ctx = smb3_fc2context(fc); @@ -1095,7 +1154,39 @@ static int smb3_reconfigure(struct fs_context *fc) ses->password2 = new_password2; } - mutex_unlock(&ses->session_mutex); + /* + * If multichannel or max_channels has changed, update the session's channels accordingly. + * This may add or remove channels to match the new configuration. + */ + if ((ctx->multichannel != cifs_sb->ctx->multichannel) || + (ctx->max_channels != cifs_sb->ctx->max_channels)) { + + /* Synchronize ses->chan_max with the new mount context */ + smb3_sync_ses_chan_max(ses, ctx->max_channels); + /* Now update the session's channels to match the new configuration */ + /* Prevent concurrent scaling operations */ + spin_lock(&ses->ses_lock); + if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) { + spin_unlock(&ses->ses_lock); + mutex_unlock(&ses->session_mutex); + return -EINVAL; + } + ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); + + mutex_unlock(&ses->session_mutex); + + rc = smb3_update_ses_channels(ses, ses->server, + false /* from_reconnect */, + false /* disable_mchan */); + + /* Clear scaling flag after operation */ + spin_lock(&ses->ses_lock); + ses->flags &= ~CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); + } else { + mutex_unlock(&ses->session_mutex); + } STEAL_STRING(cifs_sb, ctx, domainname); STEAL_STRING(cifs_sb, ctx, nodename); @@ -1250,15 +1341,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->nodelete = 1; break; case Opt_multichannel: - if (result.negated) { + ctx->multichannel_specified = true; + if (result.negated) ctx->multichannel = false; - ctx->max_channels = 1; - } else { + else ctx->multichannel = true; - /* if number of channels not specified, default to 2 */ - if (ctx->max_channels < 2) - ctx->max_channels = 2; - } break; case Opt_uid: ctx->linux_uid = result.uid; @@ -1394,15 +1481,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->max_credits = result.uint_32; break; case Opt_max_channels: + ctx->max_channels_specified = true; if (result.uint_32 < 1 || result.uint_32 > CIFS_MAX_CHANNELS) { cifs_errorf(fc, "%s: Invalid max_channels value, needs to be 1-%d\n", __func__, CIFS_MAX_CHANNELS); goto cifs_parse_mount_err; } ctx->max_channels = result.uint_32; - /* If more than one channel requested ... they want multichan */ - if (result.uint_32 > 1) - ctx->multichannel = true; break; case Opt_max_cached_dirs: if (result.uint_32 < 1) { @@ -1820,13 +1905,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, goto cifs_parse_mount_err; } - /* - * Multichannel is not meaningful if max_channels is 1. - * Force multichannel to false to ensure consistent configuration. - */ - if (ctx->multichannel && ctx->max_channels == 1) - ctx->multichannel = false; - return 0; cifs_parse_mount_err: @@ -1913,6 +1991,8 @@ int smb3_init_fs_context(struct fs_context *fc) /* default to no multichannel (single server connection) */ ctx->multichannel = false; + ctx->multichannel_specified = false; + ctx->max_channels_specified = false; ctx->max_channels = 1; ctx->backupuid_specified = false; /* no backup intent for a user */ diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index b0fec6b9a23b..7af7cbbe4208 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -294,6 +294,8 @@ struct smb3_fs_context { bool domainauto:1; bool rdma:1; bool multichannel:1; + bool multichannel_specified:1; /* true if user specified multichannel or nomultichannel */ + bool max_channels_specified:1; /* true if user specified max_channels */ bool use_client_guid:1; /* reuse existing guid for multichannel */ u8 client_guid[SMB2_CLIENT_GUID_SIZE]; diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 400e4fbd450f..f9ee95953fa4 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1952,7 +1952,7 @@ static int __cifs_unlink(struct inode *dir, struct dentry *dentry, bool sillyren cifs_dbg(FYI, "cifs_unlink, dir=0x%p, dentry=0x%p\n", dir, dentry); if (unlikely(cifs_forced_shutdown(cifs_sb))) - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); /* Unhash dentry in advance to prevent any concurrent opens */ spin_lock(&dentry->d_lock); @@ -2268,7 +2268,7 @@ struct dentry *cifs_mkdir(struct mnt_idmap *idmap, struct inode *inode, cifs_sb = CIFS_SB(inode->i_sb); if (unlikely(cifs_forced_shutdown(cifs_sb))) - return ERR_PTR(-EIO); + return ERR_PTR(smb_EIO(smb_eio_trace_forced_shutdown)); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return ERR_CAST(tlink); @@ -2354,7 +2354,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cifs_sb = CIFS_SB(inode->i_sb); if (unlikely(cifs_forced_shutdown(cifs_sb))) { - rc = -EIO; + rc = smb_EIO(smb_eio_trace_forced_shutdown); goto rmdir_exit; } @@ -2516,7 +2516,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, cifs_sb = CIFS_SB(source_dir->i_sb); if (unlikely(cifs_forced_shutdown(cifs_sb))) - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); /* * Prevent any concurrent opens on the target by unhashing the dentry. @@ -2901,7 +2901,7 @@ int cifs_getattr(struct mnt_idmap *idmap, const struct path *path, int rc; if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); /* * We need to be sure that all dirty pages are written and the server @@ -2976,7 +2976,7 @@ int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start, int rc; if (unlikely(cifs_forced_shutdown(cifs_sb))) - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); /* * We need to be sure that all dirty pages are written as they @@ -3468,7 +3468,7 @@ cifs_setattr(struct mnt_idmap *idmap, struct dentry *direntry, #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ if (unlikely(cifs_forced_shutdown(cifs_sb))) - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); /* * Avoid setting [cm]time with O_TRUNC to prevent the server from * disabling automatic timestamp updates as specified in diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index 70f3c0c67eeb..fdfdc9a3abdd 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -160,7 +160,8 @@ create_mf_symlink(const unsigned int xid, struct cifs_tcon *tcon, goto out; if (bytes_written != CIFS_MF_SYMLINK_FILE_SIZE) - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_symlink_file_size, + bytes_written, CIFS_MF_SYMLINK_FILE_SIZE); out: kfree(buf); return rc; @@ -424,7 +425,8 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, /* Make sure we wrote all of the symlink data */ if ((rc == 0) && (*pbytes_written != CIFS_MF_SYMLINK_FILE_SIZE)) - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_short_symlink_write, + *pbytes_written, CIFS_MF_SYMLINK_FILE_SIZE); SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); @@ -451,7 +453,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, struct cifsInodeInfo *cifsInode; if (unlikely(cifs_forced_shutdown(cifs_sb))) - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -553,7 +555,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, struct inode *newinode = NULL; if (unlikely(cifs_forced_shutdown(cifs_sb))) - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); page = alloc_dentry_path(); if (!page) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 340c44dc7b5b..9529fa385938 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -18,6 +18,7 @@ #include "nterr.h" #include "cifs_unicode.h" #include "smb2pdu.h" +#include "smb2proto.h" #include "cifsfs.h" #ifdef CONFIG_CIFS_DFS_UPCALL #include "dns_resolve.h" @@ -264,19 +265,18 @@ free_rsp_buf(int resp_buftype, void *rsp) /* NB: MID can not be set if treeCon not passed in, in that case it is responsibility of caller to set the mid */ -void -header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , +unsigned int +header_assemble(struct smb_hdr *buffer, char smb_command, const struct cifs_tcon *treeCon, int word_count /* length of fixed section (word count) in two byte units */) { + unsigned int in_len; char *temp = (char *) buffer; memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */ - buffer->smb_buf_length = cpu_to_be32( - (2 * word_count) + sizeof(struct smb_hdr) - - 4 /* RFC 1001 length field does not count */ + - 2 /* for bcc field itself */) ; + in_len = (2 * word_count) + sizeof(struct smb_hdr) + + 2 /* for bcc field itself */; buffer->Protocol[0] = 0xFF; buffer->Protocol[1] = 'S'; @@ -311,7 +311,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , /* endian conversion of flags is now done just before sending */ buffer->WordCount = (char) word_count; - return; + return in_len; } static int @@ -346,10 +346,11 @@ check_smb_hdr(struct smb_hdr *smb) } int -checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) +checkSMB(char *buf, unsigned int pdu_len, unsigned int total_read, + struct TCP_Server_Info *server) { struct smb_hdr *smb = (struct smb_hdr *)buf; - __u32 rfclen = be32_to_cpu(smb->smb_buf_length); + __u32 rfclen = pdu_len; __u32 clc_len; /* calculated length */ cifs_dbg(FYI, "checkSMB Length: 0x%x, smb_buf_length: 0x%x\n", total_read, rfclen); @@ -379,42 +380,47 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) return 0; } cifs_dbg(VFS, "rcvd invalid byte count (bcc)\n"); + return smb_EIO1(smb_eio_trace_rx_inv_bcc, tmp[sizeof(struct smb_hdr)]); } else { cifs_dbg(VFS, "Length less than smb header size\n"); + return smb_EIO2(smb_eio_trace_rx_too_short, + total_read, smb->WordCount); } - return -EIO; } else if (total_read < sizeof(*smb) + 2 * smb->WordCount) { cifs_dbg(VFS, "%s: can't read BCC due to invalid WordCount(%u)\n", __func__, smb->WordCount); - return -EIO; + return smb_EIO2(smb_eio_trace_rx_check_rsp, + total_read, 2 + sizeof(struct smb_hdr)); } /* otherwise, there is enough to get to the BCC */ if (check_smb_hdr(smb)) - return -EIO; + return smb_EIO1(smb_eio_trace_rx_rfc1002_magic, *(u32 *)smb->Protocol); clc_len = smbCalcSize(smb); - if (4 + rfclen != total_read) { - cifs_dbg(VFS, "Length read does not match RFC1001 length %d\n", - rfclen); - return -EIO; + if (rfclen != total_read) { + cifs_dbg(VFS, "Length read does not match RFC1001 length %d/%d\n", + rfclen, total_read); + return smb_EIO2(smb_eio_trace_rx_check_rsp, + total_read, rfclen); } - if (4 + rfclen != clc_len) { + if (rfclen != clc_len) { __u16 mid = get_mid(smb); /* check if bcc wrapped around for large read responses */ if ((rfclen > 64 * 1024) && (rfclen > clc_len)) { /* check if lengths match mod 64K */ - if (((4 + rfclen) & 0xFFFF) == (clc_len & 0xFFFF)) + if (((rfclen) & 0xFFFF) == (clc_len & 0xFFFF)) return 0; /* bcc wrapped */ } cifs_dbg(FYI, "Calculated size %u vs length %u mismatch for mid=%u\n", - clc_len, 4 + rfclen, mid); + clc_len, rfclen, mid); - if (4 + rfclen < clc_len) { + if (rfclen < clc_len) { cifs_dbg(VFS, "RFC1001 size %u smaller than SMB for mid=%u\n", rfclen, mid); - return -EIO; + return smb_EIO2(smb_eio_trace_rx_calc_len_too_big, + rfclen, clc_len); } else if (rfclen > clc_len + 512) { /* * Some servers (Windows XP in particular) send more @@ -427,7 +433,8 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) */ cifs_dbg(VFS, "RFC1001 size %u more than 512 bytes larger than SMB for mid=%u\n", rfclen, mid); - return -EIO; + return smb_EIO2(smb_eio_trace_rx_overlong, + rfclen, clc_len + 512); } } return 0; @@ -451,7 +458,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) (struct smb_com_transaction_change_notify_rsp *)buf; struct file_notify_information *pnotify; __u32 data_offset = 0; - size_t len = srv->total_read - sizeof(pSMBr->hdr.smb_buf_length); + size_t len = srv->total_read - srv->pdu_size; if (get_bcc(buf) > sizeof(struct file_notify_information)) { data_offset = le32_to_cpu(pSMBr->DataOffset); diff --git a/fs/smb/client/netmisc.c b/fs/smb/client/netmisc.c index 9ec20601cee2..ae15f0bef009 100644 --- a/fs/smb/client/netmisc.c +++ b/fs/smb/client/netmisc.c @@ -200,7 +200,7 @@ cifs_set_port(struct sockaddr *addr, const unsigned short int port) } /***************************************************************************** -convert a NT status code to a dos class/code + *convert a NT status code to a dos class/code *****************************************************************************/ /* NT status -> dos error map */ static const struct { @@ -885,11 +885,16 @@ map_smb_to_linux_error(char *buf, bool logErr) /* generic corrective action e.g. reconnect SMB session on * ERRbaduid could be added */ + if (rc == -EIO) + smb_EIO2(smb_eio_trace_smb1_received_error, + le32_to_cpu(smb->Status.CifsError), + le16_to_cpu(smb->Flags2)); return rc; } int -map_and_check_smb_error(struct mid_q_entry *mid, bool logErr) +map_and_check_smb_error(struct TCP_Server_Info *server, + struct mid_q_entry *mid, bool logErr) { int rc; struct smb_hdr *smb = (struct smb_hdr *)mid->resp_buf; @@ -904,7 +909,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr) if (class == ERRSRV && code == ERRbaduid) { cifs_dbg(FYI, "Server returned 0x%x, reconnecting session...\n", code); - cifs_signal_cifsd_for_reconnect(mid->server, false); + cifs_signal_cifsd_for_reconnect(server, false); } } diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 7ff728503ed1..6844f1dc3921 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -775,7 +775,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, if (cfile->srch_inf.ntwrk_buf_start == NULL) { cifs_dbg(VFS, "ntwrk_buf_start is NULL during readdir\n"); - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); } end_of_smb = cfile->srch_inf.ntwrk_buf_start + diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 10c84c095fe7..ce9b923498b5 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -732,7 +732,8 @@ static int parse_reparse_nfs(struct reparse_nfs_data_buffer *buf, len = le16_to_cpu(buf->ReparseDataLength); if (len < sizeof(buf->InodeType)) { cifs_dbg(VFS, "srv returned malformed nfs buffer\n"); - return -EIO; + return smb_EIO2(smb_eio_trace_reparse_nfs_too_short, + len, sizeof(buf->InodeType)); } len -= sizeof(buf->InodeType); @@ -741,7 +742,7 @@ static int parse_reparse_nfs(struct reparse_nfs_data_buffer *buf, case NFS_SPECFILE_LNK: if (len == 0 || (len % 2)) { cifs_dbg(VFS, "srv returned malformed nfs symlink buffer\n"); - return -EIO; + return smb_EIO1(smb_eio_trace_reparse_nfs_symbuf, len); } /* * Check that buffer does not contain UTF-16 null codepoint @@ -749,7 +750,7 @@ static int parse_reparse_nfs(struct reparse_nfs_data_buffer *buf, */ if (UniStrnlen((wchar_t *)buf->DataBuffer, len/2) != len/2) { cifs_dbg(VFS, "srv returned null byte in nfs symlink target location\n"); - return -EIO; + return smb_EIO1(smb_eio_trace_reparse_nfs_nul, len); } data->symlink_target = cifs_strndup_from_utf16(buf->DataBuffer, len, true, @@ -764,7 +765,7 @@ static int parse_reparse_nfs(struct reparse_nfs_data_buffer *buf, /* DataBuffer for block and char devices contains two 32-bit numbers */ if (len != 8) { cifs_dbg(VFS, "srv returned malformed nfs buffer for type: 0x%llx\n", type); - return -EIO; + return smb_EIO1(smb_eio_trace_reparse_nfs_dev, len); } break; case NFS_SPECFILE_FIFO: @@ -772,7 +773,7 @@ static int parse_reparse_nfs(struct reparse_nfs_data_buffer *buf, /* DataBuffer for fifos and sockets is empty */ if (len != 0) { cifs_dbg(VFS, "srv returned malformed nfs buffer for type: 0x%llx\n", type); - return -EIO; + return smb_EIO1(smb_eio_trace_reparse_nfs_sockfifo, len); } break; default: @@ -796,13 +797,13 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, int abs_path_len; char *abs_path; int levels; - int rc; + int rc, ulen; int i; /* Check that length it valid */ if (!len || (len % 2)) { cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_reparse_native_nul, len); goto out; } @@ -810,9 +811,10 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, * Check that buffer does not contain UTF-16 null codepoint * because Linux cannot process symlink with null byte. */ - if (UniStrnlen((wchar_t *)buf, len/2) != len/2) { + ulen = UniStrnlen((wchar_t *)buf, len/2); + if (ulen != len/2) { cifs_dbg(VFS, "srv returned null byte in native symlink target location\n"); - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_reparse_native_nul, ulen, len); goto out; } @@ -996,7 +998,8 @@ static int parse_reparse_native_symlink(struct reparse_symlink_data_buffer *sym, len = le16_to_cpu(sym->SubstituteNameLength); if (offs + 20 > plen || offs + len + 20 > plen) { cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); - return -EIO; + return smb_EIO2(smb_eio_trace_reparse_native_sym_len, + offs << 16 | len, plen); } return smb2_parse_native_symlink(&data->symlink_target, @@ -1019,13 +1022,16 @@ static int parse_reparse_wsl_symlink(struct reparse_wsl_symlink_data_buffer *buf if (len <= data_offset) { cifs_dbg(VFS, "srv returned malformed wsl symlink buffer\n"); - return -EIO; + return smb_EIO2(smb_eio_trace_reparse_wsl_symbuf, + len, data_offset); } /* MS-FSCC 2.1.2.7 defines layout of the Target field only for Version 2. */ - if (le32_to_cpu(buf->Version) != 2) { - cifs_dbg(VFS, "srv returned unsupported wsl symlink version %u\n", le32_to_cpu(buf->Version)); - return -EIO; + u32 version = le32_to_cpu(buf->Version); + + if (version != 2) { + cifs_dbg(VFS, "srv returned unsupported wsl symlink version %u\n", version); + return smb_EIO1(smb_eio_trace_reparse_wsl_ver, version); } /* Target for Version 2 is in UTF-8 but without trailing null-term byte */ @@ -1034,9 +1040,12 @@ static int parse_reparse_wsl_symlink(struct reparse_wsl_symlink_data_buffer *buf * Check that buffer does not contain null byte * because Linux cannot process symlink with null byte. */ - if (strnlen(buf->Target, symname_utf8_len) != symname_utf8_len) { + size_t ulen = strnlen(buf->Target, symname_utf8_len); + + if (ulen != symname_utf8_len) { cifs_dbg(VFS, "srv returned null byte in wsl symlink target location\n"); - return -EIO; + return smb_EIO2(smb_eio_trace_reparse_wsl_ver, + ulen, symname_utf8_len); } symname_utf16 = kzalloc(symname_utf8_len * 2, GFP_KERNEL); if (!symname_utf16) @@ -1083,13 +1092,17 @@ int parse_reparse_point(struct reparse_data_buffer *buf, case IO_REPARSE_TAG_AF_UNIX: case IO_REPARSE_TAG_LX_FIFO: case IO_REPARSE_TAG_LX_CHR: - case IO_REPARSE_TAG_LX_BLK: - if (le16_to_cpu(buf->ReparseDataLength) != 0) { + case IO_REPARSE_TAG_LX_BLK: { + u16 dlen = le16_to_cpu(buf->ReparseDataLength); + + if (dlen != 0) { + u32 rtag = le32_to_cpu(buf->ReparseTag); cifs_dbg(VFS, "srv returned malformed buffer for reparse point: 0x%08x\n", - le32_to_cpu(buf->ReparseTag)); - return -EIO; + rtag); + return smb_EIO2(smb_eio_trace_reparse_data_len, dlen, rtag); } return 0; + } default: return -EOPNOTSUPP; } diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index ef3b498b0a02..a72d6a6d20f0 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -265,12 +265,16 @@ int cifs_try_adding_channels(struct cifs_ses *ses) } /* - * called when multichannel is disabled by the server. - * this always gets called from smb2_reconnect - * and cannot get called in parallel threads. + * cifs_decrease_secondary_channels - Reduce the number of active secondary channels + * @ses: pointer to the CIFS session structure + * @disable_mchan: if true, reduce to a single channel; if false, reduce to chan_max + * + * This function disables and cleans up extra secondary channels for a CIFS session. + * If called during reconfiguration, it reduces the channel count to the new maximum (chan_max). + * Otherwise, it disables all but the primary channel. */ void -cifs_disable_secondary_channels(struct cifs_ses *ses) +cifs_decrease_secondary_channels(struct cifs_ses *ses, bool disable_mchan) { int i, chan_count; struct TCP_Server_Info *server; @@ -281,12 +285,16 @@ cifs_disable_secondary_channels(struct cifs_ses *ses) if (chan_count == 1) goto done; - ses->chan_count = 1; - - /* for all secondary channels reset the need reconnect bit */ - ses->chans_need_reconnect &= 1; + /* Update the chan_count to the new maximum */ + if (disable_mchan) { + cifs_dbg(FYI, "server does not support multichannel anymore.\n"); + ses->chan_count = 1; + } else { + ses->chan_count = ses->chan_max; + } - for (i = 1; i < chan_count; i++) { + /* Disable all secondary channels beyond the new chan_count */ + for (i = ses->chan_count ; i < chan_count; i++) { iface = ses->chans[i].iface; server = ses->chans[i].server; @@ -318,6 +326,15 @@ cifs_disable_secondary_channels(struct cifs_ses *ses) spin_lock(&ses->chan_lock); } + /* For extra secondary channels, reset the need reconnect bit */ + if (ses->chan_count == 1) { + cifs_dbg(VFS, "Disable all secondary channels\n"); + ses->chans_need_reconnect &= 1; + } else { + cifs_dbg(VFS, "Disable extra secondary channels\n"); + ses->chans_need_reconnect &= ((1UL << ses->chan_max) - 1); + } + done: spin_unlock(&ses->chan_lock); } @@ -1313,6 +1330,7 @@ struct sess_data { struct nls_table *nls_cp; void (*func)(struct sess_data *); int result; + unsigned int in_len; /* we will send the SMB in three pieces: * a fixed length beginning part, an optional @@ -1336,11 +1354,12 @@ sess_alloc_buffer(struct sess_data *sess_data, int wct) rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses, (void **)&smb_buf); - if (rc) + if (rc < 0) return rc; + sess_data->in_len = rc; sess_data->iov[0].iov_base = (char *)smb_buf; - sess_data->iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4; + sess_data->iov[0].iov_len = sess_data->in_len; /* * This variable will be used to clear the buffer * allocated above in case of any error in the calling function. @@ -1418,7 +1437,7 @@ sess_sendreceive(struct sess_data *sess_data) struct kvec rsp_iov = { NULL, 0 }; count = sess_data->iov[1].iov_len + sess_data->iov[2].iov_len; - be32_add_cpu(&smb_buf->smb_buf_length, count); + sess_data->in_len += count; put_bcc(count, smb_buf); rc = SendReceive2(sess_data->xid, sess_data->ses, @@ -1501,7 +1520,7 @@ sess_auth_ntlmv2(struct sess_data *sess_data) smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; if (smb_buf->WordCount != 3) { - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_sess_nl2_wcc, smb_buf->WordCount); cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); goto out; } @@ -1627,7 +1646,7 @@ sess_auth_kerberos(struct sess_data *sess_data) smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; if (smb_buf->WordCount != 4) { - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_sess_krb_wcc, smb_buf->WordCount); cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); goto out_put_spnego_key; } @@ -1788,7 +1807,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data) cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n"); if (smb_buf->WordCount != 4) { - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_sess_rawnl_neg_wcc, smb_buf->WordCount); cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); goto out_free_ntlmsspblob; } @@ -1878,7 +1897,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; if (smb_buf->WordCount != 4) { - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_sess_rawnl_auth_wcc, smb_buf->WordCount); cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); goto out_free_ntlmsspblob; } diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index a479fe1396c0..9729b56bd9d4 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -30,20 +30,25 @@ * SMB_COM_NT_CANCEL request and then sends it. */ static int -send_nt_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst, - struct mid_q_entry *mid) +send_nt_cancel(struct cifs_ses *ses, struct TCP_Server_Info *server, + struct smb_rqst *rqst, struct mid_q_entry *mid, + unsigned int xid) { - int rc = 0; struct smb_hdr *in_buf = (struct smb_hdr *)rqst->rq_iov[0].iov_base; + struct kvec iov[1]; + struct smb_rqst crqst = { .rq_iov = iov, .rq_nvec = 1 }; + int rc = 0; - /* -4 for RFC1001 length and +2 for BCC field */ - in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2); + /* +2 for BCC field */ in_buf->Command = SMB_COM_NT_CANCEL; in_buf->WordCount = 0; put_bcc(0, in_buf); + iov[0].iov_base = in_buf; + iov[0].iov_len = sizeof(struct smb_hdr) + 2; + cifs_server_lock(server); - rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); + rc = cifs_sign_rqst(&crqst, server, &mid->sequence_number); if (rc) { cifs_server_unlock(server); return rc; @@ -55,7 +60,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst, * after signing here. */ --server->sequence_number; - rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); + rc = __smb_send_rqst(server, 1, &crqst); if (rc < 0) server->sequence_number--; @@ -67,6 +72,46 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst, return rc; } +/* + * Send a LOCKINGX_CANCEL_LOCK to cause the Windows blocking lock to + * return. + */ +static int +send_lock_cancel(struct cifs_ses *ses, struct TCP_Server_Info *server, + struct smb_rqst *rqst, struct mid_q_entry *mid, + unsigned int xid) +{ + struct smb_hdr *in_buf = (struct smb_hdr *)rqst->rq_iov[0].iov_base; + unsigned int in_len = rqst->rq_iov[0].iov_len; + LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; + int rc; + + /* We just modify the current in_buf to change + * the type of lock from LOCKING_ANDX_SHARED_LOCK + * or LOCKING_ANDX_EXCLUSIVE_LOCK to + * LOCKING_ANDX_CANCEL_LOCK. + */ + pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; + pSMB->Timeout = 0; + pSMB->hdr.Mid = get_next_mid(ses->server); + + rc = SendReceive(xid, ses, in_buf, in_len, NULL, NULL, 0); + if (rc == -ENOLCK) + rc = 0; /* If we get back -ENOLCK, it probably means we managed + * to cancel the lock command before it took effect. + */ + return rc; +} + +static int cifs_send_cancel(struct cifs_ses *ses, struct TCP_Server_Info *server, + struct smb_rqst *rqst, struct mid_q_entry *mid, + unsigned int xid) +{ + if (mid->sr_flags & CIFS_WINDOWS_LOCK) + return send_lock_cancel(ses, server, rqst, mid, xid); + return send_nt_cancel(ses, server, rqst, mid, xid); +} + static bool cifs_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2) { @@ -101,7 +146,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer) if (compare_mid(mid->mid, buf) && mid->mid_state == MID_REQUEST_SUBMITTED && le16_to_cpu(mid->command) == buf->Command) { - kref_get(&mid->refcount); + smb_get_mid(mid); spin_unlock(&server->mid_queue_lock); return mid; } @@ -289,7 +334,7 @@ check2ndT2(char *buf) } static int -coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) +coalesce_t2(char *second_buf, struct smb_hdr *target_hdr, unsigned int *pdu_len) { struct smb_t2_rsp *pSMBs = (struct smb_t2_rsp *)second_buf; struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)target_hdr; @@ -355,15 +400,15 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) } put_bcc(byte_count, target_hdr); - byte_count = be32_to_cpu(target_hdr->smb_buf_length); + byte_count = *pdu_len; byte_count += total_in_src; /* don't allow buffer to overflow */ - if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { cifs_dbg(FYI, "coalesced BCC exceeds buffer size (%u)\n", byte_count); return -ENOBUFS; } - target_hdr->smb_buf_length = cpu_to_be32(byte_count); + *pdu_len = byte_count; /* copy second buffer into end of first buffer */ memcpy(data_area_of_tgt, data_area_of_src, total_in_src); @@ -398,12 +443,12 @@ cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server, mid->multiRsp = true; if (mid->resp_buf) { /* merge response - fix up 1st*/ - malformed = coalesce_t2(buf, mid->resp_buf); + malformed = coalesce_t2(buf, mid->resp_buf, &mid->response_pdu_len); if (malformed > 0) return true; /* All parts received or packet is malformed. */ mid->multiEnd = true; - dequeue_mid(mid, malformed); + dequeue_mid(server, mid, malformed); return true; } if (!server->large_buf) { @@ -461,7 +506,7 @@ smb1_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) if (!(server->capabilities & CAP_LARGE_WRITE_X) || (!(server->capabilities & CAP_UNIX) && server->sign)) wsize = min_t(unsigned int, wsize, - server->maxBuf - sizeof(WRITE_REQ) + 4); + server->maxBuf - sizeof(WRITE_REQ)); /* hard limit of CIFS_MAX_WSIZE */ wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); @@ -1393,7 +1438,7 @@ cifs_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) } struct smb_version_operations smb1_operations = { - .send_cancel = send_nt_cancel, + .send_cancel = cifs_send_cancel, .compare_fids = cifs_compare_fids, .setup_request = cifs_setup_request, .setup_async_request = cifs_setup_async_request, @@ -1487,7 +1532,6 @@ struct smb_version_values smb1_values = { .exclusive_lock_type = 0, .shared_lock_type = LOCKING_ANDX_SHARED_LOCK, .unlock_lock_type = 0, - .header_preamble_size = 4, .header_size = sizeof(struct smb_hdr), .max_header_size = MAX_CIFS_HDR_SIZE, .read_rsp_size = sizeof(READ_RSP), diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c index a7f629238830..7f11ae6bb785 100644 --- a/fs/smb/client/smb2file.c +++ b/fs/smb/client/smb2file.c @@ -76,11 +76,11 @@ int smb2_fix_symlink_target_type(char **target, bool directory, struct cifs_sb_i return 0; if (!*target) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); len = strlen(*target); if (!len) - return -EIO; + return smb_EIO1(smb_eio_trace_sym_target_len, len); /* * If this is directory symlink and it does not have trailing slash then @@ -104,7 +104,7 @@ int smb2_fix_symlink_target_type(char **target, bool directory, struct cifs_sb_i * both Windows and Linux systems. So return an error for such symlink. */ if (!directory && (*target)[len-1] == '/') - return -EIO; + return smb_EIO(smb_eio_trace_sym_slash); return 0; } @@ -140,7 +140,8 @@ int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec cifs_sb); } -int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf) +int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, + __u32 *oplock, void *buf) { int rc; __le16 *smb2_path; diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index d0aad4821ed4..2ded3246600c 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -21,7 +21,6 @@ #include "cifs_unicode.h" #include "fscache.h" #include "smb2glob.h" -#include "smb2pdu.h" #include "smb2proto.h" #include "cached_dir.h" #include "../common/smb2status.h" @@ -31,16 +30,20 @@ static struct reparse_data_buffer *reparse_buf_ptr(struct kvec *iov) struct reparse_data_buffer *buf; struct smb2_ioctl_rsp *io = iov->iov_base; u32 off, count, len; + u16 rdlen; count = le32_to_cpu(io->OutputCount); off = le32_to_cpu(io->OutputOffset); if (check_add_overflow(off, count, &len) || len > iov->iov_len) - return ERR_PTR(-EIO); + return ERR_PTR(smb_EIO2(smb_eio_trace_reparse_overlong, + off, count)); buf = (struct reparse_data_buffer *)((u8 *)io + off); len = sizeof(*buf); - if (count < len || count < le16_to_cpu(buf->ReparseDataLength) + len) - return ERR_PTR(-EIO); + rdlen = le16_to_cpu(buf->ReparseDataLength); + + if (count < len || count < rdlen + len) + return ERR_PTR(smb_EIO2(smb_eio_trace_reparse_rdlen, count, rdlen)); return buf; } @@ -1635,7 +1638,7 @@ int smb2_rename_pending_delete(const char *full_path, } else { cifs_tcon_dbg(FYI, "%s: failed to rename '%s' to '%s': %d\n", __func__, full_path, to_name, rc); - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_pend_del_fail, rc); } out: cifs_put_tlink(tlink); diff --git a/fs/smb/client/smb2maperror.c b/fs/smb/client/smb2maperror.c index 12c2b868789f..4e1db02d22cb 100644 --- a/fs/smb/client/smb2maperror.c +++ b/fs/smb/client/smb2maperror.c @@ -9,11 +9,11 @@ */ #include <linux/errno.h> #include "cifsglob.h" +#include "cifsproto.h" #include "cifs_debug.h" -#include "smb2pdu.h" #include "smb2proto.h" -#include "../common/smb2status.h" #include "smb2glob.h" +#include "../common/smb2status.h" #include "trace.h" struct status_to_posix_error { @@ -23,14 +23,13 @@ struct status_to_posix_error { }; static const struct status_to_posix_error smb2_error_map_table[] = { - {STATUS_SUCCESS, 0, "STATUS_SUCCESS"}, - {STATUS_WAIT_0, 0, "STATUS_WAIT_0"}, {STATUS_WAIT_1, -EIO, "STATUS_WAIT_1"}, {STATUS_WAIT_2, -EIO, "STATUS_WAIT_2"}, {STATUS_WAIT_3, -EIO, "STATUS_WAIT_3"}, {STATUS_WAIT_63, -EIO, "STATUS_WAIT_63"}, - {STATUS_ABANDONED, -EIO, "STATUS_ABANDONED"}, - {STATUS_ABANDONED_WAIT_0, -EIO, "STATUS_ABANDONED_WAIT_0"}, + {STATUS_ABANDONED, -EIO, "STATUS_ABANDONED or STATUS_ABANDONED_WAIT_0"}, + {STATUS_ABANDONED_WAIT_0, -EIO, + "STATUS_ABANDONED or STATUS_ABANDONED_WAIT_0"}, {STATUS_ABANDONED_WAIT_63, -EIO, "STATUS_ABANDONED_WAIT_63"}, {STATUS_USER_APC, -EIO, "STATUS_USER_APC"}, {STATUS_KERNEL_APC, -EIO, "STATUS_KERNEL_APC"}, @@ -736,6 +735,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_FS_DRIVER_REQUIRED, -EOPNOTSUPP, "STATUS_FS_DRIVER_REQUIRED"}, {STATUS_IMAGE_ALREADY_LOADED_AS_DLL, -EIO, "STATUS_IMAGE_ALREADY_LOADED_AS_DLL"}, + {STATUS_INVALID_LOCK_RANGE, -EIO, "STATUS_INVALID_LOCK_RANGE"}, {STATUS_NETWORK_OPEN_RESTRICTION, -EIO, "STATUS_NETWORK_OPEN_RESTRICTION"}, {STATUS_NO_USER_SESSION_KEY, -EIO, "STATUS_NO_USER_SESSION_KEY"}, @@ -2298,8 +2298,9 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_FWP_LIFETIME_MISMATCH, -EIO, "STATUS_FWP_LIFETIME_MISMATCH"}, {STATUS_FWP_BUILTIN_OBJECT, -EIO, "STATUS_FWP_BUILTIN_OBJECT"}, {STATUS_FWP_TOO_MANY_BOOTTIME_FILTERS, -EIO, - "STATUS_FWP_TOO_MANY_BOOTTIME_FILTERS"}, - {STATUS_FWP_TOO_MANY_CALLOUTS, -EIO, "STATUS_FWP_TOO_MANY_CALLOUTS"}, + "STATUS_FWP_TOO_MANY_BOOTTIME_FILTERS or STATUS_FWP_TOO_MANY_CALLOUTS"}, + {STATUS_FWP_TOO_MANY_CALLOUTS, -EIO, + "STATUS_FWP_TOO_MANY_BOOTTIME_FILTERS or STATUS_FWP_TOO_MANY_CALLOUTS"}, {STATUS_FWP_NOTIFICATION_DROPPED, -EIO, "STATUS_FWP_NOTIFICATION_DROPPED"}, {STATUS_FWP_TRAFFIC_MISMATCH, -EIO, "STATUS_FWP_TRAFFIC_MISMATCH"}, @@ -2415,27 +2416,10 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_IPSEC_INTEGRITY_CHECK_FAILED, -EIO, "STATUS_IPSEC_INTEGRITY_CHECK_FAILED"}, {STATUS_IPSEC_CLEAR_TEXT_DROP, -EIO, "STATUS_IPSEC_CLEAR_TEXT_DROP"}, - {0, 0, NULL} + {STATUS_SMB_NO_PREAUTH_INTEGRITY_HASH_OVERLAP, -EIO, + "STATUS_SMB_NO_PREAUTH_INTEGRITY_HASH_OVERLAP"}, }; -/***************************************************************************** - Print an error message from the status code - *****************************************************************************/ -static void -smb2_print_status(__le32 status) -{ - int idx = 0; - - while (smb2_error_map_table[idx].status_string != NULL) { - if ((smb2_error_map_table[idx].smb2_status) == status) { - pr_notice("Status code returned 0x%08x %s\n", status, - smb2_error_map_table[idx].status_string); - } - idx++; - } - return; -} - int map_smb2_to_linux_error(char *buf, bool log_err) { @@ -2452,16 +2436,16 @@ map_smb2_to_linux_error(char *buf, bool log_err) return 0; } - /* mask facility */ - if (log_err && (smb2err != STATUS_MORE_PROCESSING_REQUIRED) && - (smb2err != STATUS_END_OF_FILE)) - smb2_print_status(smb2err); - else if (cifsFYI & CIFS_RC) - smb2_print_status(smb2err); + log_err = (log_err && (smb2err != STATUS_MORE_PROCESSING_REQUIRED) && + (smb2err != STATUS_END_OF_FILE)) || + (cifsFYI & CIFS_RC); for (i = 0; i < sizeof(smb2_error_map_table) / sizeof(struct status_to_posix_error); i++) { if (smb2_error_map_table[i].smb2_status == smb2err) { + if (log_err) + pr_notice("Status code returned 0x%08x %s\n", smb2err, + smb2_error_map_table[i].status_string); rc = smb2_error_map_table[i].posix_error; break; } @@ -2477,5 +2461,7 @@ map_smb2_to_linux_error(char *buf, bool log_err) le16_to_cpu(shdr->Command), le64_to_cpu(shdr->MessageId), le32_to_cpu(smb2err), rc); + if (rc == -EIO) + smb_EIO1(smb_eio_trace_smb2_received_error, le32_to_cpu(smb2err)); return rc; } diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 96bfe4c63ccf..f3cb62d91450 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -134,7 +134,8 @@ static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, } int -smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) +smb2_check_message(char *buf, unsigned int pdu_len, unsigned int len, + struct TCP_Server_Info *server) { struct TCP_Server_Info *pserver; struct smb2_hdr *shdr = (struct smb2_hdr *)buf; diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7ace6d4d305b..a16ded46b5a2 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -17,9 +17,9 @@ #include <uapi/linux/magic.h> #include "cifsfs.h" #include "cifsglob.h" -#include "smb2pdu.h" -#include "smb2proto.h" #include "cifsproto.h" +#include "smb2proto.h" +#include "smb2pdu.h" #include "cifs_debug.h" #include "cifs_unicode.h" #include "../common/smb2status.h" @@ -406,7 +406,7 @@ __smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue) if ((mid->mid == wire_mid) && (mid->mid_state == MID_REQUEST_SUBMITTED) && (mid->command == shdr->Command)) { - kref_get(&mid->refcount); + smb_get_mid(mid); if (dequeue) { list_del_init(&mid->qhead); mid->deleted_from_q = true; @@ -432,7 +432,7 @@ smb2_find_dequeue_mid(struct TCP_Server_Info *server, char *buf) } static void -smb2_dump_detail(void *buf, struct TCP_Server_Info *server) +smb2_dump_detail(void *buf, size_t buf_len, struct TCP_Server_Info *server) { #ifdef CONFIG_CIFS_DEBUG2 struct smb2_hdr *shdr = (struct smb2_hdr *)buf; @@ -440,7 +440,7 @@ smb2_dump_detail(void *buf, struct TCP_Server_Info *server) cifs_server_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d\n", shdr->Command, shdr->Status, shdr->Flags, shdr->MessageId, shdr->Id.SyncId.ProcessId); - if (!server->ops->check_message(buf, server->total_read, server)) { + if (!server->ops->check_message(buf, buf_len, server->total_read, server)) { cifs_server_dbg(VFS, "smb buf %p len %u\n", buf, server->ops->calc_smb_size(buf)); } @@ -1046,7 +1046,8 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, if (src_size < 8 + name_len + 1 + value_len) { cifs_dbg(FYI, "EA entry goes beyond length of list\n"); - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_ea_overrun, + src_size, 8 + name_len + 1 + value_len); goto out; } @@ -1607,7 +1608,7 @@ replay_again: } if (!ses || !server) { - rc = -EIO; + rc = smb_EIO(smb_eio_trace_null_pointers); goto free_vars; } @@ -1942,7 +1943,7 @@ retry: if (unlikely(ret_data_len != sizeof(*cc_rsp))) { cifs_tcon_dbg(VFS, "Copychunk invalid response: size %u/%zu\n", ret_data_len, sizeof(*cc_rsp)); - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_copychunk_inv_rsp, ret_data_len); goto out; } @@ -1952,11 +1953,18 @@ retry: if (rc == 0) { /* Check if server claimed to write more than we asked */ - if (unlikely(!bytes_written || bytes_written > copy_bytes || - !chunks_written || chunks_written > chunks)) { - cifs_tcon_dbg(VFS, "Copychunk invalid response: bytes written %u/%u, chunks written %u/%u\n", - bytes_written, copy_bytes, chunks_written, chunks); - rc = -EIO; + if (unlikely(!bytes_written || bytes_written > copy_bytes)) { + cifs_tcon_dbg(VFS, "Copychunk invalid response: bytes written %u/%u\n", + bytes_written, copy_bytes); + rc = smb_EIO2(smb_eio_trace_copychunk_overcopy_b, + bytes_written, copy_bytes); + goto out; + } + if (unlikely(!chunks_written || chunks_written > chunks)) { + cifs_tcon_dbg(VFS, "Copychunk invalid response: chunks written %u/%u\n", + chunks_written, chunks); + rc = smb_EIO2(smb_eio_trace_copychunk_overcopy_c, + chunks_written, chunks); goto out; } @@ -3127,7 +3135,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, } if (!rc && !dfs_rsp) - rc = -EIO; + rc = smb_EIO(smb_eio_trace_dfsref_no_rsp); if (rc) { if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP) cifs_tcon_dbg(FYI, "%s: ioctl error: rc=%d\n", __func__, rc); @@ -4555,7 +4563,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, buffer, 0, 0, size); if (!cifs_copy_iter_to_folioq(&old->rq_iter, size, buffer)) { - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_tx_copy_iter_to_buf, size); goto err_free; } } @@ -4656,7 +4664,8 @@ cifs_copy_folioq_to_iter(struct folio_queue *folioq, size_t data_size, n = copy_folio_to_iter(folio, skip, len, iter); if (n != len) { cifs_dbg(VFS, "%s: something went wrong\n", __func__); - return -EIO; + return smb_EIO2(smb_eio_trace_rx_copy_to_iter, + n, len); } data_size -= n; skip = 0; @@ -4716,7 +4725,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, if (is_offloaded) mid->mid_state = MID_RESPONSE_RECEIVED; else - dequeue_mid(mid, false); + dequeue_mid(server, mid, false); return 0; } @@ -4739,11 +4748,11 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, /* data_offset is beyond the end of smallbuf */ cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n", __func__, data_offset); - rdata->result = -EIO; + rdata->result = smb_EIO1(smb_eio_trace_rx_overlong, data_offset); if (is_offloaded) mid->mid_state = MID_RESPONSE_MALFORMED; else - dequeue_mid(mid, rdata->result); + dequeue_mid(server, mid, rdata->result); return 0; } @@ -4758,21 +4767,21 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, /* data offset is beyond the 1st page of response */ cifs_dbg(FYI, "%s: data offset (%u) beyond 1st page of response\n", __func__, data_offset); - rdata->result = -EIO; + rdata->result = smb_EIO1(smb_eio_trace_rx_overpage, data_offset); if (is_offloaded) mid->mid_state = MID_RESPONSE_MALFORMED; else - dequeue_mid(mid, rdata->result); + dequeue_mid(server, mid, rdata->result); return 0; } if (data_len > buffer_len - pad_len) { /* data_len is corrupt -- discard frame */ - rdata->result = -EIO; + rdata->result = smb_EIO1(smb_eio_trace_rx_bad_datalen, data_len); if (is_offloaded) mid->mid_state = MID_RESPONSE_MALFORMED; else - dequeue_mid(mid, rdata->result); + dequeue_mid(server, mid, rdata->result); return 0; } @@ -4783,7 +4792,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, if (is_offloaded) mid->mid_state = MID_RESPONSE_MALFORMED; else - dequeue_mid(mid, rdata->result); + dequeue_mid(server, mid, rdata->result); return 0; } rdata->got_bytes = buffer_len; @@ -4793,23 +4802,23 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, WARN_ONCE(buffer, "read data can be either in buf or in buffer"); copied = copy_to_iter(buf + data_offset, data_len, &rdata->subreq.io_iter); if (copied == 0) - return -EIO; + return smb_EIO2(smb_eio_trace_rx_copy_to_iter, copied, data_len); rdata->got_bytes = copied; } else { /* read response payload cannot be in both buf and pages */ WARN_ONCE(1, "buf can not contain only a part of read data"); - rdata->result = -EIO; + rdata->result = smb_EIO(smb_eio_trace_rx_both_buf); if (is_offloaded) mid->mid_state = MID_RESPONSE_MALFORMED; else - dequeue_mid(mid, rdata->result); + dequeue_mid(server, mid, rdata->result); return 0; } if (is_offloaded) mid->mid_state = MID_RESPONSE_RECEIVED; else - dequeue_mid(mid, false); + dequeue_mid(server, mid, false); return 0; } @@ -4856,7 +4865,7 @@ static void smb2_decrypt_offload(struct work_struct *work) dw->server->ops->is_network_name_deleted(dw->buf, dw->server); - mid_execute_callback(mid); + mid_execute_callback(dw->server, mid); } else { spin_lock(&dw->server->srv_lock); if (dw->server->tcpStatus == CifsNeedReconnect) { @@ -4864,7 +4873,7 @@ static void smb2_decrypt_offload(struct work_struct *work) mid->mid_state = MID_RETRY_NEEDED; spin_unlock(&dw->server->mid_queue_lock); spin_unlock(&dw->server->srv_lock); - mid_execute_callback(mid); + mid_execute_callback(dw->server, mid); } else { spin_lock(&dw->server->mid_queue_lock); mid->mid_state = MID_REQUEST_SUBMITTED; @@ -4875,7 +4884,7 @@ static void smb2_decrypt_offload(struct work_struct *work) spin_unlock(&dw->server->srv_lock); } } - release_mid(mid); + release_mid(dw->server, mid); } free_pages: @@ -5767,7 +5776,6 @@ struct smb_version_values smb20_values = { .shared_lock_type = SMB2_LOCKFLAG_SHARED, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, .header_size = sizeof(struct smb2_hdr), - .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp), .lock_cmd = SMB2_LOCK, @@ -5789,7 +5797,6 @@ struct smb_version_values smb21_values = { .shared_lock_type = SMB2_LOCKFLAG_SHARED, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, .header_size = sizeof(struct smb2_hdr), - .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp), .lock_cmd = SMB2_LOCK, @@ -5810,7 +5817,6 @@ struct smb_version_values smb3any_values = { .shared_lock_type = SMB2_LOCKFLAG_SHARED, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, .header_size = sizeof(struct smb2_hdr), - .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp), .lock_cmd = SMB2_LOCK, @@ -5831,7 +5837,6 @@ struct smb_version_values smbdefault_values = { .shared_lock_type = SMB2_LOCKFLAG_SHARED, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, .header_size = sizeof(struct smb2_hdr), - .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp), .lock_cmd = SMB2_LOCK, @@ -5852,7 +5857,6 @@ struct smb_version_values smb30_values = { .shared_lock_type = SMB2_LOCKFLAG_SHARED, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, .header_size = sizeof(struct smb2_hdr), - .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp), .lock_cmd = SMB2_LOCK, @@ -5873,7 +5877,6 @@ struct smb_version_values smb302_values = { .shared_lock_type = SMB2_LOCKFLAG_SHARED, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, .header_size = sizeof(struct smb2_hdr), - .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp), .lock_cmd = SMB2_LOCK, @@ -5894,7 +5897,6 @@ struct smb_version_values smb311_values = { .shared_lock_type = SMB2_LOCKFLAG_SHARED, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, .header_size = sizeof(struct smb2_hdr), - .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp), .lock_cmd = SMB2_LOCK, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index ef2c6ac500f7..5d57c895ca37 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -26,8 +26,8 @@ #include <linux/netfs.h> #include <trace/events/netfs.h> #include "cifsglob.h" -#include "cifsacl.h" #include "cifsproto.h" +#include "cifsacl.h" #include "smb2proto.h" #include "cifs_unicode.h" #include "cifs_debug.h" @@ -168,7 +168,7 @@ out: static int cifs_chan_skip_or_disable(struct cifs_ses *ses, struct TCP_Server_Info *server, - bool from_reconnect) + bool from_reconnect, bool disable_mchan) { struct TCP_Server_Info *pserver; unsigned int chan_index; @@ -206,14 +206,46 @@ skip_terminate: return -EHOSTDOWN; } - cifs_server_dbg(VFS, - "server does not support multichannel anymore. Disable all other channels\n"); - cifs_disable_secondary_channels(ses); - + cifs_decrease_secondary_channels(ses, disable_mchan); return 0; } +/* + * smb3_update_ses_channels - Synchronize session channels with new configuration + * @ses: pointer to the CIFS session structure + * @server: pointer to the TCP server info structure + * @from_reconnect: indicates if called from reconnect context + * @disable_mchan: indicates if called from reconnect to disable multichannel + * + * Returns 0 on success or error code on failure. + * + * Outside of reconfigure, this function is called from cifs_mount() during mount + * and from reconnect scenarios to adjust channel count when the + * server's multichannel support changes. + */ +int smb3_update_ses_channels(struct cifs_ses *ses, struct TCP_Server_Info *server, + bool from_reconnect, bool disable_mchan) +{ + int rc = 0; + /* + * Manage session channels based on current count vs max: + * - If disable requested, skip or disable the channel + * - If below max channels, attempt to add more + * - If above max channels, skip or disable excess channels + */ + if (disable_mchan) + rc = cifs_chan_skip_or_disable(ses, server, from_reconnect, disable_mchan); + else { + if (ses->chan_count < ses->chan_max) + rc = cifs_try_adding_channels(ses); + else if (ses->chan_count > ses->chan_max) + rc = cifs_chan_skip_or_disable(ses, server, from_reconnect, disable_mchan); + } + + return rc; +} + static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, struct TCP_Server_Info *server, bool from_reconnect) @@ -249,15 +281,15 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, ses = tcon->ses; if (!ses) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); spin_lock(&ses->ses_lock); if (ses->ses_status == SES_EXITING) { spin_unlock(&ses->ses_lock); - return -EIO; + return smb_EIO(smb_eio_trace_sess_exiting); } spin_unlock(&ses->ses_lock); if (!ses->server || !server) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); spin_lock(&server->srv_lock); if (server->tcpStatus == CifsNeedReconnect) { @@ -355,8 +387,8 @@ again: */ if (ses->chan_count > 1 && !(server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { - rc = cifs_chan_skip_or_disable(ses, server, - from_reconnect); + rc = smb3_update_ses_channels(ses, server, + from_reconnect, true /* disable_mchan */); if (rc) { mutex_unlock(&ses->session_mutex); goto out; @@ -438,8 +470,9 @@ skip_sess_setup: * treat this as server not supporting multichannel */ - rc = cifs_chan_skip_or_disable(ses, server, - from_reconnect); + rc = smb3_update_ses_channels(ses, server, + from_reconnect, + true /* disable_mchan */); goto skip_add_channels; } else if (rc) cifs_tcon_dbg(FYI, "%s: failed to query server interfaces: %d\n", @@ -451,7 +484,8 @@ skip_sess_setup: if (ses->chan_count == 1) cifs_server_dbg(VFS, "supports multichannel now\n"); - cifs_try_adding_channels(ses); + smb3_update_ses_channels(ses, server, from_reconnect, + false /* disable_mchan */); } } else { mutex_unlock(&ses->session_mutex); @@ -463,7 +497,7 @@ skip_add_channels: spin_unlock(&ses->ses_lock); if (smb2_command != SMB2_INTERNAL_CMD) - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + cifs_queue_server_reconn(server); atomic_inc(&tconInfoReconnectCount); out: @@ -1061,7 +1095,7 @@ SMB2_negotiate(const unsigned int xid, if (!server) { WARN(1, "%s: server is NULL!\n", __func__); - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); } rc = smb2_plain_req_init(SMB2_NEGOTIATE, NULL, server, @@ -1105,8 +1139,7 @@ SMB2_negotiate(const unsigned int xid, req->SecurityMode = 0; req->Capabilities = cpu_to_le32(server->vals->req_capabilities); - if (ses->chan_max > 1) - req->Capabilities |= cpu_to_le32(SMB2_GLOBAL_CAP_MULTI_CHANNEL); + req->Capabilities |= cpu_to_le32(SMB2_GLOBAL_CAP_MULTI_CHANNEL); /* ClientGUID must be zero for SMB2.02 dialect */ if (server->vals->protocol_id == SMB20_PROT_ID) @@ -1142,64 +1175,84 @@ SMB2_negotiate(const unsigned int xid, } else if (rc != 0) goto neg_exit; - rc = -EIO; + u16 dialect = le16_to_cpu(rsp->DialectRevision); if (strcmp(server->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { - if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { + switch (dialect) { + case SMB20_PROT_ID: cifs_server_dbg(VFS, "SMB2 dialect returned but not requested\n"); + rc = smb_EIO2(smb_eio_trace_neg_unreq_dialect, dialect, 3); goto neg_exit; - } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { + case SMB21_PROT_ID: cifs_server_dbg(VFS, "SMB2.1 dialect returned but not requested\n"); + rc = smb_EIO2(smb_eio_trace_neg_unreq_dialect, dialect, 3); goto neg_exit; - } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { + case SMB311_PROT_ID: /* ops set to 3.0 by default for default so update */ server->ops = &smb311_operations; server->vals = &smb311_values; + break; + default: + break; } } else if (strcmp(server->vals->version_string, - SMBDEFAULT_VERSION_STRING) == 0) { - if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { + SMBDEFAULT_VERSION_STRING) == 0) { + switch (dialect) { + case SMB20_PROT_ID: cifs_server_dbg(VFS, "SMB2 dialect returned but not requested\n"); + rc = smb_EIO2(smb_eio_trace_neg_unreq_dialect, dialect, 0); goto neg_exit; - } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { + case SMB21_PROT_ID: /* ops set to 3.0 by default for default so update */ server->ops = &smb21_operations; server->vals = &smb21_values; - } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { + break; + case SMB311_PROT_ID: server->ops = &smb311_operations; server->vals = &smb311_values; + break; + default: + break; } - } else if (le16_to_cpu(rsp->DialectRevision) != - server->vals->protocol_id) { + } else if (dialect != server->vals->protocol_id) { /* if requested single dialect ensure returned dialect matched */ cifs_server_dbg(VFS, "Invalid 0x%x dialect returned: not requested\n", - le16_to_cpu(rsp->DialectRevision)); + dialect); + rc = smb_EIO2(smb_eio_trace_neg_unreq_dialect, + dialect, server->vals->protocol_id); goto neg_exit; } cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode); - if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) + switch (dialect) { + case SMB20_PROT_ID: cifs_dbg(FYI, "negotiated smb2.0 dialect\n"); - else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) + break; + case SMB21_PROT_ID: cifs_dbg(FYI, "negotiated smb2.1 dialect\n"); - else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID)) + break; + case SMB30_PROT_ID: cifs_dbg(FYI, "negotiated smb3.0 dialect\n"); - else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID)) + break; + case SMB302_PROT_ID: cifs_dbg(FYI, "negotiated smb3.02 dialect\n"); - else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) + break; + case SMB311_PROT_ID: cifs_dbg(FYI, "negotiated smb3.1.1 dialect\n"); - else { + break; + default: cifs_server_dbg(VFS, "Invalid dialect returned by server 0x%x\n", - le16_to_cpu(rsp->DialectRevision)); + dialect); + rc = smb_EIO1(smb_eio_trace_neg_inval_dialect, dialect); goto neg_exit; } rc = 0; - server->dialect = le16_to_cpu(rsp->DialectRevision); + server->dialect = dialect; /* * Keep a copy of the hash after negprot. This hash will be @@ -1255,10 +1308,10 @@ SMB2_negotiate(const unsigned int xid, if (rc == 1) rc = 0; else if (rc == 0) - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_neg_decode_token, rc); } - if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { + if (server->dialect == SMB311_PROT_ID) { if (rsp->NegotiateContextCount) rc = smb311_decode_neg_context(rsp, server, rsp_iov.iov_len); @@ -1312,8 +1365,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) pneg_inbuf->Capabilities = cpu_to_le32(server->vals->req_capabilities); - if (tcon->ses->chan_max > 1) - pneg_inbuf->Capabilities |= cpu_to_le32(SMB2_GLOBAL_CAP_MULTI_CHANNEL); + pneg_inbuf->Capabilities |= cpu_to_le32(SMB2_GLOBAL_CAP_MULTI_CHANNEL); memcpy(pneg_inbuf->Guid, server->client_guid, SMB2_CLIENT_GUID_SIZE); @@ -1371,32 +1423,47 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) } else if (rc != 0) { cifs_tcon_dbg(VFS, "validate protocol negotiate failed: %d\n", rc); - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_neg_info_fail, rc); goto out_free_inbuf; } - rc = -EIO; if (rsplen != sizeof(*pneg_rsp)) { cifs_tcon_dbg(VFS, "Invalid protocol negotiate response size: %d\n", rsplen); /* relax check since Mac returns max bufsize allowed on ioctl */ - if (rsplen > CIFSMaxBufSize || rsplen < sizeof(*pneg_rsp)) + if (rsplen > CIFSMaxBufSize || rsplen < sizeof(*pneg_rsp)) { + rc = smb_EIO1(smb_eio_trace_neg_bad_rsplen, rsplen); goto out_free_rsp; + } } /* check validate negotiate info response matches what we got earlier */ - if (pneg_rsp->Dialect != cpu_to_le16(server->dialect)) + u16 dialect = le16_to_cpu(pneg_rsp->Dialect); + + if (dialect != server->dialect) { + rc = smb_EIO2(smb_eio_trace_neg_info_dialect, + dialect, server->dialect); goto vneg_out; + } - if (pneg_rsp->SecurityMode != cpu_to_le16(server->sec_mode)) + u16 sec_mode = le16_to_cpu(pneg_rsp->SecurityMode); + + if (sec_mode != server->sec_mode) { + rc = smb_EIO2(smb_eio_trace_neg_info_sec_mode, + sec_mode, server->sec_mode); goto vneg_out; + } /* do not validate server guid because not saved at negprot time yet */ + u32 caps = le32_to_cpu(pneg_rsp->Capabilities); - if ((le32_to_cpu(pneg_rsp->Capabilities) | SMB2_NT_FIND | - SMB2_LARGE_FILES) != server->capabilities) + if ((caps | SMB2_NT_FIND | + SMB2_LARGE_FILES) != server->capabilities) { + rc = smb_EIO2(smb_eio_trace_neg_info_caps, + caps, server->capabilities); goto vneg_out; + } /* validate negotiate successful */ rc = 0; @@ -1628,8 +1695,6 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) spnego_key = cifs_get_spnego_key(ses, server); if (IS_ERR(spnego_key)) { rc = PTR_ERR(spnego_key); - if (rc == -ENOKEY) - cifs_dbg(VFS, "Verify user has a krb5 ticket and keyutils is installed\n"); spnego_key = NULL; goto out; } @@ -1758,11 +1823,11 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) if (rc) goto out; - if (offsetof(struct smb2_sess_setup_rsp, Buffer) != - le16_to_cpu(rsp->SecurityBufferOffset)) { - cifs_dbg(VFS, "Invalid security buffer offset %d\n", - le16_to_cpu(rsp->SecurityBufferOffset)); - rc = -EIO; + u16 boff = le16_to_cpu(rsp->SecurityBufferOffset); + + if (offsetof(struct smb2_sess_setup_rsp, Buffer) != boff) { + cifs_dbg(VFS, "Invalid security buffer offset %d\n", boff); + rc = smb_EIO1(smb_eio_trace_sess_buf_off, boff); goto out; } rc = decode_ntlmssp_challenge(rsp->Buffer, @@ -1916,7 +1981,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, if (!server) { WARN(1, "%s: server is NULL!\n", __func__); - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); } sess_data = kzalloc(sizeof(struct SMB2_sess_data), GFP_KERNEL); @@ -1966,10 +2031,9 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) cifs_dbg(FYI, "disconnect session %p\n", ses); - if (ses && (ses->server)) - server = ses->server; - else - return -EIO; + if (!ses || !ses->server) + return smb_EIO(smb_eio_trace_null_pointers); + server = ses->server; /* no need to send SMB logoff if uid already closed due to reconnect */ spin_lock(&ses->chan_lock); @@ -2048,7 +2112,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, cifs_dbg(FYI, "TCON\n"); if (!server || !tree) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL); if (unc_path == NULL) @@ -2186,7 +2250,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) cifs_dbg(FYI, "Tree Disconnect\n"); if (!ses || !(ses->server)) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); trace_smb3_tdis_enter(xid, tcon->tid, ses->Suid, tcon->tree_name); spin_lock(&ses->chan_lock); @@ -2856,7 +2920,7 @@ replay_again: return -ENOMEM; if (!ses || !server) { - rc = -EIO; + rc = smb_EIO(smb_eio_trace_null_pointers); goto err_free_path; } @@ -2973,7 +3037,7 @@ replay_again: */ rsp = (struct smb2_create_rsp *)rsp_iov.iov_base; if (rsp == NULL) { - rc = -EIO; + rc = smb_EIO(smb_eio_trace_mkdir_no_rsp); kfree(pc_buf); goto err_free_req; } @@ -3211,7 +3275,7 @@ replay_again: cifs_dbg(FYI, "create/open\n"); if (!ses || !server) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -3417,11 +3481,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, int retries = 0, cur_sleep = 1; if (!tcon) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); ses = tcon->ses; if (!ses) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); replay_again: /* reinitialize for possible replay */ @@ -3429,7 +3493,7 @@ replay_again: server = cifs_pick_channel(ses); if (!server) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); cifs_dbg(FYI, "SMB2 IOCTL\n"); @@ -3492,7 +3556,7 @@ replay_again: * warning) */ if (rsp == NULL) { - rc = -EIO; + rc = smb_EIO(smb_eio_trace_ioctl_no_rsp); goto ioctl_exit; } @@ -3503,16 +3567,18 @@ replay_again: goto ioctl_exit; /* server returned no data */ else if (*plen > rsp_iov.iov_len || *plen > 0xFF00) { cifs_tcon_dbg(VFS, "srv returned invalid ioctl length: %d\n", *plen); + rc = smb_EIO2(smb_eio_trace_ioctl_data_len, *plen, rsp_iov.iov_len); *plen = 0; - rc = -EIO; goto ioctl_exit; } - if (rsp_iov.iov_len - *plen < le32_to_cpu(rsp->OutputOffset)) { - cifs_tcon_dbg(VFS, "Malformed ioctl resp: len %d offset %d\n", *plen, - le32_to_cpu(rsp->OutputOffset)); + u32 outoff = le32_to_cpu(rsp->OutputOffset); + + if (rsp_iov.iov_len - *plen < outoff) { + cifs_tcon_dbg(VFS, "Malformed ioctl resp: len %d offset %d\n", + *plen, outoff); + rc = smb_EIO2(smb_eio_trace_ioctl_out_off, rsp_iov.iov_len - *plen, outoff); *plen = 0; - rc = -EIO; goto ioctl_exit; } @@ -3620,7 +3686,7 @@ replay_again: cifs_dbg(FYI, "Close\n"); if (!ses || !server) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -3817,7 +3883,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_dbg(FYI, "Query Info\n"); if (!ses) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); replay_again: /* reinitialize for possible replay */ @@ -3826,7 +3892,7 @@ replay_again: server = cifs_pick_channel(ses); if (!server) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -3913,7 +3979,8 @@ int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, /* currently unused, as now we are doing compounding instead (see smb311_posix_query_path_info) */ int SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, struct smb311_posix_qinfo *data, u32 *plen) + u64 persistent_fid, u64 volatile_fid, + struct smb311_posix_qinfo *data, u32 *plen) { size_t output_len = sizeof(struct smb311_posix_qinfo *) + (sizeof(struct smb_sid) * 2) + (PATH_MAX * 2); @@ -4011,7 +4078,7 @@ replay_again: cifs_dbg(FYI, "change notify\n"); if (!ses || !server) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -4091,9 +4158,8 @@ replay_again: * FIXME: maybe we should consider checking that the reply matches request? */ static void -smb2_echo_callback(struct mid_q_entry *mid) +smb2_echo_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) { - struct TCP_Server_Info *server = mid->callback_data; struct smb2_echo_rsp *rsp = (struct smb2_echo_rsp *)mid->resp_buf; struct cifs_credits credits = { .value = 0, .instance = 0 }; @@ -4103,7 +4169,7 @@ smb2_echo_callback(struct mid_q_entry *mid) credits.instance = server->reconnect_instance; } - release_mid(mid); + release_mid(server, mid); add_credits(server, &credits, CIFS_ECHO_OP); } @@ -4248,7 +4314,7 @@ void smb2_reconnect_server(struct work_struct *work) done: cifs_dbg(FYI, "Reconnecting tcons and channels finished\n"); if (resched) - queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ); + cifs_requeue_server_reconn(server); mutex_unlock(&pserver->reconnect_mutex); /* now we can safely release srv struct */ @@ -4272,7 +4338,7 @@ SMB2_echo(struct TCP_Server_Info *server) server->ops->need_neg(server)) { spin_unlock(&server->srv_lock); /* No need to send echo on newly established connections */ - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + cifs_queue_server_reconn(server); return rc; } spin_unlock(&server->srv_lock); @@ -4348,7 +4414,7 @@ replay_again: cifs_dbg(FYI, "flush\n"); if (!ses || !(ses->server)) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -4518,21 +4584,19 @@ smb2_new_read_req(void **buf, unsigned int *total_len, } static void -smb2_readv_callback(struct mid_q_entry *mid) +smb2_readv_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) { struct cifs_io_subrequest *rdata = mid->callback_data; struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode); struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); - struct TCP_Server_Info *server = rdata->server; - struct smb2_hdr *shdr = - (struct smb2_hdr *)rdata->iov[0].iov_base; + struct smb2_hdr *shdr = (struct smb2_hdr *)rdata->iov[0].iov_base; struct cifs_credits credits = { .value = 0, .instance = 0, .rreq_debug_id = rdata->rreq->debug_id, .rreq_debug_index = rdata->subreq.debug_index, }; - struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], .rq_nvec = 1 }; + struct smb_rqst rqst = { .rq_iov = &rdata->iov[0], .rq_nvec = 1 }; unsigned int rreq_debug_id = rdata->rreq->debug_id; unsigned int subreq_debug_index = rdata->subreq.debug_index; @@ -4540,9 +4604,9 @@ smb2_readv_callback(struct mid_q_entry *mid) rqst.rq_iter = rdata->subreq.io_iter; } - WARN_ONCE(rdata->server != mid->server, + WARN_ONCE(rdata->server != server, "rdata server %p != mid server %p", - rdata->server, mid->server); + rdata->server, server); cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu/%zu\n", __func__, mid->mid, mid->mid_state, rdata->result, @@ -4585,11 +4649,12 @@ do_retry: trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(shdr->CreditRequest); credits.instance = server->reconnect_instance; - rdata->result = -EIO; + rdata->result = smb_EIO(smb_eio_trace_read_rsp_malformed); break; default: trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); - rdata->result = -EIO; + rdata->result = smb_EIO1(smb_eio_trace_read_mid_state_unknown, + mid->mid_state); break; } #ifdef CONFIG_CIFS_SMB_DIRECT @@ -4628,7 +4693,7 @@ do_retry: } else { size_t trans = rdata->subreq.transferred + rdata->got_bytes; if (trans < rdata->subreq.len && - rdata->subreq.start + trans == ictx->remote_i_size) { + rdata->subreq.start + trans >= ictx->remote_i_size) { __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); rdata->result = 0; } @@ -4643,7 +4708,7 @@ do_retry: rdata->subreq.transferred += rdata->got_bytes; trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_progress); netfs_read_subreq_terminated(&rdata->subreq); - release_mid(mid); + release_mid(server, mid); trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, server->credits, server->in_flight, credits.value, cifs_trace_rw_credits_read_response_add); @@ -4798,7 +4863,8 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, (*nbytes > io_parms->length)) { cifs_dbg(FYI, "bad length %d for count %d\n", *nbytes, io_parms->length); - rc = -EIO; + rc = smb_EIO2(smb_eio_trace_read_overlarge, + *nbytes, io_parms->length); *nbytes = 0; } @@ -4820,11 +4886,10 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, * workqueue completion task. */ static void -smb2_writev_callback(struct mid_q_entry *mid) +smb2_writev_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) { struct cifs_io_subrequest *wdata = mid->callback_data; struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); - struct TCP_Server_Info *server = wdata->server; struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; struct cifs_credits credits = { .value = 0, @@ -4837,9 +4902,9 @@ smb2_writev_callback(struct mid_q_entry *mid) ssize_t result = 0; size_t written; - WARN_ONCE(wdata->server != mid->server, + WARN_ONCE(wdata->server != server, "wdata server %p != mid server %p", - wdata->server, mid->server); + wdata->server, server); switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: @@ -4885,11 +4950,12 @@ smb2_writev_callback(struct mid_q_entry *mid) trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; - result = -EIO; + result = smb_EIO(smb_eio_trace_write_rsp_malformed); break; default: trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); - result = -EIO; + result = smb_EIO1(smb_eio_trace_write_mid_state_unknown, + mid->mid_state); break; } #ifdef CONFIG_CIFS_SMB_DIRECT @@ -4929,7 +4995,7 @@ smb2_writev_callback(struct mid_q_entry *mid) 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; cifs_write_subrequest_terminated(wdata, result ?: written); - release_mid(mid); + release_mid(server, mid); trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, server->credits, server->in_flight, credits.value, cifs_trace_rw_credits_write_response_add); @@ -5532,7 +5598,7 @@ replay_again: server = cifs_pick_channel(ses); if (!ses || !(ses->server)) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -5667,7 +5733,7 @@ replay_again: server = cifs_pick_channel(ses); if (!ses || !server) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); if (!num) return -EINVAL; @@ -5864,7 +5930,7 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, cifs_dbg(FYI, "Query FSInfo level %d\n", level); if ((tcon->ses == NULL) || server == NULL) - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, server, (void **) &req, &total_len); diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 5241daaae543..063c9f83bbcd 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -9,8 +9,10 @@ */ #ifndef _SMB2PROTO_H #define _SMB2PROTO_H + #include <linux/nls.h> #include <linux/key-type.h> +#include "cached_dir.h" struct statfs; struct smb_rqst; @@ -21,7 +23,7 @@ struct smb_rqst; ***************************************************************** */ extern int map_smb2_to_linux_error(char *buf, bool log_err); -extern int smb2_check_message(char *buf, unsigned int length, +extern int smb2_check_message(char *buf, unsigned int pdu_len, unsigned int length, struct TCP_Server_Info *server); extern unsigned int smb2_calc_size(void *buf); extern char *smb2_get_data_area_len(int *off, int *len, @@ -39,15 +41,11 @@ extern struct mid_q_entry *smb2_setup_async_request( struct TCP_Server_Info *server, struct smb_rqst *rqst); extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid); -extern void smb2_echo_request(struct work_struct *work); extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); extern bool smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv); extern int smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid); -extern int smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, const char *path, - __u32 *reparse_tag); struct inode *smb2_create_reparse_inode(struct cifs_open_info_data *data, struct super_block *sb, const unsigned int xid, @@ -300,17 +298,9 @@ extern int smb2_query_info_compound(const unsigned int xid, struct kvec *rsp, int *buftype, struct cifs_sb_info *cifs_sb); /* query path info from the server using SMB311 POSIX extensions*/ -int smb311_posix_query_path_info(const unsigned int xid, - struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, - const char *full_path, - struct cifs_open_info_data *data); int posix_info_parse(const void *beg, const void *end, struct smb2_posix_info_parsed *out); int posix_info_sid_size(const void *beg, const void *end); -int smb2_make_nfs_node(unsigned int xid, struct inode *inode, - struct dentry *dentry, struct cifs_tcon *tcon, - const char *full_path, umode_t mode, dev_t dev); int smb2_rename_pending_delete(const char *full_path, struct dentry *dentry, const unsigned int xid); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 6a9b80385b86..8b9000a83181 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -153,7 +153,7 @@ static int smb2_get_sign_key(struct TCP_Server_Info *server, memcpy(key, ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); } else { - rc = -EIO; + rc = smb_EIO(smb_eio_trace_no_auth_key); } break; default: @@ -653,16 +653,15 @@ smb2_mid_entry_alloc(const struct smb2_hdr *shdr, return NULL; } - temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); + temp = mempool_alloc(&cifs_mid_pool, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); - kref_init(&temp->refcount); + refcount_set(&temp->refcount, 1); spin_lock_init(&temp->mid_lock); temp->mid = le64_to_cpu(shdr->MessageId); temp->credits = credits > 0 ? credits : 1; temp->pid = current->pid; temp->command = shdr->Command; /* Always LE */ temp->when_alloc = jiffies; - temp->server = server; /* * The default is for the mid to be synchronous, so the @@ -685,43 +684,35 @@ static int smb2_get_mid_entry(struct cifs_ses *ses, struct TCP_Server_Info *server, struct smb2_hdr *shdr, struct mid_q_entry **mid) { - spin_lock(&server->srv_lock); - if (server->tcpStatus == CifsExiting) { - spin_unlock(&server->srv_lock); + switch (READ_ONCE(server->tcpStatus)) { + case CifsExiting: return -ENOENT; - } - - if (server->tcpStatus == CifsNeedReconnect) { - spin_unlock(&server->srv_lock); + case CifsNeedReconnect: cifs_dbg(FYI, "tcp session dead - return to caller to retry\n"); return -EAGAIN; - } - - if (server->tcpStatus == CifsNeedNegotiate && - shdr->Command != SMB2_NEGOTIATE) { - spin_unlock(&server->srv_lock); - return -EAGAIN; - } - spin_unlock(&server->srv_lock); - - spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_NEW) { - if ((shdr->Command != SMB2_SESSION_SETUP) && - (shdr->Command != SMB2_NEGOTIATE)) { - spin_unlock(&ses->ses_lock); + case CifsNeedNegotiate: + if (shdr->Command != SMB2_NEGOTIATE) return -EAGAIN; - } - /* else ok - we are setting up session */ + break; + default: + break; } - if (ses->ses_status == SES_EXITING) { - if (shdr->Command != SMB2_LOGOFF) { - spin_unlock(&ses->ses_lock); + switch (READ_ONCE(ses->ses_status)) { + case SES_NEW: + if (shdr->Command != SMB2_SESSION_SETUP && + shdr->Command != SMB2_NEGOTIATE) + return -EAGAIN; + /* else ok - we are setting up session */ + break; + case SES_EXITING: + if (shdr->Command != SMB2_LOGOFF) return -EAGAIN; - } /* else ok - we are shutting down the session */ + break; + default: + break; } - spin_unlock(&ses->ses_lock); *mid = smb2_mid_entry_alloc(shdr, server); if (*mid == NULL) @@ -779,7 +770,7 @@ smb2_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *server, rc = smb2_sign_rqst(rqst, server); if (rc) { revert_current_mid_from_hdr(server, shdr); - delete_mid(mid); + delete_mid(server, mid); return ERR_PTR(rc); } @@ -813,7 +804,7 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) rc = smb2_sign_rqst(rqst, server); if (rc) { revert_current_mid_from_hdr(server, shdr); - release_mid(mid); + release_mid(server, mid); return ERR_PTR(rc); } diff --git a/fs/smb/client/trace.c b/fs/smb/client/trace.c index 16b0e719731f..8a99b68d0c71 100644 --- a/fs/smb/client/trace.c +++ b/fs/smb/client/trace.c @@ -5,5 +5,6 @@ * Author(s): Steve French <stfrench@microsoft.com> */ #include "cifsglob.h" +#include "cifs_spnego.h" #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 28e00c34df1c..b0fbc2df642e 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -20,6 +20,136 @@ /* * Specify enums for tracing information. */ +#define smb_eio_traces \ + EM(smb_eio_trace_compress_copy, "compress_copy") \ + EM(smb_eio_trace_copychunk_inv_rsp, "copychunk_inv_rsp") \ + EM(smb_eio_trace_copychunk_overcopy_b, "copychunk_overcopy_b") \ + EM(smb_eio_trace_copychunk_overcopy_c, "copychunk_overcopy_c") \ + EM(smb_eio_trace_create_rsp_too_small, "create_rsp_too_small") \ + EM(smb_eio_trace_dfsref_no_rsp, "dfsref_no_rsp") \ + EM(smb_eio_trace_ea_overrun, "ea_overrun") \ + EM(smb_eio_trace_extract_will_pin, "extract_will_pin") \ + EM(smb_eio_trace_forced_shutdown, "forced_shutdown") \ + EM(smb_eio_trace_getacl_bcc_too_small, "getacl_bcc_too_small") \ + EM(smb_eio_trace_getcifsacl_param_count, "getcifsacl_param_count") \ + EM(smb_eio_trace_getdfsrefer_bcc_too_small, "getdfsrefer_bcc_too_small") \ + EM(smb_eio_trace_getextattr_bcc_too_small, "getextattr_bcc_too_small") \ + EM(smb_eio_trace_getextattr_inv_size, "getextattr_inv_size") \ + EM(smb_eio_trace_getsrvinonum_bcc_too_small, "getsrvinonum_bcc_too_small") \ + EM(smb_eio_trace_getsrvinonum_size, "getsrvinonum_size") \ + EM(smb_eio_trace_ioctl_data_len, "ioctl_data_len") \ + EM(smb_eio_trace_ioctl_no_rsp, "ioctl_no_rsp") \ + EM(smb_eio_trace_ioctl_out_off, "ioctl_out_off") \ + EM(smb_eio_trace_lock_bcc_too_small, "lock_bcc_too_small") \ + EM(smb_eio_trace_lock_data_too_small, "lock_data_too_small") \ + EM(smb_eio_trace_malformed_ksid_key, "malformed_ksid_key") \ + EM(smb_eio_trace_malformed_sid_key, "malformed_sid_key") \ + EM(smb_eio_trace_mkdir_no_rsp, "mkdir_no_rsp") \ + EM(smb_eio_trace_neg_bad_rsplen, "neg_bad_rsplen") \ + EM(smb_eio_trace_neg_decode_token, "neg_decode_token") \ + EM(smb_eio_trace_neg_info_caps, "neg_info_caps") \ + EM(smb_eio_trace_neg_info_dialect, "neg_info_dialect") \ + EM(smb_eio_trace_neg_info_fail, "neg_info_fail") \ + EM(smb_eio_trace_neg_info_sec_mode, "neg_info_sec_mode") \ + EM(smb_eio_trace_neg_inval_dialect, "neg_inval_dialect") \ + EM(smb_eio_trace_neg_no_crypt_key, "neg_no_crypt_key") \ + EM(smb_eio_trace_neg_sec_blob_too_small, "neg_sec_blob_too_small") \ + EM(smb_eio_trace_neg_unreq_dialect, "neg_unreq_dialect") \ + EM(smb_eio_trace_no_auth_key, "no_auth_key") \ + EM(smb_eio_trace_no_lease_key, "no_lease_key") \ + EM(smb_eio_trace_not_netfs_writeback, "not_netfs_writeback") \ + EM(smb_eio_trace_null_pointers, "null_pointers") \ + EM(smb_eio_trace_oldqfsinfo_bcc_too_small, "oldqfsinfo_bcc_too_small") \ + EM(smb_eio_trace_pend_del_fail, "pend_del_fail") \ + EM(smb_eio_trace_qalleas_bcc_too_small, "qalleas_bcc_too_small") \ + EM(smb_eio_trace_qalleas_ea_overlong, "qalleas_ea_overlong") \ + EM(smb_eio_trace_qalleas_overlong, "qalleas_overlong") \ + EM(smb_eio_trace_qfileinfo_bcc_too_small, "qfileinfo_bcc_too_small") \ + EM(smb_eio_trace_qfileinfo_invalid, "qfileinfo_invalid") \ + EM(smb_eio_trace_qfsattrinfo_bcc_too_small, "qfsattrinfo_bcc_too_small") \ + EM(smb_eio_trace_qfsdevinfo_bcc_too_small, "qfsdevinfo_bcc_too_small") \ + EM(smb_eio_trace_qfsinfo_bcc_too_small, "qfsinfo_bcc_too_small") \ + EM(smb_eio_trace_qfsposixinfo_bcc_too_small, "qfsposixinfo_bcc_too_small") \ + EM(smb_eio_trace_qfsunixinfo_bcc_too_small, "qfsunixinfo_bcc_too_small") \ + EM(smb_eio_trace_qpathinfo_bcc_too_small, "qpathinfo_bcc_too_small") \ + EM(smb_eio_trace_qpathinfo_invalid, "qpathinfo_invalid") \ + EM(smb_eio_trace_qreparse_data_area, "qreparse_data_area") \ + EM(smb_eio_trace_qreparse_rep_datalen, "qreparse_rep_datalen") \ + EM(smb_eio_trace_qreparse_ret_datalen, "qreparse_ret_datalen") \ + EM(smb_eio_trace_qreparse_setup_count, "qreparse_setup_count") \ + EM(smb_eio_trace_qreparse_sizes_wrong, "qreparse_sizes_wrong") \ + EM(smb_eio_trace_qsym_bcc_too_small, "qsym_bcc_too_small") \ + EM(smb_eio_trace_read_mid_state_unknown, "read_mid_state_unknown") \ + EM(smb_eio_trace_read_overlarge, "read_overlarge") \ + EM(smb_eio_trace_read_rsp_malformed, "read_rsp_malformed") \ + EM(smb_eio_trace_read_rsp_short, "read_rsp_short") \ + EM(smb_eio_trace_read_too_far, "read_too_far") \ + EM(smb_eio_trace_reparse_data_len, "reparse_data_len") \ + EM(smb_eio_trace_reparse_native_len, "reparse_native_len") \ + EM(smb_eio_trace_reparse_native_nul, "reparse_native_nul") \ + EM(smb_eio_trace_reparse_native_sym_len, "reparse_native_sym_len") \ + EM(smb_eio_trace_reparse_nfs_dev, "reparse_nfs_dev") \ + EM(smb_eio_trace_reparse_nfs_nul, "reparse_nfs_nul") \ + EM(smb_eio_trace_reparse_nfs_sockfifo, "reparse_nfs_sockfifo") \ + EM(smb_eio_trace_reparse_nfs_symbuf, "reparse_nfs_symbuf") \ + EM(smb_eio_trace_reparse_nfs_too_short, "reparse_nfs_too_short") \ + EM(smb_eio_trace_reparse_overlong, "reparse_overlong") \ + EM(smb_eio_trace_reparse_rdlen, "reparse_rdlen") \ + EM(smb_eio_trace_reparse_wsl_nul, "reparse_wsl_nul") \ + EM(smb_eio_trace_reparse_wsl_symbuf, "reparse_wsl_symbuf") \ + EM(smb_eio_trace_reparse_wsl_ver, "reparse_wsl_ver") \ + EM(smb_eio_trace_rx_b_read_short, "rx_b_read_short") \ + EM(smb_eio_trace_rx_bad_datalen, "rx_bad_datalen") \ + EM(smb_eio_trace_rx_both_buf, "rx_both_buf") \ + EM(smb_eio_trace_rx_calc_len_too_big, "rx_calc_len_too_big") \ + EM(smb_eio_trace_rx_check_rsp, "rx_check_rsp") \ + EM(smb_eio_trace_rx_copy_to_iter, "rx_copy_to_iter") \ + EM(smb_eio_trace_rx_insuff_res, "rx_insuff_res") \ + EM(smb_eio_trace_rx_inv_bcc, "rx_inv_bcc") \ + EM(smb_eio_trace_rx_mid_unready, "rx_mid_unready") \ + EM(smb_eio_trace_rx_neg_sess_resp, "rx_neg_sess_resp") \ + EM(smb_eio_trace_rx_overlong, "rx_overlong") \ + EM(smb_eio_trace_rx_overpage, "rx_overpage") \ + EM(smb_eio_trace_rx_pos_sess_resp, "rx_pos_sess_resp") \ + EM(smb_eio_trace_rx_rfc1002_magic, "rx_rfc1002_magic") \ + EM(smb_eio_trace_rx_sync_mid_invalid, "rx_sync_mid_invalid") \ + EM(smb_eio_trace_rx_sync_mid_malformed, "rx_sync_mid_malformed") \ + EM(smb_eio_trace_rx_too_short, "rx_too_short") \ + EM(smb_eio_trace_rx_trans2_extract, "rx_trans2_extract") \ + EM(smb_eio_trace_rx_unknown_resp, "rx_unknown_resp") \ + EM(smb_eio_trace_rx_unspec_error, "rx_unspec_error") \ + EM(smb_eio_trace_sess_buf_off, "sess_buf_off") \ + EM(smb_eio_trace_sess_exiting, "sess_exiting") \ + EM(smb_eio_trace_sess_krb_wcc, "sess_krb_wcc") \ + EM(smb_eio_trace_sess_nl2_wcc, "sess_nl2_wcc") \ + EM(smb_eio_trace_sess_rawnl_auth_wcc, "sess_rawnl_auth_wcc") \ + EM(smb_eio_trace_sess_rawnl_neg_wcc, "sess_rawnl_neg_wcc") \ + EM(smb_eio_trace_short_symlink_write, "short_symlink_write") \ + EM(smb_eio_trace_sid_too_many_auth, "sid_too_many_auth") \ + EM(smb_eio_trace_sig_data_too_small, "sig_data_too_small") \ + EM(smb_eio_trace_sig_iter, "sig_iter") \ + EM(smb_eio_trace_smb1_received_error, "smb1_received_error") \ + EM(smb_eio_trace_smb2_received_error, "smb2_received_error") \ + EM(smb_eio_trace_sym_slash, "sym_slash") \ + EM(smb_eio_trace_sym_target_len, "sym_target_len") \ + EM(smb_eio_trace_symlink_file_size, "symlink_file_size") \ + EM(smb_eio_trace_tdis_in_reconnect, "tdis_in_reconnect") \ + EM(smb_eio_trace_tx_chained_async, "tx_chained_async") \ + EM(smb_eio_trace_tx_compress_failed, "tx_compress_failed") \ + EM(smb_eio_trace_tx_copy_iter_to_buf, "tx_copy_iter_to_buf") \ + EM(smb_eio_trace_tx_copy_to_buf, "tx_copy_to_buf") \ + EM(smb_eio_trace_tx_max_compound, "tx_max_compound") \ + EM(smb_eio_trace_tx_miscopy_to_buf, "tx_miscopy_to_buf") \ + EM(smb_eio_trace_tx_need_transform, "tx_need_transform") \ + EM(smb_eio_trace_tx_too_long, "sr_too_long") \ + EM(smb_eio_trace_unixqfileinfo_bcc_too_small, "unixqfileinfo_bcc_too_small") \ + EM(smb_eio_trace_unixqpathinfo_bcc_too_small, "unixqpathinfo_bcc_too_small") \ + EM(smb_eio_trace_user_iter, "user_iter") \ + EM(smb_eio_trace_write_bad_buf_type, "write_bad_buf_type") \ + EM(smb_eio_trace_write_mid_state_unknown, "write_mid_state_unknown") \ + EM(smb_eio_trace_write_rsp_malformed, "write_rsp_malformed") \ + E_(smb_eio_trace_write_too_far, "write_too_far") + #define smb3_rw_credits_traces \ EM(cifs_trace_rw_credits_call_readv_adjust, "rd-call-adj") \ EM(cifs_trace_rw_credits_call_writev_adjust, "wr-call-adj") \ @@ -79,6 +209,7 @@ #define EM(a, b) a, #define E_(a, b) a +enum smb_eio_trace { smb_eio_traces } __mode(byte); enum smb3_rw_credits_trace { smb3_rw_credits_traces } __mode(byte); enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte); @@ -92,6 +223,7 @@ enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte); #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); +smb_eio_traces; smb3_rw_credits_traces; smb3_tcon_ref_traces; @@ -1560,6 +1692,49 @@ DEFINE_SMB3_CREDIT_EVENT(waitff_credits); DEFINE_SMB3_CREDIT_EVENT(overflow_credits); DEFINE_SMB3_CREDIT_EVENT(set_credits); +TRACE_EVENT(smb3_kerberos_auth, + TP_PROTO(struct TCP_Server_Info *server, + struct cifs_ses *ses, + int rc), + TP_ARGS(server, ses, rc), + TP_STRUCT__entry( + __field(pid_t, pid) + __field(uid_t, uid) + __field(uid_t, cruid) + __string(host, server->hostname) + __string(user, ses->user_name) + __array(__u8, addr, sizeof(struct sockaddr_storage)) + __array(char, sec, sizeof("ntlmsspi")) + __array(char, upcall_target, sizeof("mount")) + __field(int, rc) + ), + TP_fast_assign( + __entry->pid = current->pid; + __entry->uid = from_kuid_munged(&init_user_ns, ses->linux_uid); + __entry->cruid = from_kuid_munged(&init_user_ns, ses->cred_uid); + __assign_str(host); + __assign_str(user); + memcpy(__entry->addr, &server->dstaddr, sizeof(__entry->addr)); + + if (server->sec_kerberos) + memcpy(__entry->sec, "krb5", sizeof("krb5")); + else if (server->sec_mskerberos) + memcpy(__entry->sec, "mskrb5", sizeof("mskrb5")); + else if (server->sec_iakerb) + memcpy(__entry->sec, "iakerb", sizeof("iakerb")); + else + memcpy(__entry->sec, "krb5", sizeof("krb5")); + + if (ses->upcall_target == UPTARGET_MOUNT) + memcpy(__entry->upcall_target, "mount", sizeof("mount")); + else + memcpy(__entry->upcall_target, "app", sizeof("app")); + __entry->rc = rc; + ), + TP_printk("vers=%d host=%s ip=%pISpsfc sec=%s uid=%d cruid=%d user=%s pid=%d upcall_target=%s err=%d", + CIFS_SPNEGO_UPCALL_VERSION, __get_str(host), __entry->addr, + __entry->sec, __entry->uid, __entry->cruid, __get_str(user), + __entry->pid, __entry->upcall_target, __entry->rc)) TRACE_EVENT(smb3_tcon_ref, TP_PROTO(unsigned int tcon_debug_id, int ref, @@ -1616,6 +1791,23 @@ TRACE_EVENT(smb3_rw_credits, __entry->server_credits, __entry->in_flight) ); +TRACE_EVENT(smb3_eio, + TP_PROTO(enum smb_eio_trace trace, unsigned long info, unsigned long info2), + TP_ARGS(trace, info, info2), + TP_STRUCT__entry( + __field(enum smb_eio_trace, trace) + __field(unsigned long, info) + __field(unsigned long, info2) + ), + TP_fast_assign( + __entry->trace = trace; + __entry->info = info; + __entry->info2 = info2; + ), + TP_printk("%s info=%lx,%lx", + __print_symbolic(__entry->trace, smb_eio_traces), + __entry->info, __entry->info2) + ); #undef EM #undef E_ diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 915cedde5d66..3b34c3f4da2d 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -32,24 +32,21 @@ #include "compress.h" void -cifs_wake_up_task(struct mid_q_entry *mid) +cifs_wake_up_task(struct TCP_Server_Info *server, struct mid_q_entry *mid) { if (mid->mid_state == MID_RESPONSE_RECEIVED) mid->mid_state = MID_RESPONSE_READY; wake_up_process(mid->callback_data); } -void __release_mid(struct kref *refcount) +void __release_mid(struct TCP_Server_Info *server, struct mid_q_entry *midEntry) { - struct mid_q_entry *midEntry = - container_of(refcount, struct mid_q_entry, refcount); #ifdef CONFIG_CIFS_STATS2 - __le16 command = midEntry->server->vals->lock_cmd; + __le16 command = server->vals->lock_cmd; __u16 smb_cmd = le16_to_cpu(midEntry->command); unsigned long now; unsigned long roundtrip_time; #endif - struct TCP_Server_Info *server = midEntry->server; if (midEntry->resp_buf && (midEntry->wait_cancelled) && (midEntry->mid_state == MID_RESPONSE_RECEIVED || @@ -116,20 +113,21 @@ void __release_mid(struct kref *refcount) #endif put_task_struct(midEntry->creator); - mempool_free(midEntry, cifs_mid_poolp); + mempool_free(midEntry, &cifs_mid_pool); } void -delete_mid(struct mid_q_entry *mid) +delete_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid) { - spin_lock(&mid->server->mid_queue_lock); - if (mid->deleted_from_q == false) { + spin_lock(&server->mid_queue_lock); + + if (!mid->deleted_from_q) { list_del_init(&mid->qhead); mid->deleted_from_q = true; } - spin_unlock(&mid->server->mid_queue_lock); + spin_unlock(&server->mid_queue_lock); - release_mid(mid); + release_mid(server, mid); } /* @@ -289,8 +287,8 @@ int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, sigfillset(&mask); sigprocmask(SIG_BLOCK, &mask, &oldmask); - /* Generate a rfc1002 marker for SMB2+ */ - if (!is_smb1(server)) { + /* Generate a rfc1002 marker */ + { struct kvec hiov = { .iov_base = &rfc1002_marker, .iov_len = 4 @@ -404,11 +402,11 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, return __smb_send_rqst(server, num_rqst, rqst); if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) - return -EIO; + return smb_EIO1(smb_eio_trace_tx_max_compound, num_rqst); if (!server->ops->init_transform_rq) { cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); - return -EIO; + return smb_EIO(smb_eio_trace_tx_need_transform); } new_rqst[0].rq_iov = &iov; @@ -640,14 +638,18 @@ cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, return 0; } -int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) +int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *mid) { + unsigned int sleep_state = TASK_KILLABLE; int error; + if (mid->sr_flags & CIFS_INTERRUPTIBLE_WAIT) + sleep_state = TASK_INTERRUPTIBLE; + error = wait_event_state(server->response_q, - midQ->mid_state != MID_REQUEST_SUBMITTED && - midQ->mid_state != MID_RESPONSE_RECEIVED, - (TASK_KILLABLE|TASK_FREEZABLE_UNSAFE)); + mid->mid_state != MID_REQUEST_SUBMITTED && + mid->mid_state != MID_RESPONSE_RECEIVED, + (sleep_state | TASK_FREEZABLE_UNSAFE)); if (error < 0) return -ERESTARTSYS; @@ -660,8 +662,8 @@ int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) */ int cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, - mid_receive_t *receive, mid_callback_t *callback, - mid_handle_t *handle, void *cbdata, const int flags, + mid_receive_t receive, mid_callback_t callback, + mid_handle_t handle, void *cbdata, const int flags, const struct cifs_credits *exist_credits) { int rc; @@ -701,6 +703,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, return PTR_ERR(mid); } + mid->sr_flags = flags; mid->receive = receive; mid->callback = callback; mid->callback_data = cbdata; @@ -722,7 +725,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, if (rc < 0) { revert_current_mid(server, mid->credits); server->sequence_number -= 2; - delete_mid(mid); + delete_mid(server, mid); } cifs_server_unlock(server); @@ -750,7 +753,7 @@ int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server rc = -EAGAIN; break; case MID_RESPONSE_MALFORMED: - rc = -EIO; + rc = smb_EIO(smb_eio_trace_rx_sync_mid_malformed); break; case MID_SHUTDOWN: rc = -EHOSTDOWN; @@ -766,20 +769,19 @@ int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server spin_unlock(&server->mid_queue_lock); cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", __func__, mid->mid, mid->mid_state); - rc = -EIO; + rc = smb_EIO1(smb_eio_trace_rx_sync_mid_invalid, mid->mid_state); goto sync_mid_done; } spin_unlock(&server->mid_queue_lock); sync_mid_done: - release_mid(mid); + release_mid(server, mid); return rc; } static void -cifs_compound_callback(struct mid_q_entry *mid) +cifs_compound_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) { - struct TCP_Server_Info *server = mid->server; struct cifs_credits credits = { .value = server->ops->get_credits(mid), .instance = server->reconnect_instance, @@ -792,17 +794,17 @@ cifs_compound_callback(struct mid_q_entry *mid) } static void -cifs_compound_last_callback(struct mid_q_entry *mid) +cifs_compound_last_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) { - cifs_compound_callback(mid); - cifs_wake_up_task(mid); + cifs_compound_callback(server, mid); + cifs_wake_up_task(server, mid); } static void -cifs_cancelled_callback(struct mid_q_entry *mid) +cifs_cancelled_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) { - cifs_compound_callback(mid); - release_mid(mid); + cifs_compound_callback(server, mid); + release_mid(server, mid); } /* @@ -866,7 +868,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, int *resp_buf_type, struct kvec *resp_iov) { int i, j, optype, rc = 0; - struct mid_q_entry *midQ[MAX_COMPOUND]; + struct mid_q_entry *mid[MAX_COMPOUND]; bool cancelled_mid[MAX_COMPOUND] = {false}; struct cifs_credits credits[MAX_COMPOUND] = { { .value = 0, .instance = 0 } @@ -881,7 +883,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, if (!ses || !ses->server || !server) { cifs_dbg(VFS, "Null session\n"); - return -EIO; + return smb_EIO(smb_eio_trace_null_pointers); } spin_lock(&server->srv_lock); @@ -932,35 +934,36 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, } for (i = 0; i < num_rqst; i++) { - midQ[i] = server->ops->setup_request(ses, server, &rqst[i]); - if (IS_ERR(midQ[i])) { + mid[i] = server->ops->setup_request(ses, server, &rqst[i]); + if (IS_ERR(mid[i])) { revert_current_mid(server, i); for (j = 0; j < i; j++) - delete_mid(midQ[j]); + delete_mid(server, mid[j]); cifs_server_unlock(server); /* Update # of requests on wire to server */ for (j = 0; j < num_rqst; j++) add_credits(server, &credits[j], optype); - return PTR_ERR(midQ[i]); + return PTR_ERR(mid[i]); } - midQ[i]->mid_state = MID_REQUEST_SUBMITTED; - midQ[i]->optype = optype; + mid[i]->sr_flags = flags; + mid[i]->mid_state = MID_REQUEST_SUBMITTED; + mid[i]->optype = optype; /* * Invoke callback for every part of the compound chain * to calculate credits properly. Wake up this thread only when * the last element is received. */ if (i < num_rqst - 1) - midQ[i]->callback = cifs_compound_callback; + mid[i]->callback = cifs_compound_callback; else - midQ[i]->callback = cifs_compound_last_callback; + mid[i]->callback = cifs_compound_last_callback; } rc = smb_send_rqst(server, num_rqst, rqst, flags); for (i = 0; i < num_rqst; i++) - cifs_save_when_sent(midQ[i]); + cifs_save_when_sent(mid[i]); if (rc < 0) { revert_current_mid(server, num_rqst); @@ -994,6 +997,9 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) { spin_unlock(&ses->ses_lock); + if (WARN_ON_ONCE(num_rqst != 1 || !resp_iov)) + return -EINVAL; + cifs_server_lock(server); smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec); cifs_server_unlock(server); @@ -1003,23 +1009,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, spin_unlock(&ses->ses_lock); for (i = 0; i < num_rqst; i++) { - rc = wait_for_response(server, midQ[i]); + rc = wait_for_response(server, mid[i]); if (rc != 0) break; } if (rc != 0) { for (; i < num_rqst; i++) { cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n", - midQ[i]->mid, le16_to_cpu(midQ[i]->command)); - send_cancel(server, &rqst[i], midQ[i]); - spin_lock(&midQ[i]->mid_lock); - midQ[i]->wait_cancelled = true; - if (midQ[i]->callback) { - midQ[i]->callback = cifs_cancelled_callback; + mid[i]->mid, le16_to_cpu(mid[i]->command)); + send_cancel(ses, server, &rqst[i], mid[i], xid); + spin_lock(&mid[i]->mid_lock); + mid[i]->wait_cancelled = true; + if (mid[i]->mid_state == MID_REQUEST_SUBMITTED || + mid[i]->mid_state == MID_RESPONSE_RECEIVED) { + mid[i]->callback = cifs_cancelled_callback; cancelled_mid[i] = true; credits[i].value = 0; } - spin_unlock(&midQ[i]->mid_lock); + spin_unlock(&mid[i]->mid_lock); } } @@ -1027,37 +1034,37 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, if (rc < 0) goto out; - rc = cifs_sync_mid_result(midQ[i], server); + rc = cifs_sync_mid_result(mid[i], server); if (rc != 0) { /* mark this mid as cancelled to not free it below */ cancelled_mid[i] = true; goto out; } - if (!midQ[i]->resp_buf || - midQ[i]->mid_state != MID_RESPONSE_READY) { - rc = -EIO; + if (!mid[i]->resp_buf || + mid[i]->mid_state != MID_RESPONSE_READY) { + rc = smb_EIO1(smb_eio_trace_rx_mid_unready, mid[i]->mid_state); cifs_dbg(FYI, "Bad MID state?\n"); goto out; } - buf = (char *)midQ[i]->resp_buf; - resp_iov[i].iov_base = buf; - resp_iov[i].iov_len = midQ[i]->resp_buf_size + - HEADER_PREAMBLE_SIZE(server); - - if (midQ[i]->large_buf) - resp_buf_type[i] = CIFS_LARGE_BUFFER; - else - resp_buf_type[i] = CIFS_SMALL_BUFFER; + rc = server->ops->check_receive(mid[i], server, + flags & CIFS_LOG_ERROR); - rc = server->ops->check_receive(midQ[i], server, - flags & CIFS_LOG_ERROR); + if (resp_iov) { + buf = (char *)mid[i]->resp_buf; + resp_iov[i].iov_base = buf; + resp_iov[i].iov_len = mid[i]->resp_buf_size; - /* mark it so buf will not be freed by delete_mid */ - if ((flags & CIFS_NO_RSP_BUF) == 0) - midQ[i]->resp_buf = NULL; + if (mid[i]->large_buf) + resp_buf_type[i] = CIFS_LARGE_BUFFER; + else + resp_buf_type[i] = CIFS_SMALL_BUFFER; + /* mark it so buf will not be freed by delete_mid */ + if ((flags & CIFS_NO_RSP_BUF) == 0) + mid[i]->resp_buf = NULL; + } } /* @@ -1086,7 +1093,7 @@ out: */ for (i = 0; i < num_rqst; i++) { if (!cancelled_mid[i]) - delete_mid(midQ[i]); + delete_mid(server, mid[i]); } return rc; @@ -1111,8 +1118,7 @@ int cifs_discard_remaining_data(struct TCP_Server_Info *server) { unsigned int rfclen = server->pdu_size; - size_t remaining = rfclen + HEADER_PREAMBLE_SIZE(server) - - server->total_read; + size_t remaining = rfclen - server->total_read; while (remaining > 0) { ssize_t length; @@ -1136,7 +1142,7 @@ __cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid, int length; length = cifs_discard_remaining_data(server); - dequeue_mid(mid, malformed); + dequeue_mid(server, mid, malformed); mid->resp_buf = server->smallbuf; server->smallbuf = NULL; return length; @@ -1157,7 +1163,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) unsigned int data_offset, data_len; struct cifs_io_subrequest *rdata = mid->callback_data; char *buf = server->smallbuf; - unsigned int buflen = server->pdu_size + HEADER_PREAMBLE_SIZE(server); + unsigned int buflen = server->pdu_size; bool use_rdma_mr = false; cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%zu\n", @@ -1191,14 +1197,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* set up first two iov for signature check and to get credits */ rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = HEADER_PREAMBLE_SIZE(server); - rdata->iov[1].iov_base = buf + HEADER_PREAMBLE_SIZE(server); - rdata->iov[1].iov_len = - server->total_read - HEADER_PREAMBLE_SIZE(server); + rdata->iov[0].iov_len = server->total_read; cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", rdata->iov[0].iov_base, rdata->iov[0].iov_len); - cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n", - rdata->iov[1].iov_base, rdata->iov[1].iov_len); /* Was the SMB read successful? */ rdata->result = server->ops->map_error(buf, false); @@ -1214,12 +1215,12 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) cifs_dbg(FYI, "%s: server returned short header. got=%u expected=%zu\n", __func__, server->total_read, server->vals->read_rsp_size); - rdata->result = -EIO; + rdata->result = smb_EIO2(smb_eio_trace_read_rsp_short, + server->total_read, server->vals->read_rsp_size); return cifs_readv_discard(server, mid); } - data_offset = server->ops->read_data_offset(buf) + - HEADER_PREAMBLE_SIZE(server); + data_offset = server->ops->read_data_offset(buf); if (data_offset < server->total_read) { /* * win2k8 sometimes sends an offset of 0 when the read @@ -1233,7 +1234,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* data_offset is beyond the end of smallbuf */ cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n", __func__, data_offset); - rdata->result = -EIO; + rdata->result = smb_EIO1(smb_eio_trace_read_overlarge, + data_offset); return cifs_readv_discard(server, mid); } @@ -1248,6 +1250,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) if (length < 0) return length; server->total_read += length; + rdata->iov[0].iov_len = server->total_read; } /* how much data is in the response? */ @@ -1257,7 +1260,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) data_len = server->ops->read_data_length(buf, use_rdma_mr); if (!use_rdma_mr && (data_offset + data_len > buflen)) { /* data_len is corrupt -- discard frame */ - rdata->result = -EIO; + rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed, + data_offset + data_len, buflen); return cifs_readv_discard(server, mid); } @@ -1279,7 +1283,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) if (server->total_read < buflen) return cifs_readv_discard(server, mid); - dequeue_mid(mid, false); + dequeue_mid(server, mid, false); mid->resp_buf = server->smallbuf; server->smallbuf = NULL; return length; diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index 029910d56c22..6bc89c59164a 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -397,7 +397,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) void *page; if (unlikely(cifs_forced_shutdown(cifs_sb))) - return -EIO; + return smb_EIO(smb_eio_trace_forced_shutdown); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) return -EOPNOTSUPP; diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index f38c5739a9d2..945a8e0cf36c 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -2016,9 +2016,6 @@ struct smb2_lease_ack { * MS-SMB 2.2.3.1 */ struct smb_hdr { - __be32 smb_buf_length; /* BB length is only two (rarely three) bytes, - with one or two byte "type" preceding it that will be - zero - we could mask the type byte off */ __u8 Protocol[4]; __u8 Command; union { diff --git a/fs/smb/common/smb2status.h b/fs/smb/common/smb2status.h index 14b4a5f04564..7d6b8ed304fc 100644 --- a/fs/smb/common/smb2status.h +++ b/fs/smb/common/smb2status.h @@ -631,6 +631,7 @@ struct ntstatus { #define STATUS_DOMAIN_TRUST_INCONSISTENT cpu_to_le32(0xC000019B) #define STATUS_FS_DRIVER_REQUIRED cpu_to_le32(0xC000019C) #define STATUS_IMAGE_ALREADY_LOADED_AS_DLL cpu_to_le32(0xC000019D) +#define STATUS_INVALID_LOCK_RANGE cpu_to_le32(0xC00001A1) #define STATUS_NETWORK_OPEN_RESTRICTION cpu_to_le32(0xC0000201) #define STATUS_NO_USER_SESSION_KEY cpu_to_le32(0xC0000202) #define STATUS_USER_SESSION_DELETED cpu_to_le32(0xC0000203) @@ -1773,5 +1774,5 @@ struct ntstatus { #define STATUS_IPSEC_INVALID_PACKET cpu_to_le32(0xC0360005) #define STATUS_IPSEC_INTEGRITY_CHECK_FAILED cpu_to_le32(0xC0360006) #define STATUS_IPSEC_CLEAR_TEXT_DROP cpu_to_le32(0xC0360007) -#define STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP cpu_to_le32(0xC05D0000) -#define STATUS_INVALID_LOCK_RANGE cpu_to_le32(0xC00001a1) +/* See MS-SMB2 3.3.5.4 */ +#define STATUS_SMB_NO_PREAUTH_INTEGRITY_HASH_OVERLAP cpu_to_le32(0xC05D0000) diff --git a/fs/smb/common/smbglob.h b/fs/smb/common/smbglob.h index 7853b5771128..9562845a5617 100644 --- a/fs/smb/common/smbglob.h +++ b/fs/smb/common/smbglob.h @@ -26,7 +26,6 @@ struct smb_version_values { __u32 exclusive_lock_type; __u32 shared_lock_type; __u32 unlock_lock_type; - size_t header_preamble_size; size_t header_size; size_t max_header_size; size_t read_rsp_size; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 7ea6144c692c..f3184b217575 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -896,7 +896,7 @@ static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn, return STATUS_INVALID_PARAMETER; if (pneg_ctxt->HashAlgorithms != SMB2_PREAUTH_INTEGRITY_SHA512) - return STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP; + return STATUS_SMB_NO_PREAUTH_INTEGRITY_HASH_OVERLAP; conn->preauth_info->Preauth_HashId = SMB2_PREAUTH_INTEGRITY_SHA512; return STATUS_SUCCESS; diff --git a/fs/super.c b/fs/super.c index 7c66b96b59be..3d85265d1400 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1189,7 +1189,7 @@ static void filesystems_freeze_callback(struct super_block *sb, void *freeze_all if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super) return; - if (freeze_all_ptr && !(sb->s_type->fs_flags & FS_POWER_FREEZE)) + if (!freeze_all_ptr && !(sb->s_type->fs_flags & FS_POWER_FREEZE)) return; if (!get_active_super(sb)) @@ -1292,14 +1292,6 @@ void kill_anon_super(struct super_block *sb) } EXPORT_SYMBOL(kill_anon_super); -void kill_litter_super(struct super_block *sb) -{ - if (sb->s_root) - d_genocide(sb->s_root); - kill_anon_super(sb); -} -EXPORT_SYMBOL(kill_litter_super); - int set_anon_super_fc(struct super_block *sb, struct fs_context *fc) { return set_anon_super(sb, NULL); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index e142bac4f9f8..e1e639f515a0 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -36,6 +36,9 @@ static umode_t __first_visible(const struct attribute_group *grp, struct kobject if (grp->attrs && grp->attrs[0] && grp->is_visible) return grp->is_visible(kobj, grp->attrs[0], 0); + if (grp->attrs && grp->attrs[0] && grp->is_visible_const) + return grp->is_visible_const(kobj, grp->attrs[0], 0); + if (grp->bin_attrs && grp->bin_attrs[0] && grp->is_bin_visible) return grp->is_bin_visible(kobj, grp->bin_attrs[0], 0); @@ -61,8 +64,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, */ if (update) kernfs_remove_by_name(parent, (*attr)->name); - if (grp->is_visible) { - mode = grp->is_visible(kobj, *attr, i); + if (grp->is_visible || grp->is_visible_const) { + if (grp->is_visible) + mode = grp->is_visible(kobj, *attr, i); + else + mode = grp->is_visible_const(kobj, *attr, i); mode &= ~SYSFS_GROUP_INVISIBLE; if (!mode) continue; diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 8705c77a9e75..61cbdafa2411 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -757,7 +757,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry const struct eventfs_entry *entries, int size, void *data) { - struct dentry *dentry = tracefs_start_creating(name, parent); + struct dentry *dentry; struct eventfs_root_inode *rei; struct eventfs_inode *ei; struct tracefs_inode *ti; @@ -768,6 +768,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry if (security_locked_down(LOCKDOWN_TRACEFS)) return NULL; + dentry = tracefs_start_creating(name, parent); if (IS_ERR(dentry)) return ERR_CAST(dentry); @@ -822,7 +823,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry * something not worth much. Keeping directory links at 1 * tells userspace not to trust the link number. */ - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); /* The dentry of the "events" parent does keep track though */ inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); @@ -909,5 +910,5 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei) * and destroyed dynamically. */ d_invalidate(dentry); - dput(dentry); + d_make_discardable(dentry); } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 0c023941a316..d9d8932a7b9c 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -538,7 +538,7 @@ static struct file_system_type trace_fs_type = { .name = "tracefs", .init_fs_context = tracefs_init_fs_context, .parameters = tracefs_param_specs, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("tracefs"); @@ -571,16 +571,15 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent) struct dentry *tracefs_failed_creating(struct dentry *dentry) { - inode_unlock(d_inode(dentry->d_parent)); - dput(dentry); + simple_done_creating(dentry); simple_release_fs(&tracefs_mount, &tracefs_mount_count); return NULL; } struct dentry *tracefs_end_creating(struct dentry *dentry) { - inode_unlock(d_inode(dentry->d_parent)); - return dentry; + simple_done_creating(dentry); + return dentry; // borrowed } /* Find the inode that this will use for default */ @@ -661,7 +660,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, inode->i_private = data; inode->i_uid = d_inode(dentry->d_parent)->i_uid; inode->i_gid = d_inode(dentry->d_parent)->i_gid; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); return tracefs_end_creating(dentry); } @@ -692,7 +691,7 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return tracefs_end_creating(dentry); diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index a79f229df475..6c6d68242779 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -327,8 +327,6 @@ out: */ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) { - uint32_t crc; - ubifs_assert(c, pad >= 0); if (pad >= UBIFS_PAD_NODE_SZ) { @@ -343,8 +341,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); pad -= UBIFS_PAD_NODE_SZ; pad_node->pad_len = cpu_to_le32(pad); - crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); - ch->crc = cpu_to_le32(crc); + ubifs_crc_node(buf, UBIFS_PAD_NODE_SZ); memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); } else if (pad > 0) /* Too little space, padding node won't fit */ @@ -395,7 +392,7 @@ void ubifs_init_node(struct ubifs_info *c, void *node, int len, int pad) } } -void ubifs_crc_node(struct ubifs_info *c, void *node, int len) +void ubifs_crc_node(void *node, int len) { struct ubifs_ch *ch = node; uint32_t crc; @@ -432,7 +429,7 @@ int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len, return err; } - ubifs_crc_node(c, node, len); + ubifs_crc_node(node, len); return 0; } @@ -469,7 +466,6 @@ void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) */ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) { - uint32_t crc; struct ubifs_ch *ch = node; unsigned long long sqnum = next_sqnum(c); @@ -483,8 +479,7 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) ch->group_type = UBIFS_IN_NODE_GROUP; ch->sqnum = cpu_to_le64(sqnum); ch->padding[0] = ch->padding[1] = 0; - crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); - ch->crc = cpu_to_le32(crc); + ubifs_crc_node(node, len); } /** diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 441d0beca4cf..dde0aa3287f4 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -628,8 +628,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL); nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL); buf = vmalloc(c->leb_size); - ltab = vmalloc(array_size(sizeof(struct ubifs_lpt_lprops), - c->lpt_lebs)); + ltab = vmalloc_array(c->lpt_lebs, + sizeof(struct ubifs_lpt_lprops)); if (!pnode || !nnode || !buf || !ltab || !lsave) { err = -ENOMEM; goto out; @@ -1777,8 +1777,8 @@ static int lpt_init_rd(struct ubifs_info *c) { int err, i; - c->ltab = vmalloc(array_size(sizeof(struct ubifs_lpt_lprops), - c->lpt_lebs)); + c->ltab = vmalloc_array(c->lpt_lebs, + sizeof(struct ubifs_lpt_lprops)); if (!c->ltab) return -ENOMEM; @@ -1846,8 +1846,8 @@ static int lpt_init_wr(struct ubifs_info *c) { int err, i; - c->ltab_cmt = vmalloc(array_size(sizeof(struct ubifs_lpt_lprops), - c->lpt_lebs)); + c->ltab_cmt = vmalloc_array(c->lpt_lebs, + sizeof(struct ubifs_lpt_lprops)); if (!c->ltab_cmt) return -ENOMEM; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index f0d51dd21c9e..b36dc9b032f4 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1406,7 +1406,6 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) union ubifs_key key; int err, lnum, offs, len; loff_t i_size; - uint32_t crc; /* Locate the inode node LEB number and offset */ ino_key_init(c, &key, e->inum); @@ -1428,8 +1427,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) ino = c->sbuf + offs; ino->size = cpu_to_le64(e->d_size); len = le32_to_cpu(ino->ch.len); - crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8); - ino->ch.crc = cpu_to_le32(crc); + ubifs_crc_node((void *)ino, len); /* Work out where data in the LEB ends and free space begins */ p = c->sbuf; len = c->leb_size - 1; diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index d3f8a6aa1f49..10b222dc6a53 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -321,7 +321,6 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, c->fanout, znode->child_cnt); ubifs_err(c, "max levels %d, znode level %d", UBIFS_MAX_LEVELS, znode->level); - err = 1; goto out_dump; } @@ -342,7 +341,6 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, zbr->lnum >= c->leb_cnt || zbr->offs < 0 || zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { ubifs_err(c, "bad branch %d", i); - err = 2; goto out_dump; } @@ -355,7 +353,6 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, default: ubifs_err(c, "bad key type at slot %d: %d", i, key_type(c, &zbr->key)); - err = 3; goto out_dump; } @@ -368,7 +365,6 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, ubifs_err(c, "bad target node (type %d) length (%d)", type, zbr->len); ubifs_err(c, "have to be %d", c->ranges[type].len); - err = 4; goto out_dump; } } else if (zbr->len < c->ranges[type].min_len || @@ -378,7 +374,6 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, ubifs_err(c, "have to be in range of %d-%d", c->ranges[type].min_len, c->ranges[type].max_len); - err = 5; goto out_dump; } } @@ -396,13 +391,11 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, cmp = keys_cmp(c, key1, key2); if (cmp > 0) { ubifs_err(c, "bad key order (keys %d and %d)", i, i + 1); - err = 6; goto out_dump; } else if (cmp == 0 && !is_hash_key(c, key1)) { /* These can only be keys with colliding hash */ ubifs_err(c, "keys %d and %d are not hashed but equivalent", i, i + 1); - err = 7; goto out_dump; } } @@ -411,7 +404,7 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, return 0; out_dump: - ubifs_err(c, "bad indexing node at LEB %d:%d, error %d", lnum, offs, err); + ubifs_err(c, "bad indexing node at LEB %d:%d", lnum, offs); ubifs_dump_node(c, idx, c->max_idx_node_sz); kfree(idx); return -EINVAL; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 49e50431741c..118392aa9f2a 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1747,7 +1747,7 @@ int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum, int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len, int lnum, int offs, int quiet, int must_chk_crc); void ubifs_init_node(struct ubifs_info *c, void *buf, int len, int pad); -void ubifs_crc_node(struct ubifs_info *c, void *buf, int len); +void ubifs_crc_node(void *buf, int len); void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len, int hmac_offs, int pad); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index e6e74b384087..c5ba1f4487bd 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -29,7 +29,7 @@ #include <linux/ioctl.h> #include <linux/security.h> #include <linux/hugetlb.h> -#include <linux/swapops.h> +#include <linux/leafops.h> #include <linux/miscdevice.h> #include <linux/uio.h> @@ -233,40 +233,48 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, { struct vm_area_struct *vma = vmf->vma; pte_t *ptep, pte; - bool ret = true; assert_fault_locked(vmf); ptep = hugetlb_walk(vma, vmf->address, vma_mmu_pagesize(vma)); if (!ptep) - goto out; + return true; - ret = false; pte = huge_ptep_get(vma->vm_mm, vmf->address, ptep); /* * Lockless access: we're in a wait_event so it's ok if it - * changes under us. PTE markers should be handled the same as none - * ptes here. + * changes under us. + */ + + /* Entry is still missing, wait for userspace to resolve the fault. */ + if (huge_pte_none(pte)) + return true; + /* UFFD PTE markers require userspace to resolve the fault. */ + if (pte_is_uffd_marker(pte)) + return true; + /* + * If VMA has UFFD WP faults enabled and WP fault, wait for userspace to + * resolve the fault. */ - if (huge_pte_none_mostly(pte)) - ret = true; if (!huge_pte_write(pte) && (reason & VM_UFFD_WP)) - ret = true; -out: - return ret; + return true; + + return false; } #else static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, struct vm_fault *vmf, unsigned long reason) { - return false; /* should never get here */ + /* Should never get here. */ + VM_WARN_ON_ONCE(1); + return false; } #endif /* CONFIG_HUGETLB_PAGE */ /* - * Verify the pagetables are still not ok after having reigstered into + * Verify the pagetables are still not ok after having registered into * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any * userfault that has already been resolved, if userfaultfd_read_iter and * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different @@ -284,53 +292,63 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, pmd_t *pmd, _pmd; pte_t *pte; pte_t ptent; - bool ret = true; + bool ret; assert_fault_locked(vmf); pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) - goto out; + return true; p4d = p4d_offset(pgd, address); if (!p4d_present(*p4d)) - goto out; + return true; pud = pud_offset(p4d, address); if (!pud_present(*pud)) - goto out; + return true; pmd = pmd_offset(pud, address); again: _pmd = pmdp_get_lockless(pmd); if (pmd_none(_pmd)) - goto out; + return true; - ret = false; + /* + * A race could arise which would result in a softleaf entry such as + * migration entry unexpectedly being present in the PMD, so explicitly + * check for this and bail out if so. + */ if (!pmd_present(_pmd)) - goto out; + return false; - if (pmd_trans_huge(_pmd)) { - if (!pmd_write(_pmd) && (reason & VM_UFFD_WP)) - ret = true; - goto out; - } + if (pmd_trans_huge(_pmd)) + return !pmd_write(_pmd) && (reason & VM_UFFD_WP); pte = pte_offset_map(pmd, address); - if (!pte) { - ret = true; + if (!pte) goto again; - } + /* * Lockless access: we're in a wait_event so it's ok if it - * changes under us. PTE markers should be handled the same as none - * ptes here. + * changes under us. */ ptent = ptep_get(pte); - if (pte_none_mostly(ptent)) - ret = true; + + ret = true; + /* Entry is still missing, wait for userspace to resolve the fault. */ + if (pte_none(ptent)) + goto out; + /* UFFD PTE markers require userspace to resolve the fault. */ + if (pte_is_uffd_marker(ptent)) + goto out; + /* + * If VMA has UFFD WP faults enabled and WP fault, wait for userspace to + * resolve the fault. + */ if (!pte_write(ptent) && (reason & VM_UFFD_WP)) - ret = true; - pte_unmap(pte); + goto out; + ret = false; out: + pte_unmap(pte); return ret; } @@ -490,12 +508,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) set_current_state(blocking_state); spin_unlock_irq(&ctx->fault_pending_wqh.lock); - if (!is_vm_hugetlb_page(vma)) - must_wait = userfaultfd_must_wait(ctx, vmf, reason); - else + if (is_vm_hugetlb_page(vma)) { must_wait = userfaultfd_huge_must_wait(ctx, vmf, reason); - if (is_vm_hugetlb_page(vma)) hugetlb_vma_unlock_read(vma); + } else { + must_wait = userfaultfd_must_wait(ctx, vmf, reason); + } + release_fault_lock(vmf); if (likely(must_wait && !READ_ONCE(ctx->released))) { @@ -1270,9 +1289,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING) vm_flags |= VM_UFFD_MISSING; if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) { -#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP - goto out; -#endif + if (!pgtable_supports_uffd_wp()) + goto out; + vm_flags |= VM_UFFD_WP; } if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR) { @@ -1980,14 +1999,14 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, uffdio_api.features &= ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM); #endif -#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP - uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP; -#endif -#ifndef CONFIG_PTE_MARKER_UFFD_WP - uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM; - uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; - uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; -#endif + if (!pgtable_supports_uffd_wp()) + uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP; + + if (!uffd_supports_wp_marker()) { + uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM; + uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; + uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; + } ret = -EINVAL; if (features & ~uffdio_api.features) |
