diff options
Diffstat (limited to 'fs')
41 files changed, 875 insertions, 683 deletions
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index a537368ba0db..fd9f28b8a933 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -332,11 +332,18 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac) */ int afs_end_cursor(struct afs_addr_cursor *ac) { - if (ac->responded && ac->index != ac->start) - WRITE_ONCE(ac->alist->index, ac->index); + struct afs_addr_list *alist; + + alist = ac->alist; + if (alist) { + if (ac->responded && ac->index != ac->start) + WRITE_ONCE(alist->index, ac->index); + afs_put_addrlist(alist); + } - afs_put_addrlist(ac->alist); + ac->addr = NULL; ac->alist = NULL; + ac->begun = false; return ac->error; } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 23c7f395d718..ba2b458b36d1 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -17,10 +17,13 @@ #include <linux/pagemap.h> #include <linux/ctype.h> #include <linux/sched.h> +#include <linux/dns_resolver.h> #include "internal.h" static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); +static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, struct dir_context *ctx); static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); @@ -64,6 +67,17 @@ const struct inode_operations afs_dir_inode_operations = { .listxattr = afs_listxattr, }; +const struct file_operations afs_dynroot_file_operations = { + .open = dcache_dir_open, + .release = dcache_dir_close, + .iterate_shared = dcache_readdir, + .llseek = dcache_dir_lseek, +}; + +const struct inode_operations afs_dynroot_inode_operations = { + .lookup = afs_dynroot_lookup, +}; + const struct dentry_operations afs_fs_dentry_operations = { .d_revalidate = afs_d_revalidate, .d_delete = afs_d_delete, @@ -468,25 +482,58 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry, } /* + * Probe to see if a cell may exist. This prevents positive dentries from + * being created unnecessarily. + */ +static int afs_probe_cell_name(struct dentry *dentry) +{ + struct afs_cell *cell; + const char *name = dentry->d_name.name; + size_t len = dentry->d_name.len; + int ret; + + /* Names prefixed with a dot are R/W mounts. */ + if (name[0] == '.') { + if (len == 1) + return -EINVAL; + name++; + len--; + } + + cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len); + if (!IS_ERR(cell)) { + afs_put_cell(afs_d2net(dentry), cell); + return 0; + } + + ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL); + if (ret == -ENODATA) + ret = -EDESTADDRREQ; + return ret; +} + +/* * Try to auto mount the mountpoint with pseudo directory, if the autocell * operation is setted. */ -static struct inode *afs_try_auto_mntpt( - int ret, struct dentry *dentry, struct inode *dir, struct key *key, - struct afs_fid *fid) +static struct inode *afs_try_auto_mntpt(struct dentry *dentry, + struct inode *dir, struct afs_fid *fid) { - const char *devname = dentry->d_name.name; struct afs_vnode *vnode = AFS_FS_I(dir); struct inode *inode; + int ret = -ENOENT; - _enter("%d, %p{%pd}, {%x:%u}, %p", - ret, dentry, dentry, vnode->fid.vid, vnode->fid.vnode, key); + _enter("%p{%pd}, {%x:%u}", + dentry, dentry, vnode->fid.vid, vnode->fid.vnode); + + if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) + goto out; - if (ret != -ENOENT || - !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) + ret = afs_probe_cell_name(dentry); + if (ret < 0) goto out; - inode = afs_iget_autocell(dir, devname, strlen(devname), key); + inode = afs_iget_pseudo_dir(dir->i_sb, false); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto out; @@ -545,13 +592,16 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, ret = afs_do_lookup(dir, dentry, &fid, key); if (ret < 0) { - inode = afs_try_auto_mntpt(ret, dentry, dir, key, &fid); - if (!IS_ERR(inode)) { - key_put(key); - goto success; + if (ret == -ENOENT) { + inode = afs_try_auto_mntpt(dentry, dir, &fid); + if (!IS_ERR(inode)) { + key_put(key); + goto success; + } + + ret = PTR_ERR(inode); } - ret = PTR_ERR(inode); key_put(key); if (ret == -ENOENT) { d_add(dentry, NULL); @@ -583,12 +633,53 @@ success: } /* + * Look up an entry in a dynroot directory. + */ +static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct afs_vnode *vnode; + struct afs_fid fid; + struct inode *inode; + int ret; + + vnode = AFS_FS_I(dir); + + _enter("%pd", dentry); + + ASSERTCMP(d_inode(dentry), ==, NULL); + + if (dentry->d_name.len >= AFSNAMEMAX) { + _leave(" = -ENAMETOOLONG"); + return ERR_PTR(-ENAMETOOLONG); + } + + inode = afs_try_auto_mntpt(dentry, dir, &fid); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + if (ret == -ENOENT) { + d_add(dentry, NULL); + _leave(" = NULL [negative]"); + return NULL; + } + _leave(" = %d [do]", ret); + return ERR_PTR(ret); + } + + d_add(dentry, inode); + _leave(" = 0 { ino=%lu v=%u }", + d_inode(dentry)->i_ino, d_inode(dentry)->i_generation); + return NULL; +} + +/* * check that a dentry lookup hit has found a valid entry * - NOTE! the hit can be a negative hit too, so we can't assume we have an * inode */ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) { + struct afs_super_info *as = dentry->d_sb->s_fs_info; struct afs_vnode *vnode, *dir; struct afs_fid uninitialized_var(fid); struct dentry *parent; @@ -600,6 +691,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (flags & LOOKUP_RCU) return -ECHILD; + if (as->dyn_root) + return 1; + if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); _enter("{v={%x:%u} n=%pd fl=%lx},", diff --git a/fs/afs/inode.c b/fs/afs/inode.c index c7f17c44c7ce..6b39d0255b72 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -147,7 +147,7 @@ int afs_iget5_test(struct inode *inode, void *opaque) * * These pseudo inodes don't match anything. */ -static int afs_iget5_autocell_test(struct inode *inode, void *opaque) +static int afs_iget5_pseudo_dir_test(struct inode *inode, void *opaque) { return 0; } @@ -169,31 +169,34 @@ static int afs_iget5_set(struct inode *inode, void *opaque) } /* - * inode retrieval for autocell + * Create an inode for a dynamic root directory or an autocell dynamic + * automount dir. */ -struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, - int namesz, struct key *key) +struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) { struct afs_iget_data data; struct afs_super_info *as; struct afs_vnode *vnode; - struct super_block *sb; struct inode *inode; static atomic_t afs_autocell_ino; - _enter("{%x:%u},%*.*s,", - AFS_FS_I(dir)->fid.vid, AFS_FS_I(dir)->fid.vnode, - namesz, namesz, dev_name ?: ""); + _enter(""); - sb = dir->i_sb; as = sb->s_fs_info; - data.volume = as->volume; - data.fid.vid = as->volume->vid; - data.fid.unique = 0; - data.fid.vnode = 0; + if (as->volume) { + data.volume = as->volume; + data.fid.vid = as->volume->vid; + } + if (root) { + data.fid.vnode = 1; + data.fid.unique = 1; + } else { + data.fid.vnode = atomic_inc_return(&afs_autocell_ino); + data.fid.unique = 0; + } - inode = iget5_locked(sb, atomic_inc_return(&afs_autocell_ino), - afs_iget5_autocell_test, afs_iget5_set, + inode = iget5_locked(sb, data.fid.vnode, + afs_iget5_pseudo_dir_test, afs_iget5_set, &data); if (!inode) { _leave(" = -ENOMEM"); @@ -211,7 +214,12 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, inode->i_size = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; - inode->i_op = &afs_autocell_inode_operations; + if (root) { + inode->i_op = &afs_dynroot_inode_operations; + inode->i_fop = &afs_dynroot_file_operations; + } else { + inode->i_op = &afs_autocell_inode_operations; + } set_nlink(inode, 2); inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; @@ -223,8 +231,12 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, inode->i_generation = 0; set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); - set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); - inode->i_flags |= S_AUTOMOUNT | S_NOATIME; + if (!root) { + set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + inode->i_flags |= S_AUTOMOUNT; + } + + inode->i_flags |= S_NOATIME; unlock_new_inode(inode); _leave(" = %p", inode); return inode; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 804d1f905622..f38d6a561a84 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -36,6 +36,7 @@ struct afs_mount_params { bool rwpath; /* T if the parent should be considered R/W */ bool force; /* T to force cell type */ bool autocell; /* T if set auto mount operation */ + bool dyn_root; /* T if dynamic root */ afs_voltype_t type; /* type of volume requested */ int volnamesz; /* size of volume name */ const char *volname; /* name of volume to mount */ @@ -186,6 +187,7 @@ struct afs_super_info { struct afs_net *net; /* Network namespace */ struct afs_cell *cell; /* The cell in which the volume resides */ struct afs_volume *volume; /* volume record */ + bool dyn_root; /* True if dynamic root */ }; static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) @@ -634,10 +636,13 @@ extern bool afs_cm_incoming_call(struct afs_call *); /* * dir.c */ -extern bool afs_dir_check_page(struct inode *, struct page *); +extern const struct file_operations afs_dir_file_operations; extern const struct inode_operations afs_dir_inode_operations; +extern const struct file_operations afs_dynroot_file_operations; +extern const struct inode_operations afs_dynroot_inode_operations; extern const struct dentry_operations afs_fs_dentry_operations; -extern const struct file_operations afs_dir_file_operations; + +extern bool afs_dir_check_page(struct inode *, struct page *); /* * file.c @@ -695,8 +700,7 @@ extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *, */ extern int afs_fetch_status(struct afs_vnode *, struct key *); extern int afs_iget5_test(struct inode *, void *); -extern struct inode *afs_iget_autocell(struct inode *, const char *, int, - struct key *); +extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool); extern struct inode *afs_iget(struct super_block *, struct key *, struct afs_fid *, struct afs_file_status *, struct afs_callback *, diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 690fea9d84c3..99fd13500a97 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -72,7 +72,7 @@ static int afs_mntpt_open(struct inode *inode, struct file *file) */ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) { - struct afs_super_info *super; + struct afs_super_info *as; struct vfsmount *mnt; struct afs_vnode *vnode; struct page *page; @@ -104,13 +104,13 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) goto error_no_page; if (mntpt->d_name.name[0] == '.') { - devname[0] = '#'; - memcpy(devname + 1, mntpt->d_name.name, size - 1); + devname[0] = '%'; + memcpy(devname + 1, mntpt->d_name.name + 1, size - 1); memcpy(devname + size, afs_root_cell, sizeof(afs_root_cell)); rwpath = true; } else { - devname[0] = '%'; + devname[0] = '#'; memcpy(devname + 1, mntpt->d_name.name, size); memcpy(devname + size + 1, afs_root_cell, sizeof(afs_root_cell)); @@ -142,11 +142,13 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) } /* work out what options we want */ - super = AFS_FS_S(mntpt->d_sb); - memcpy(options, "cell=", 5); - strcpy(options + 5, super->volume->cell->name); - if (super->volume->type == AFSVL_RWVOL || rwpath) - strcat(options, ",rwpath"); + as = AFS_FS_S(mntpt->d_sb); + if (as->cell) { + memcpy(options, "cell=", 5); + strcpy(options + 5, as->cell->name); + if ((as->volume && as->volume->type == AFSVL_RWVOL) || rwpath) + strcat(options, ",rwpath"); + } /* try and do the mount */ _debug("--- attempting mount %s -o %s ---", devname, options); diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index d04511fb3879..ad1328d85526 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -330,26 +330,6 @@ start: if (!afs_start_fs_iteration(fc, vnode)) goto failed; - goto use_server; - -next_server: - _debug("next"); - afs_put_cb_interest(afs_v2net(vnode), fc->cbi); - fc->cbi = NULL; - fc->index++; - if (fc->index >= fc->server_list->nr_servers) - fc->index = 0; - if (fc->index != fc->start) - goto use_server; - - /* That's all the servers poked to no good effect. Try again if some - * of them were busy. - */ - if (fc->flags & AFS_FS_CURSOR_VBUSY) - goto restart_from_beginning; - - fc->ac.error = -EDESTADDRREQ; - goto failed; use_server: _debug("use"); @@ -383,6 +363,7 @@ use_server: afs_get_addrlist(alist); read_unlock(&server->fs_lock); + memset(&fc->ac, 0, sizeof(fc->ac)); /* Probe the current fileserver if we haven't done so yet. */ if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { @@ -397,12 +378,8 @@ use_server: else afs_put_addrlist(alist); - fc->ac.addr = NULL; fc->ac.start = READ_ONCE(alist->index); fc->ac.index = fc->ac.start; - fc->ac.error = 0; - fc->ac.begun = false; - goto iterate_address; iterate_address: ASSERT(fc->ac.alist); @@ -410,16 +387,35 @@ iterate_address: /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ - if (afs_iterate_addresses(&fc->ac)) { - _leave(" = t"); - return true; - } + if (!afs_iterate_addresses(&fc->ac)) + goto next_server; + + _leave(" = t"); + return true; +next_server: + _debug("next"); afs_end_cursor(&fc->ac); - goto next_server; + afs_put_cb_interest(afs_v2net(vnode), fc->cbi); + fc->cbi = NULL; + fc->index++; + if (fc->index >= fc->server_list->nr_servers) + fc->index = 0; + if (fc->index != fc->start) + goto use_server; + + /* That's all the servers poked to no good effect. Try again if some + * of them were busy. + */ + if (fc->flags & AFS_FS_CURSOR_VBUSY) + goto restart_from_beginning; + + fc->ac.error = -EDESTADDRREQ; + goto failed; failed: fc->flags |= AFS_FS_CURSOR_STOP; + afs_end_cursor(&fc->ac); _leave(" = f [failed %d]", fc->ac.error); return false; } @@ -458,12 +454,10 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) return false; } + memset(&fc->ac, 0, sizeof(fc->ac)); fc->ac.alist = alist; - fc->ac.addr = NULL; fc->ac.start = READ_ONCE(alist->index); fc->ac.index = fc->ac.start; - fc->ac.error = 0; - fc->ac.begun = false; goto iterate_address; case 0: @@ -520,238 +514,3 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc) return fc->ac.error; } - -#if 0 -/* - * Set a filesystem server cursor for using a specific FS server. - */ -int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) -{ - afs_init_fs_cursor(fc, vnode); - - read_seqlock_excl(&vnode->cb_lock); - if (vnode->cb_interest) { - if (vnode->cb_interest->server->fs_state == 0) - fc->server = afs_get_server(vnode->cb_interest->server); - else - fc->ac.error = vnode->cb_interest->server->fs_state; - } else { - fc->ac.error = -ESTALE; - } - read_sequnlock_excl(&vnode->cb_lock); - - return fc->ac.error; -} - -/* - * pick a server to use to try accessing this volume - * - returns with an elevated usage count on the server chosen - */ -bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode) -{ - struct afs_volume *volume = vnode->volume; - struct afs_server *server; - int ret, state, loop; - - _enter("%s", volume->vlocation->vldb.name); - - /* stick with the server we're already using if we can */ - if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) { - fc->server = afs_get_server(vnode->cb_interest->server); - goto set_server; - } - - down_read(&volume->server_sem); - - /* handle the no-server case */ - if (volume->nservers == 0) { - fc->ac.error = volume->rjservers ? -ENOMEDIUM : -ESTALE; - up_read(&volume->server_sem); - _leave(" = f [no servers %d]", fc->ac.error); - return false; - } - - /* basically, just search the list for the first live server and use - * that */ - ret = 0; - for (loop = 0; loop < volume->nservers; loop++) { - server = volume->servers[loop]; - state = server->fs_state; - - _debug("consider %d [%d]", loop, state); - - switch (state) { - case 0: - goto picked_server; - - case -ENETUNREACH: - if (ret == 0) - ret = state; - break; - - case -EHOSTUNREACH: - if (ret == 0 || - ret == -ENETUNREACH) - ret = state; - break; - - case -ECONNREFUSED: - if (ret == 0 || - ret == -ENETUNREACH || - ret == -EHOSTUNREACH) - ret = state; - break; - - default: - case -EREMOTEIO: - if (ret == 0 || - ret == -ENETUNREACH || - ret == -EHOSTUNREACH || - ret == -ECONNREFUSED) - ret = state; - break; - } - } - -error: - fc->ac.error = ret; - - /* no available servers - * - TODO: handle the no active servers case better - */ - up_read(&volume->server_sem); - _leave(" = f [%d]", fc->ac.error); - return false; - -picked_server: - /* Found an apparently healthy server. We need to register an interest - * in receiving callbacks before we talk to it. - */ - ret = afs_register_server_cb_interest(vnode, - &volume->cb_interests[loop], server); - if (ret < 0) - goto error; - - fc->server = afs_get_server(server); - up_read(&volume->server_sem); -set_server: - fc->ac.alist = afs_get_addrlist(fc->server->addrs); - fc->ac.addr = &fc->ac.alist->addrs[0]; - _debug("USING SERVER: %pIS\n", &fc->ac.addr->transport); - _leave(" = t (picked %pIS)", &fc->ac.addr->transport); - return true; -} - -/* - * release a server after use - * - releases the ref on the server struct that was acquired by picking - * - records result of using a particular server to access a volume - * - return true to try again, false if okay or to issue error - * - the caller must release the server struct if result was false - */ -bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc, - struct afs_vnode *vnode) -{ - struct afs_volume *volume = vnode->volume; - struct afs_server *server = fc->server; - unsigned loop; - - _enter("%s,%pIS,%d", - volume->vlocation->vldb.name, &fc->ac.addr->transport, - fc->ac.error); - - switch (fc->ac.error) { - /* success */ - case 0: - server->fs_state = 0; - _leave(" = f"); - return false; - - /* the fileserver denied all knowledge of the volume */ - case -ENOMEDIUM: - down_write(&volume->server_sem); - - /* firstly, find where the server is in the active list (if it - * is) */ - for (loop = 0; loop < volume->nservers; loop++) - if (volume->servers[loop] == server) - goto present; - - /* no longer there - may have been discarded by another op */ - goto try_next_server_upw; - - present: - volume->nservers--; - memmove(&volume->servers[loop], - &volume->servers[loop + 1], - sizeof(volume->servers[loop]) * - (volume->nservers - loop)); - volume->servers[volume->nservers] = NULL; - afs_put_server(afs_v2net(vnode), server); - volume->rjservers++; - - if (volume->nservers > 0) - /* another server might acknowledge its existence */ - goto try_next_server_upw; - - /* handle the case where all the fileservers have rejected the - * volume - * - TODO: try asking the fileservers for volume information - * - TODO: contact the VL server again to see if the volume is - * no longer registered - */ - up_write(&volume->server_sem); - afs_put_server(afs_v2net(vnode), server); - fc->server = NULL; - _leave(" = f [completely rejected]"); - return false; - - /* problem reaching the server */ - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - case -ETIME: - case -ETIMEDOUT: - case -EREMOTEIO: - /* mark the server as dead - * TODO: vary dead timeout depending on error - */ - spin_lock(&server->fs_lock); - if (!server->fs_state) { - server->fs_state = fc->ac.error; - printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error); - } - spin_unlock(&server->fs_lock); - goto try_next_server; - - /* miscellaneous error */ - default: - case -ENOMEM: - case -ENONET: - /* tell the caller to accept the result */ - afs_put_server(afs_v2net(vnode), server); - fc->server = NULL; - _leave(" = f [local failure]"); - return false; - } - - /* tell the caller to loop around and try the next server */ -try_next_server_upw: - up_write(&volume->server_sem); -try_next_server: - afs_put_server(afs_v2net(vnode), server); - _leave(" = t [try next server]"); - return true; -} - -/* - * Clean up a fileserver cursor. - */ -int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net) -{ - afs_end_cursor(&fc->ac); - afs_put_server(net, fc->server); - return fc->ac.error; -} - -#endif diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 0ab3f8457839..0f8dc4c8f07c 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -58,7 +58,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, server = afs_lookup_server(cell, key, &vldb->fs_server[i]); if (IS_ERR(server)) { ret = PTR_ERR(server); - if (ret == -ENOENT) + if (ret == -ENOENT || + ret == -ENOMEDIUM) continue; goto error_2; } diff --git a/fs/afs/super.c b/fs/afs/super.c index 1037dd41a622..3623c952b6ff 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -64,6 +64,7 @@ static atomic_t afs_count_active_inodes; enum { afs_no_opt, afs_opt_cell, + afs_opt_dyn, afs_opt_rwpath, afs_opt_vol, afs_opt_autocell, @@ -71,6 +72,7 @@ enum { static const match_table_t afs_options_list = { { afs_opt_cell, "cell=%s" }, + { afs_opt_dyn, "dyn" }, { afs_opt_rwpath, "rwpath" }, { afs_opt_vol, "vol=%s" }, { afs_opt_autocell, "autocell" }, @@ -148,6 +150,11 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root) const char *suf = ""; char pref = '%'; + if (as->dyn_root) { + seq_puts(m, "none"); + return 0; + } + switch (volume->type) { case AFSVL_RWVOL: break; @@ -171,8 +178,12 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root) */ static int afs_show_options(struct seq_file *m, struct dentry *root) { + struct afs_super_info *as = AFS_FS_S(root->d_sb); + + if (as->dyn_root) + seq_puts(m, ",dyn"); if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags)) - seq_puts(m, "autocell"); + seq_puts(m, ",autocell"); return 0; } @@ -212,7 +223,7 @@ static int afs_parse_options(struct afs_mount_params *params, break; case afs_opt_rwpath: - params->rwpath = 1; + params->rwpath = true; break; case afs_opt_vol: @@ -220,7 +231,11 @@ static int afs_parse_options(struct afs_mount_params *params, break; case afs_opt_autocell: - params->autocell = 1; + params->autocell = true; + break; + + case afs_opt_dyn: + params->dyn_root = true; break; default: @@ -254,7 +269,7 @@ static int afs_parse_device_name(struct afs_mount_params *params, int cellnamesz; _enter(",%s", name); - + if (!name) { printk(KERN_ERR "kAFS: no volume name specified\n"); return -EINVAL; @@ -336,7 +351,14 @@ static int afs_test_super(struct super_block *sb, void *data) struct afs_super_info *as1 = data; struct afs_super_info *as = AFS_FS_S(sb); - return as->net == as1->net && as->volume->vid == as1->volume->vid; + return (as->net == as1->net && + as->volume && + as->volume->vid == as1->volume->vid); +} + +static int afs_dynroot_test_super(struct super_block *sb, void *data) +{ + return false; } static int afs_set_super(struct super_block *sb, void *data) @@ -365,24 +387,30 @@ static int afs_fill_super(struct super_block *sb, sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; - sb->s_xattr = afs_xattr_handlers; + if (!as->dyn_root) + sb->s_xattr = afs_xattr_handlers; ret = super_setup_bdi(sb); if (ret) return ret; sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; - sprintf(sb->s_id, "%u", as->volume->vid); - - afs_activate_volume(as->volume); /* allocate the root inode and dentry */ - fid.vid = as->volume->vid; - fid.vnode = 1; - fid.unique = 1; - inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); + if (as->dyn_root) { + inode = afs_iget_pseudo_dir(sb, true); + sb->s_flags |= SB_RDONLY; + } else { + sprintf(sb->s_id, "%u", as->volume->vid); + afs_activate_volume(as->volume); + fid.vid = as->volume->vid; + fid.vnode = 1; + fid.unique = 1; + inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); + } + if (IS_ERR(inode)) return PTR_ERR(inode); - if (params->autocell) + if (params->autocell || params->dyn_root) set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); ret = -ENOMEM; @@ -407,7 +435,10 @@ static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (as) { as->net = afs_get_net(params->net); - as->cell = afs_get_cell(params->cell); + if (params->dyn_root) + as->dyn_root = true; + else + as->cell = afs_get_cell(params->cell); } return as; } @@ -451,18 +482,20 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, goto error; } - ret = afs_parse_device_name(¶ms, dev_name); - if (ret < 0) - goto error; + if (!params.dyn_root) { + ret = afs_parse_device_name(¶ms, dev_name); + if (ret < 0) + goto error; - /* try and do the mount securely */ - key = afs_request_key(params.cell); - if (IS_ERR(key)) { - _leave(" = %ld [key]", PTR_ERR(key)); - ret = PTR_ERR(key); - goto error; + /* try and do the mount securely */ + key = afs_request_key(params.cell); + if (IS_ERR(key)) { + _leave(" = %ld [key]", PTR_ERR(key)); + ret = PTR_ERR(key); + goto error; + } + params.key = key; } - params.key = key; /* allocate a superblock info record */ ret = -ENOMEM; @@ -470,20 +503,25 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, if (!as) goto error_key; - /* Assume we're going to need a volume record; at the very least we can - * use it to update the volume record if we have one already. This - * checks that the volume exists within the cell. - */ - candidate = afs_create_volume(¶ms); - if (IS_ERR(candidate)) { - ret = PTR_ERR(candidate); - goto error_as; - } + if (!params.dyn_root) { + /* Assume we're going to need a volume record; at the very + * least we can use it to update the volume record if we have + * one already. This checks that the volume exists within the + * cell. + */ + candidate = afs_create_volume(¶ms); + if (IS_ERR(candidate)) { + ret = PTR_ERR(candidate); + goto error_as; + } - as->volume = candidate; + as->volume = candidate; + } /* allocate a deviceless superblock */ - sb = sget(fs_type, afs_test_super, afs_set_super, flags, as); + sb = sget(fs_type, + as->dyn_root ? afs_dynroot_test_super : afs_test_super, + afs_set_super, flags, as); if (IS_ERR(sb)) { ret = PTR_ERR(sb); goto error_as; @@ -529,9 +567,11 @@ static void afs_kill_super(struct super_block *sb) /* Clear the callback interests (which will do ilookup5) before * deactivating the superblock. */ - afs_clear_callback_interests(as->net, as->volume->servers); + if (as->volume) + afs_clear_callback_interests(as->net, as->volume->servers); kill_anon_super(sb); - afs_deactivate_volume(as->volume); + if (as->volume) + afs_deactivate_volume(as->volume); afs_destroy_sbi(as); } @@ -619,12 +659,24 @@ static void afs_destroy_inode(struct inode *inode) */ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct afs_super_info *as = AFS_FS_S(dentry->d_sb); struct afs_fs_cursor fc; struct afs_volume_status vs; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; int ret; + buf->f_type = dentry->d_sb->s_magic; + buf->f_bsize = AFS_BLOCK_SIZE; + buf->f_namelen = AFSNAMEMAX - 1; + + if (as->dyn_root) { + buf->f_blocks = 1; + buf->f_bavail = 0; + buf->f_bfree = 0; + return 0; + } + key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) return PTR_ERR(key); @@ -645,10 +697,6 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) key_put(key); if (ret == 0) { - buf->f_type = dentry->d_sb->s_magic; - buf->f_bsize = AFS_BLOCK_SIZE; - buf->f_namelen = AFSNAMEMAX - 1; - if (vs.max_quota == 0) buf->f_blocks = vs.part_max_blocks; else diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index e372f89fd36a..5d8562f1ad4a 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -23,7 +23,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) struct afs_uvldbentry__xdr *uvldb; struct afs_vldb_entry *entry; bool new_only = false; - u32 tmp; + u32 tmp, nr_servers; int i, ret; _enter(""); @@ -36,6 +36,10 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) uvldb = call->buffer; entry = call->reply[0]; + nr_servers = ntohl(uvldb->nServers); + if (nr_servers > AFS_NMAXNSERVERS) + nr_servers = AFS_NMAXNSERVERS; + for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++) entry->name[i] = (u8)ntohl(uvldb->name[i]); entry->name[i] = 0; @@ -44,14 +48,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) /* If there is a new replication site that we can use, ignore all the * sites that aren't marked as new. */ - for (i = 0; i < AFS_NMAXNSERVERS; i++) { + for (i = 0; i < nr_servers; i++) { tmp = ntohl(uvldb->serverFlags[i]); if (!(tmp & AFS_VLSF_DONTUSE) && (tmp & AFS_VLSF_NEWREPSITE)) new_only = true; } - for (i = 0; i < AFS_NMAXNSERVERS; i++) { + for (i = 0; i < nr_servers; i++) { struct afs_uuid__xdr *xdr; struct afs_uuid *uuid; int j; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 684c48293353..b517a588781f 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -26,9 +26,8 @@ static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, unsigned long type_mask) { struct afs_server_list *slist; - struct afs_server *server; struct afs_volume *volume; - int ret = -ENOMEM, nr_servers = 0, i, j; + int ret = -ENOMEM, nr_servers = 0, i; for (i = 0; i < vldb->nr_servers; i++) if (vldb->fs_mask[i] & type_mask) @@ -58,50 +57,10 @@ static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, refcount_set(&slist->usage, 1); volume->servers = slist; - - /* Make sure a records exists for each server this volume occupies. */ - for (i = 0; i < nr_servers; i++) { - if (!(vldb->fs_mask[i] & type_mask)) - continue; - - server = afs_lookup_server(params->cell, params->key, - &vldb->fs_server[i]); - if (IS_ERR(server)) { - ret = PTR_ERR(server); - if (ret == -ENOENT) - continue; - goto error_2; - } - - /* Insertion-sort by server pointer */ - for (j = 0; j < slist->nr_servers; j++) - if (slist->servers[j].server >= server) - break; - if (j < slist->nr_servers) { - if (slist->servers[j].server == server) { - afs_put_server(params->net, server); - continue; - } - - memmove(slist->servers + j + 1, - slist->servers + j, - (slist->nr_servers - j) * sizeof(struct afs_server_entry)); - } - - slist->servers[j].server = server; - slist->nr_servers++; - } - - if (slist->nr_servers == 0) { - ret = -EDESTADDRREQ; - goto error_2; - } - return volume; -error_2: - afs_put_serverlist(params->net, slist); error_1: + afs_put_cell(params->net, volume->cell); kfree(volume); error_0: return ERR_PTR(ret); @@ -327,7 +286,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* See if the volume's server list got updated. */ new = afs_alloc_server_list(volume->cell, key, - vldb, (1 << volume->type)); + vldb, (1 << volume->type)); if (IS_ERR(new)) { ret = PTR_ERR(new); goto error_vldb; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index dbf07051aacd..b4336b42ce3b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -299,7 +299,8 @@ unlock: * start an async read(ahead) operation. return nr_pages we submitted * a read for on success, or negative error code. */ -static int start_read(struct inode *inode, struct list_head *page_list, int max) +static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx, + struct list_head *page_list, int max) { struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; @@ -316,7 +317,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) int got = 0; int ret = 0; - if (!current->journal_info) { + if (!rw_ctx) { /* caller of readpages does not hold buffer and read caps * (fadvise, madvise and readahead cases) */ int want = CEPH_CAP_FILE_CACHE; @@ -437,6 +438,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, { struct inode *inode = file_inode(file); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_file_info *ci = file->private_data; + struct ceph_rw_context *rw_ctx; int rc = 0; int max = 0; @@ -449,11 +452,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, if (rc == 0) goto out; + rw_ctx = ceph_find_rw_context(ci); max = fsc->mount_options->rsize >> PAGE_SHIFT; - dout("readpages %p file %p nr_pages %d max %d\n", - inode, file, nr_pages, max); + dout("readpages %p file %p ctx %p nr_pages %d max %d\n", + inode, file, rw_ctx, nr_pages, max); while (!list_empty(page_list)) { - rc = start_read(inode, page_list, max); + rc = start_read(inode, rw_ctx, page_list, max); if (rc < 0) goto out; } @@ -574,7 +578,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct ceph_fs_client *fsc; struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); - long writeback_stat; int err, len = PAGE_SIZE; struct ceph_writeback_ctl ceph_wbc; @@ -615,8 +618,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n", inode, page, page->index, page_off, len, snapc, snapc->seq); - writeback_stat = atomic_long_inc_return(&fsc->writeback_count); - if (writeback_stat > + if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); @@ -651,6 +653,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) end_page_writeback(page); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); ceph_put_snap_context(snapc); /* page's reference */ + + if (atomic_long_dec_return(&fsc->writeback_count) < + CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) + clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); + return err; } @@ -1450,9 +1457,10 @@ static int ceph_filemap_fault(struct vm_fault *vmf) if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || ci->i_inline_version == CEPH_INLINE_NONE) { - current->journal_info = vma->vm_file; + CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); + ceph_add_rw_context(fi, &rw_ctx); ret = filemap_fault(vmf); - current->journal_info = NULL; + ceph_del_rw_context(fi, &rw_ctx); } else ret = -EAGAIN; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a14b2c974c9e..6582c4507e6c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -154,13 +154,19 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) spin_unlock(&mdsc->caps_list_lock); } -void ceph_reserve_caps(struct ceph_mds_client *mdsc, +/* + * Called under mdsc->mutex. + */ +int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need) { - int i; + int i, j; struct ceph_cap *cap; int have; int alloc = 0; + int max_caps; + bool trimmed = false; + struct ceph_mds_session *s; LIST_HEAD(newcaps); dout("reserve caps ctx=%p need=%d\n", ctx, need); @@ -179,16 +185,37 @@ void ceph_reserve_caps(struct ceph_mds_client *mdsc, spin_unlock(&mdsc->caps_list_lock); for (i = have; i < need; i++) { +retry: cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); - if (!cap) - break; + if (!cap) { + if (!trimmed) { + for (j = 0; j < mdsc->max_sessions; j++) { + s = __ceph_lookup_mds_session(mdsc, j); + if (!s) + continue; + mutex_unlock(&mdsc->mutex); + + mutex_lock(&s->s_mutex); + max_caps = s->s_nr_caps - (need - i); + ceph_trim_caps(mdsc, s, max_caps); + mutex_unlock(&s->s_mutex); + + ceph_put_mds_session(s); + mutex_lock(&mdsc->mutex); + } + trimmed = true; + goto retry; + } else { + pr_warn("reserve caps ctx=%p ENOMEM " + "need=%d got=%d\n", + ctx, need, have + alloc); + goto out_nomem; + } + } list_add(&cap->caps_item, &newcaps); alloc++; } - /* we didn't manage to reserve as much as we needed */ - if (have + alloc != need) - pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", - ctx, need, have + alloc); + BUG_ON(have + alloc != need); spin_lock(&mdsc->caps_list_lock); mdsc->caps_total_count += alloc; @@ -204,6 +231,24 @@ void ceph_reserve_caps(struct ceph_mds_client *mdsc, dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", ctx, mdsc->caps_total_count, mdsc->caps_use_count, mdsc->caps_reserve_count, mdsc->caps_avail_count); + return 0; + +out_nomem: + while (!list_empty(&newcaps)) { + cap = list_first_entry(&newcaps, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + kmem_cache_free(ceph_cap_cachep, cap); + } + + spin_lock(&mdsc->caps_list_lock); + mdsc->caps_avail_count += have; + mdsc->caps_reserve_count -= have; + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + + mdsc->caps_avail_count); + spin_unlock(&mdsc->caps_list_lock); + return -ENOMEM; } int ceph_unreserve_caps(struct ceph_mds_client *mdsc, @@ -498,7 +543,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, */ if ((issued & CEPH_CAP_FILE_SHARED) != (had & CEPH_CAP_FILE_SHARED)) { if (issued & CEPH_CAP_FILE_SHARED) - ci->i_shared_gen++; + atomic_inc(&ci->i_shared_gen); if (S_ISDIR(ci->vfs_inode.i_mode)) { dout(" marking %p NOT complete\n", &ci->vfs_inode); __ceph_dir_clear_complete(ci); @@ -577,18 +622,30 @@ void ceph_add_cap(struct inode *inode, } } - if (!ci->i_snap_realm) { + if (!ci->i_snap_realm || + ((flags & CEPH_CAP_FLAG_AUTH) && + realmino != (u64)-1 && ci->i_snap_realm->ino != realmino)) { /* * add this inode to the appropriate snap realm */ struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc, realmino); if (realm) { + struct ceph_snap_realm *oldrealm = ci->i_snap_realm; + if (oldrealm) { + spin_lock(&oldrealm->inodes_with_caps_lock); + list_del_init(&ci->i_snap_realm_item); + spin_unlock(&oldrealm->inodes_with_caps_lock); + } + spin_lock(&realm->inodes_with_caps_lock); ci->i_snap_realm = realm; list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); spin_unlock(&realm->inodes_with_caps_lock); + + if (oldrealm) + ceph_put_snap_realm(mdsc, oldrealm); } else { pr_err("ceph_add_cap: couldn't find snap realm %llx\n", realmino); @@ -890,6 +947,11 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check) /* * called under i_ceph_lock */ +static int __ceph_is_single_caps(struct ceph_inode_info *ci) +{ + return rb_first(&ci->i_caps) == rb_last(&ci->i_caps); +} + static int __ceph_is_any_caps(struct ceph_inode_info *ci) { return !RB_EMPTY_ROOT(&ci->i_caps); @@ -1703,21 +1765,24 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, int mds = -1; /* keep track of how far we've gone through i_caps list to avoid an infinite loop on retry */ struct rb_node *p; - int delayed = 0, sent = 0, num; - bool is_delayed = flags & CHECK_CAPS_NODELAY; + int delayed = 0, sent = 0; + bool no_delay = flags & CHECK_CAPS_NODELAY; bool queue_invalidate = false; - bool force_requeue = false; bool tried_invalidate = false; /* if we are unmounting, flush any unused caps immediately. */ if (mdsc->stopping) - is_delayed = true; + no_delay = true; spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_FLUSH) flags |= CHECK_CAPS_FLUSH; + if (!(flags & CHECK_CAPS_AUTHONLY) || + (ci->i_auth_cap && __ceph_is_single_caps(ci))) + __cap_delay_cancel(mdsc, ci); + goto retry_locked; retry: spin_lock(&ci->i_ceph_lock); @@ -1772,7 +1837,7 @@ retry_locked: * have cached pages, but don't want them, then try to invalidate. * If we fail, it's because pages are locked.... try again later. */ - if ((!is_delayed || mdsc->stopping) && + if ((!no_delay || mdsc->stopping) && !S_ISDIR(inode->i_mode) && /* ignore readdir cache */ !(ci->i_wb_ref || ci->i_wrbuffer_ref) && /* no dirty pages... */ inode->i_data.nrpages && /* have cached pages */ @@ -1781,27 +1846,16 @@ retry_locked: !tried_invalidate) { dout("check_caps trying to invalidate on %p\n", inode); if (try_nonblocking_invalidate(inode) < 0) { - if (revoking & (CEPH_CAP_FILE_CACHE| - CEPH_CAP_FILE_LAZYIO)) { - dout("check_caps queuing invalidate\n"); - queue_invalidate = true; - ci->i_rdcache_revoking = ci->i_rdcache_gen; - } else { - dout("check_caps failed to invalidate pages\n"); - /* we failed to invalidate pages. check these - caps again later. */ - force_requeue = true; - __cap_set_timeouts(mdsc, ci); - } + dout("check_caps queuing invalidate\n"); + queue_invalidate = true; + ci->i_rdcache_revoking = ci->i_rdcache_gen; } tried_invalidate = true; goto retry_locked; } - num = 0; for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); - num++; /* avoid looping forever */ if (mds >= cap->mds || @@ -1864,7 +1918,7 @@ retry_locked: cap->mds_wanted == want) continue; /* nope, all good */ - if (is_delayed) + if (no_delay) goto ack; /* delay? */ @@ -1955,15 +2009,8 @@ ack: goto retry; /* retake i_ceph_lock and restart our cap scan. */ } - /* - * Reschedule delayed caps release if we delayed anything, - * otherwise cancel. - */ - if (delayed && is_delayed) - force_requeue = true; /* __send_cap delayed release; requeue */ - if (!delayed && !is_delayed) - __cap_delay_cancel(mdsc, ci); - else if (!is_delayed || force_requeue) + /* Reschedule delayed caps release if we delayed anything */ + if (delayed) __cap_delay_requeue(mdsc, ci); spin_unlock(&ci->i_ceph_lock); @@ -2160,7 +2207,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) u64 flush_tid; int err = 0; int dirty; - int wait = wbc->sync_mode == WB_SYNC_ALL; + int wait = (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync); dout("write_inode %p wait=%d\n", inode, wait); if (wait) { @@ -3426,7 +3473,14 @@ retry: */ issued = cap->issued; - WARN_ON(issued != cap->implemented); + if (issued != cap->implemented) + pr_err_ratelimited("handle_cap_export: issued != implemented: " + "ino (%llx.%llx) mds%d seq %d mseq %d " + "issued %s implemented %s\n", + ceph_vinop(inode), mds, cap->seq, cap->mseq, + ceph_cap_string(issued), + ceph_cap_string(cap->implemented)); + tcap = __get_cap_for_mds(ci, target); if (tcap) { @@ -3572,12 +3626,13 @@ retry: if ((ph->flags & CEPH_CAP_FLAG_AUTH) && (ocap->seq != le32_to_cpu(ph->seq) || ocap->mseq != le32_to_cpu(ph->mseq))) { - pr_err("handle_cap_import: mismatched seq/mseq: " - "ino (%llx.%llx) mds%d seq %d mseq %d " - "importer mds%d has peer seq %d mseq %d\n", - ceph_vinop(inode), peer, ocap->seq, - ocap->mseq, mds, le32_to_cpu(ph->seq), - le32_to_cpu(ph->mseq)); + pr_err_ratelimited("handle_cap_import: " + "mismatched seq/mseq: ino (%llx.%llx) " + "mds%d seq %d mseq %d importer mds%d " + "has peer seq %d mseq %d\n", + ceph_vinop(inode), peer, ocap->seq, + ocap->mseq, mds, le32_to_cpu(ph->seq), + le32_to_cpu(ph->mseq)); } __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } @@ -3939,11 +3994,20 @@ int ceph_encode_inode_release(void **p, struct inode *inode, cap = __get_cap_for_mds(ci, mds); if (cap && __cap_is_valid(cap)) { - if (force || - ((cap->issued & drop) && - (cap->issued & unless) == 0)) { - if ((cap->issued & drop) && - (cap->issued & unless) == 0) { + unless &= cap->issued; + if (unless) { + if (unless & CEPH_CAP_AUTH_EXCL) + drop &= ~CEPH_CAP_AUTH_SHARED; + if (unless & CEPH_CAP_LINK_EXCL) + drop &= ~CEPH_CAP_LINK_SHARED; + if (unless & CEPH_CAP_XATTR_EXCL) + drop &= ~CEPH_CAP_XATTR_SHARED; + if (unless & CEPH_CAP_FILE_EXCL) + drop &= ~CEPH_CAP_FILE_SHARED; + } + + if (force || (cap->issued & drop)) { + if (cap->issued & drop) { int wanted = __ceph_caps_wanted(ci); if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0) wanted |= cap->mds_wanted; @@ -3975,7 +4039,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, *p += sizeof(*rel); ret = 1; } else { - dout("encode_inode_release %p cap %p %s\n", + dout("encode_inode_release %p cap %p %s (noop)\n", inode, cap, ceph_cap_string(cap->issued)); } } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 8a5266699b67..0c4346806e17 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -173,7 +173,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, * the MDS if/when the directory is modified). */ static int __dcache_readdir(struct file *file, struct dir_context *ctx, - u32 shared_gen) + int shared_gen) { struct ceph_file_info *fi = file->private_data; struct dentry *parent = file->f_path.dentry; @@ -184,7 +184,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, u64 idx = 0; int err = 0; - dout("__dcache_readdir %p v%u at %llx\n", dir, shared_gen, ctx->pos); + dout("__dcache_readdir %p v%u at %llx\n", dir, (unsigned)shared_gen, ctx->pos); /* search start position */ if (ctx->pos > 2) { @@ -231,11 +231,17 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, goto out; } - di = ceph_dentry(dentry); spin_lock(&dentry->d_lock); - if (di->lease_shared_gen == shared_gen && - d_really_is_positive(dentry) && - fpos_cmp(ctx->pos, di->offset) <= 0) { + di = ceph_dentry(dentry); + if (d_unhashed(dentry) || + d_really_is_negative(dentry) || + di->lease_shared_gen != shared_gen) { + spin_unlock(&dentry->d_lock); + dput(dentry); + err = -EAGAIN; + goto out; + } + if (fpos_cmp(ctx->pos, di->offset) <= 0) { emit_dentry = true; } spin_unlock(&dentry->d_lock); @@ -333,7 +339,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ceph_snap(inode) != CEPH_SNAPDIR && __ceph_dir_is_complete_ordered(ci) && __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { - u32 shared_gen = ci->i_shared_gen; + int shared_gen = atomic_read(&ci->i_shared_gen); spin_unlock(&ci->i_ceph_lock); err = __dcache_readdir(file, ctx, shared_gen); if (err != -EAGAIN) @@ -381,6 +387,7 @@ more: if (op == CEPH_MDS_OP_READDIR) { req->r_direct_hash = ceph_frag_value(frag); __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); + req->r_inode_drop = CEPH_CAP_FILE_EXCL; } if (fi->last_name) { req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); @@ -750,7 +757,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, spin_unlock(&ci->i_ceph_lock); dout(" dir %p complete, -ENOENT\n", dir); d_add(dentry, NULL); - di->lease_shared_gen = ci->i_shared_gen; + di->lease_shared_gen = atomic_read(&ci->i_shared_gen); return NULL; } spin_unlock(&ci->i_ceph_lock); @@ -835,7 +842,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mknod.mode = cpu_to_le32(mode); req->r_args.mknod.rdev = cpu_to_le32(rdev); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (acls.pagelist) { req->r_pagelist = acls.pagelist; @@ -887,7 +894,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry = dget(dentry); req->r_num_caps = 2; - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) @@ -936,7 +943,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) req->r_parent = dir; set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mkdir.mode = cpu_to_le32(mode); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (acls.pagelist) { req->r_pagelist = acls.pagelist; @@ -983,7 +990,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_SHARED on source inode (mds will lock it) */ - req->r_old_inode_drop = CEPH_CAP_LINK_SHARED; + req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; err = ceph_mdsc_do_request(mdsc, dir, req); if (err) { d_drop(dentry); @@ -1096,7 +1103,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_RDCACHE on source inode (mds will lock it) */ - req->r_old_inode_drop = CEPH_CAP_LINK_SHARED; + req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; if (d_really_is_positive(new_dentry)) req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry)); err = ceph_mdsc_do_request(mdsc, old_dir, req); @@ -1106,16 +1113,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, * do_request, above). If there is no trace, we need * to do it here. */ - - /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_complete(old_dir); - ceph_dir_clear_complete(new_dir); - d_move(old_dentry, new_dentry); - - /* ensure target dentry is invalidated, despite - rehashing bug in vfs_rename_dir */ - ceph_invalidate_dentry_lease(new_dentry); } ceph_mdsc_put_request(req); return err; @@ -1199,12 +1197,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) int valid = 0; spin_lock(&ci->i_ceph_lock); - if (ci->i_shared_gen == di->lease_shared_gen) + if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen) valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); spin_unlock(&ci->i_ceph_lock); dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", - dir, (unsigned)ci->i_shared_gen, dentry, - (unsigned)di->lease_shared_gen, valid); + dir, (unsigned)atomic_read(&ci->i_shared_gen), + dentry, (unsigned)di->lease_shared_gen, valid); return valid; } @@ -1332,24 +1330,37 @@ static void ceph_d_release(struct dentry *dentry) */ static void ceph_d_prune(struct dentry *dentry) { - dout("ceph_d_prune %p\n", dentry); + struct ceph_inode_info *dir_ci; + struct ceph_dentry_info *di; + + dout("ceph_d_prune %pd %p\n", dentry, dentry); /* do we have a valid parent? */ if (IS_ROOT(dentry)) return; - /* if we are not hashed, we don't affect dir's completeness */ - if (d_unhashed(dentry)) + /* we hold d_lock, so d_parent is stable */ + dir_ci = ceph_inode(d_inode(dentry->d_parent)); + if (dir_ci->i_vino.snap == CEPH_SNAPDIR) return; - if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_SNAPDIR) + /* who calls d_delete() should also disable dcache readdir */ + if (d_really_is_negative(dentry)) return; - /* - * we hold d_lock, so d_parent is stable, and d_fsdata is never - * cleared until d_release - */ - ceph_dir_clear_complete(d_inode(dentry->d_parent)); + /* d_fsdata does not get cleared until d_release */ + if (!d_unhashed(dentry)) { + __ceph_dir_clear_complete(dir_ci); + return; + } + + /* Disable dcache readdir just in case that someone called d_drop() + * or d_invalidate(), but MDS didn't revoke CEPH_CAP_FILE_SHARED + * properly (dcache readdir is still enabled) */ + di = ceph_dentry(dentry); + if (di->offset > 0 && + di->lease_shared_gen == atomic_read(&dir_ci->i_shared_gen)) + __ceph_dir_clear_ordered(dir_ci); } /* diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5c17125f45c7..6639926eed4e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -181,6 +181,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) return -ENOMEM; } cf->fmode = fmode; + + spin_lock_init(&cf->rw_contexts_lock); + INIT_LIST_HEAD(&cf->rw_contexts); + cf->next_offset = 2; cf->readdir_cache_idx = -1; file->private_data = cf; @@ -396,7 +400,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, req->r_dentry = dget(dentry); req->r_num_caps = 2; if (flags & O_CREAT) { - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (acls.pagelist) { req->r_pagelist = acls.pagelist; @@ -464,6 +468,7 @@ int ceph_release(struct inode *inode, struct file *file) ceph_mdsc_put_request(cf->last_readdir); kfree(cf->last_name); kfree(cf->dir_info); + WARN_ON(!list_empty(&cf->rw_contexts)); kmem_cache_free(ceph_file_cachep, cf); /* wake up anyone waiting for caps on this inode */ @@ -1199,12 +1204,13 @@ again: retry_op = READ_INLINE; } } else { + CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, ceph_cap_string(got)); - current->journal_info = filp; + ceph_add_rw_context(fi, &rw_ctx); ret = generic_file_read_iter(iocb, to); - current->journal_info = NULL; + ceph_del_rw_context(fi, &rw_ctx); } dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ab81652198c4..c6ec5aa46100 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -494,7 +494,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_wrbuffer_ref = 0; ci->i_wrbuffer_ref_head = 0; atomic_set(&ci->i_filelock_ref, 0); - ci->i_shared_gen = 0; + atomic_set(&ci->i_shared_gen, 0); ci->i_rdcache_gen = 0; ci->i_rdcache_revoking = 0; @@ -1041,7 +1041,7 @@ static void update_dentry_lease(struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) goto out_unlock; - di->lease_shared_gen = ceph_inode(dir)->i_shared_gen; + di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); if (duration == 0) goto out_unlock; @@ -1080,6 +1080,27 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) BUG_ON(d_inode(dn)); + if (S_ISDIR(in->i_mode)) { + /* If inode is directory, d_splice_alias() below will remove + * 'realdn' from its origin parent. We need to ensure that + * origin parent's readdir cache will not reference 'realdn' + */ + realdn = d_find_any_alias(in); + if (realdn) { + struct ceph_dentry_info *di = ceph_dentry(realdn); + spin_lock(&realdn->d_lock); + + realdn->d_op->d_prune(realdn); + + di->time = jiffies; + di->lease_shared_gen = 0; + di->offset = 0; + + spin_unlock(&realdn->d_lock); + dput(realdn); + } + } + /* dn must be unhashed */ if (!d_unhashed(dn)) d_drop(dn); @@ -1295,8 +1316,8 @@ retry_lookup: if (!rinfo->head->is_target) { dout("fill_trace null dentry\n"); if (d_really_is_positive(dn)) { - ceph_dir_clear_ordered(dir); dout("d_delete %p\n", dn); + ceph_dir_clear_ordered(dir); d_delete(dn); } else if (have_lease) { if (d_unhashed(dn)) @@ -1323,7 +1344,6 @@ retry_lookup: dout(" %p links to %p %llx.%llx, not %llx.%llx\n", dn, d_inode(dn), ceph_vinop(d_inode(dn)), ceph_vinop(in)); - ceph_dir_clear_ordered(dir); d_invalidate(dn); have_lease = false; } @@ -1573,9 +1593,19 @@ retry_lookup: } else if (d_really_is_positive(dn) && (ceph_ino(d_inode(dn)) != tvino.ino || ceph_snap(d_inode(dn)) != tvino.snap)) { + struct ceph_dentry_info *di = ceph_dentry(dn); dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); - __ceph_dir_clear_ordered(ci); + + spin_lock(&dn->d_lock); + if (di->offset > 0 && + di->lease_shared_gen == + atomic_read(&ci->i_shared_gen)) { + __ceph_dir_clear_ordered(ci); + di->offset = 0; + } + spin_unlock(&dn->d_lock); + d_delete(dn); dput(dn); goto retry_lookup; @@ -1600,9 +1630,7 @@ retry_lookup: &req->r_caps_reservation); if (ret < 0) { pr_err("fill_inode badness on %p\n", in); - if (d_really_is_positive(dn)) - __ceph_dir_clear_ordered(ci); - else + if (d_really_is_negative(dn)) iput(in); d_drop(dn); err = ret; @@ -2000,8 +2028,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) ceph_encode_timespec(&req->r_args.setattr.atime, &attr->ia_atime); mask |= CEPH_SETATTR_ATIME; - release |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR; + release |= CEPH_CAP_FILE_SHARED | + CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } } if (ia_valid & ATTR_MTIME) { @@ -2022,8 +2050,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) ceph_encode_timespec(&req->r_args.setattr.mtime, &attr->ia_mtime); mask |= CEPH_SETATTR_MTIME; - release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR; + release |= CEPH_CAP_FILE_SHARED | + CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } } if (ia_valid & ATTR_SIZE) { @@ -2041,8 +2069,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) req->r_args.setattr.old_size = cpu_to_le64(inode->i_size); mask |= CEPH_SETATTR_SIZE; - release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR; + release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL | + CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1b468250e947..2e8f90f96540 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -604,10 +604,20 @@ static void __register_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, struct inode *dir) { + int ret = 0; + req->r_tid = ++mdsc->last_tid; - if (req->r_num_caps) - ceph_reserve_caps(mdsc, &req->r_caps_reservation, - req->r_num_caps); + if (req->r_num_caps) { + ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation, + req->r_num_caps); + if (ret < 0) { + pr_err("__register_request %p " + "failed to reserve caps: %d\n", req, ret); + /* set req->r_err to fail early from __do_request */ + req->r_err = ret; + return; + } + } dout("__register_request %p tid %lld\n", req, req->r_tid); ceph_mdsc_get_request(req); insert_request(&mdsc->request_tree, req); @@ -1545,9 +1555,9 @@ out: /* * Trim session cap count down to some max number. */ -static int trim_caps(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session, - int max_caps) +int ceph_trim_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int max_caps) { int trim_caps = session->s_nr_caps - max_caps; @@ -2438,11 +2448,14 @@ out: */ void ceph_invalidate_dir_request(struct ceph_mds_request *req) { - struct inode *inode = req->r_parent; + struct inode *dir = req->r_parent; + struct inode *old_dir = req->r_old_dentry_dir; - dout("invalidate_dir_request %p (complete, lease(s))\n", inode); + dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir); - ceph_dir_clear_complete(inode); + ceph_dir_clear_complete(dir); + if (old_dir) + ceph_dir_clear_complete(old_dir); if (req->r_dentry) ceph_invalidate_dentry_lease(req->r_dentry); if (req->r_old_dentry) @@ -2773,7 +2786,7 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_RECALL_STATE: - trim_caps(mdsc, session, le32_to_cpu(h->max_caps)); + ceph_trim_caps(mdsc, session, le32_to_cpu(h->max_caps)); break; case CEPH_SESSION_FLUSHMSG: diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 837ac4b087a0..71e3b783ee6f 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -444,4 +444,7 @@ ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target); extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); +extern int ceph_trim_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int max_caps); #endif diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 8a2ca41e4b97..07cf95e6413d 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -922,13 +922,17 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, /* * Move the inode to the new realm */ - spin_lock(&realm->inodes_with_caps_lock); + oldrealm = ci->i_snap_realm; + spin_lock(&oldrealm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + spin_unlock(&oldrealm->inodes_with_caps_lock); + + spin_lock(&realm->inodes_with_caps_lock); list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); - oldrealm = ci->i_snap_realm; ci->i_snap_realm = realm; spin_unlock(&realm->inodes_with_caps_lock); + spin_unlock(&ci->i_ceph_lock); ceph_get_snap_realm(mdsc, realm); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2beeec07fa76..21b2e5b004eb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -256,7 +256,8 @@ struct ceph_inode_xattr { */ struct ceph_dentry_info { struct ceph_mds_session *lease_session; - u32 lease_gen, lease_shared_gen; + int lease_shared_gen; + u32 lease_gen; u32 lease_seq; unsigned long lease_renew_after, lease_renew_from; struct list_head lru; @@ -353,7 +354,7 @@ struct ceph_inode_info { int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref; int i_wrbuffer_ref, i_wrbuffer_ref_head; atomic_t i_filelock_ref; - u32 i_shared_gen; /* increment each time we get FILE_SHARED */ + atomic_t i_shared_gen; /* increment each time we get FILE_SHARED */ u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ @@ -648,7 +649,7 @@ extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check); extern void ceph_caps_init(struct ceph_mds_client *mdsc); extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); -extern void ceph_reserve_caps(struct ceph_mds_client *mdsc, +extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need); extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx); @@ -668,6 +669,9 @@ struct ceph_file_info { short fmode; /* initialized on open */ short flags; /* CEPH_F_* */ + spinlock_t rw_contexts_lock; + struct list_head rw_contexts; + /* readdir: position within the dir */ u32 frag; struct ceph_mds_request *last_readdir; @@ -684,6 +688,49 @@ struct ceph_file_info { int dir_info_len; }; +struct ceph_rw_context { + struct list_head list; + struct task_struct *thread; + int caps; +}; + +#define CEPH_DEFINE_RW_CONTEXT(_name, _caps) \ + struct ceph_rw_context _name = { \ + .thread = current, \ + .caps = _caps, \ + } + +static inline void ceph_add_rw_context(struct ceph_file_info *cf, + struct ceph_rw_context *ctx) +{ + spin_lock(&cf->rw_contexts_lock); + list_add(&ctx->list, &cf->rw_contexts); + spin_unlock(&cf->rw_contexts_lock); +} + +static inline void ceph_del_rw_context(struct ceph_file_info *cf, + struct ceph_rw_context *ctx) +{ + spin_lock(&cf->rw_contexts_lock); + list_del(&ctx->list); + spin_unlock(&cf->rw_contexts_lock); +} + +static inline struct ceph_rw_context* +ceph_find_rw_context(struct ceph_file_info *cf) +{ + struct ceph_rw_context *ctx, *found = NULL; + spin_lock(&cf->rw_contexts_lock); + list_for_each_entry(ctx, &cf->rw_contexts, list) { + if (ctx->thread == current) { + found = ctx; + break; + } + } + spin_unlock(&cf->rw_contexts_lock); + return found; +} + struct ceph_readdir_cache_control { struct page *page; struct dentry **dentries; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index c7a863219fa3..e35e711db68e 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -128,6 +128,10 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon) seq_puts(m, " type: CDROM "); else seq_printf(m, " type: %d ", dev_type); + if (tcon->seal) + seq_printf(m, " Encrypted"); + if (tcon->unix_ext) + seq_printf(m, " POSIX Extensions"); if (tcon->ses->server->ops->dump_share_caps) tcon->ses->server->ops->dump_share_caps(m, tcon); @@ -246,7 +250,10 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) atomic_read(&server->smbd_conn->mr_used_count)); skip_rdma: #endif - seq_printf(m, "\nNumber of credits: %d", server->credits); + seq_printf(m, "\nNumber of credits: %d Dialect 0x%x", + server->credits, server->dialect); + if (server->sign) + seq_printf(m, " signed"); i++; list_for_each(tmp2, &server->smb_ses_list) { ses = list_entry(tmp2, struct cifs_ses, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4e0922d24eb2..9ceebf30eb22 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -6343,9 +6343,7 @@ SetEARetry: pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_EA); - parm_data = - (struct fealist *) (((char *) &pSMB->hdr.Protocol) + - offset); + parm_data = (void *)pSMB + offsetof(struct smb_hdr, Protocol) + offset; pSMB->ParameterOffset = cpu_to_le16(param_offset); pSMB->DataOffset = cpu_to_le16(offset); pSMB->SetupCount = 1; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 6eb9f9691ed4..2a2b34ccaf49 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -192,6 +192,35 @@ struct smb2_symlink_err_rsp { __u8 PathBuffer[0]; } __packed; +/* SMB 3.1.1 and later dialects. See MS-SMB2 section 2.2.2.1 */ +struct smb2_error_context_rsp { + __le32 ErrorDataLength; + __le32 ErrorId; + __u8 ErrorContextData; /* ErrorDataLength long array */ +} __packed; + +/* Defines for Type field below (see MS-SMB2 2.2.2.2.2.1) */ +#define MOVE_DST_IPADDR_V4 cpu_to_le32(0x00000001) +#define MOVE_DST_IPADDR_V6 cpu_to_le32(0x00000002) + +struct move_dst_ipaddr { + __le32 Type; + __u32 Reserved; + __u8 address[16]; /* IPv4 followed by 12 bytes rsvd or IPv6 address */ +} __packed; + +struct share_redirect_error_context_rsp { + __le32 StructureSize; + __le32 NotificationType; + __le32 ResourceNameOffset; + __le32 ResourceNameLength; + __le16 Flags; + __le16 TargetType; + __le32 IPAddrCount; + struct move_dst_ipaddr IpAddrMoveList[0]; + /* __u8 ResourceName[] */ /* Name of share as counted Unicode string */ +} __packed; + #define SMB2_CLIENT_GUID_SIZE 16 struct smb2_negotiate_req { @@ -320,7 +349,9 @@ struct smb2_logoff_rsp { } __packed; /* Flags/Reserved for SMB3.1.1 */ -#define SMB2_SHAREFLAG_CLUSTER_RECONNECT 0x0001 +#define SMB2_TREE_CONNECT_FLAG_CLUSTER_RECONNECT cpu_to_le16(0x0001) +#define SMB2_TREE_CONNECT_FLAG_REDIRECT_TO_OWNER cpu_to_le16(0x0002) +#define SMB2_TREE_CONNECT_FLAG_EXTENSION_PRESENT cpu_to_le16(0x0004) struct smb2_tree_connect_req { struct smb2_sync_hdr sync_hdr; @@ -331,6 +362,82 @@ struct smb2_tree_connect_req { __u8 Buffer[1]; /* variable length */ } __packed; +/* See MS-SMB2 section 2.2.9.2 */ +/* Context Types */ +#define SMB2_RESERVED_TREE_CONNECT_CONTEXT_ID 0x0000 +#define SMB2_REMOTED_IDENTITY_TREE_CONNECT_CONTEXT_ID cpu_to_le16(0x0001) + +struct tree_connect_contexts { + __le16 ContextType; + __le16 DataLength; + __le32 Reserved; + __u8 Data[0]; +} __packed; + +/* Remoted identity tree connect context structures - see MS-SMB2 2.2.9.2.1 */ +struct smb3_blob_data { + __le16 BlobSize; + __u8 BlobData[0]; +} __packed; + +/* Valid values for Attr */ +#define SE_GROUP_MANDATORY 0x00000001 +#define SE_GROUP_ENABLED_BY_DEFAULT 0x00000002 +#define SE_GROUP_ENABLED 0x00000004 +#define SE_GROUP_OWNER 0x00000008 +#define SE_GROUP_USE_FOR_DENY_ONLY 0x00000010 +#define SE_GROUP_INTEGRITY 0x00000020 +#define SE_GROUP_INTEGRITY_ENABLED 0x00000040 +#define SE_GROUP_RESOURCE 0x20000000 +#define SE_GROUP_LOGON_ID 0xC0000000 + +/* struct sid_attr_data is SidData array in BlobData format then le32 Attr */ + +struct sid_array_data { + __le16 SidAttrCount; + /* SidAttrList - array of sid_attr_data structs */ +} __packed; + +struct luid_attr_data { + +} __packed; + +/* + * struct privilege_data is the same as BLOB_DATA - see MS-SMB2 2.2.9.2.1.5 + * but with size of LUID_ATTR_DATA struct and BlobData set to LUID_ATTR DATA + */ + +struct privilege_array_data { + __le16 PrivilegeCount; + /* array of privilege_data structs */ +} __packed; + +struct remoted_identity_tcon_context { + __le16 TicketType; /* must be 0x0001 */ + __le16 TicketSize; /* total size of this struct */ + __le16 User; /* offset to SID_ATTR_DATA struct with user info */ + __le16 UserName; /* offset to null terminated Unicode username string */ + __le16 Domain; /* offset to null terminated Unicode domain name */ + __le16 Groups; /* offset to SID_ARRAY_DATA struct with group info */ + __le16 RestrictedGroups; /* similar to above */ + __le16 Privileges; /* offset to PRIVILEGE_ARRAY_DATA struct */ + __le16 PrimaryGroup; /* offset to SID_ARRAY_DATA struct */ + __le16 Owner; /* offset to BLOB_DATA struct */ + __le16 DefaultDacl; /* offset to BLOB_DATA struct */ + __le16 DeviceGroups; /* offset to SID_ARRAY_DATA struct */ + __le16 UserClaims; /* offset to BLOB_DATA struct */ + __le16 DeviceClaims; /* offset to BLOB_DATA struct */ + __u8 TicketInfo[0]; /* variable length buf - remoted identity data */ +} __packed; + +struct smb2_tree_connect_req_extension { + __le32 TreeConnectContextOffset; + __le16 TreeConnectContextCount; + __u8 Reserved[10]; + __u8 PathName[0]; /* variable sized array */ + /* followed by array of TreeConnectContexts */ +} __packed; + struct smb2_tree_connect_rsp { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 16 */ @@ -365,7 +472,8 @@ struct smb2_tree_connect_rsp { #define SHI1005_FLAGS_ENABLE_HASH_V1 0x00002000 #define SHI1005_FLAGS_ENABLE_HASH_V2 0x00004000 #define SHI1005_FLAGS_ENCRYPT_DATA 0x00008000 -#define SHI1005_FLAGS_ALL 0x0000FF33 +#define SMB2_SHAREFLAG_IDENTITY_REMOTING 0x00040000 /* 3.1.1 */ +#define SHI1005_FLAGS_ALL 0x0004FF33 /* Possible share capabilities */ #define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) /* all dialects */ @@ -373,6 +481,7 @@ struct smb2_tree_connect_rsp { #define SMB2_SHARE_CAP_SCALEOUT cpu_to_le32(0x00000020) /* 3.0 */ #define SMB2_SHARE_CAP_CLUSTER cpu_to_le32(0x00000040) /* 3.0 */ #define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */ +#define SMB2_SHARE_CAP_REDIRECT_TO_OWNER cpu_to_le32(0x00000100) /* 3.1.1 */ struct smb2_tree_disconnect_req { struct smb2_sync_hdr sync_hdr; @@ -556,6 +665,7 @@ struct create_context { #define SMB2_LEASE_WRITE_CACHING cpu_to_le32(0x04) #define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02) +#define SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET cpu_to_le32(0x00000004) #define SMB2_LEASE_KEY_SIZE 16 diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 5130492847eb..91710eb571fb 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -217,9 +217,10 @@ static void smbd_destroy_rdma_work(struct work_struct *work) spin_unlock_irqrestore( &info->reassembly_queue_lock, flags); put_receive_buffer(info, response); - } + } else + spin_unlock_irqrestore(&info->reassembly_queue_lock, flags); } while (response); - spin_unlock_irqrestore(&info->reassembly_queue_lock, flags); + info->reassembly_data_length = 0; log_rdma_event(INFO, "free receive buffers\n"); @@ -1934,15 +1935,16 @@ again: * No need to lock if we are not at the * end of the queue */ - if (!queue_length) + if (queue_length) + list_del(&response->list); + else { spin_lock_irq( &info->reassembly_queue_lock); - list_del(&response->list); - queue_removed++; - if (!queue_length) + list_del(&response->list); spin_unlock_irq( &info->reassembly_queue_lock); - + } + queue_removed++; info->count_reassembly_queue--; info->count_dequeue_reassembly_queue++; put_receive_buffer(info, response); diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig index 58e2fe40b2a0..5933f995309a 100644 --- a/fs/cramfs/Kconfig +++ b/fs/cramfs/Kconfig @@ -33,8 +33,7 @@ config CRAMFS_BLOCKDEV config CRAMFS_MTD bool "Support CramFs image directly mapped in physical memory" - depends on CRAMFS && MTD - depends on CRAMFS=m || MTD=y + depends on CRAMFS && CRAMFS <= MTD default y if !CRAMFS_BLOCKDEV help This option allows the CramFs driver to load data directly from diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index c53d9cc5ae7a..a03ce3422578 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -275,7 +275,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; - size_t len; + ssize_t len; char *buf; if (of->atomic_write_len) { diff --git a/fs/locks.c b/fs/locks.c index 21b4dfa289ee..d6ff4beb70ce 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1554,9 +1554,9 @@ out: EXPORT_SYMBOL(__break_lease); /** - * lease_get_mtime - get the last modified time of an inode + * lease_get_mtime - update modified time of an inode with exclusive lease * @inode: the inode - * @time: pointer to a timespec which will contain the last modified time + * @time: pointer to a timespec which contains the last modified time * * This is to force NFS clients to flush their caches for files with * exclusive leases. The justification is that if someone has an @@ -1580,8 +1580,6 @@ void lease_get_mtime(struct inode *inode, struct timespec *time) if (has_lease) *time = current_time(inode); - else - *time = inode->i_mtime; } EXPORT_SYMBOL(lease_get_mtime); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 2758480555fa..1a70581e1cb2 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -251,6 +251,34 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) } /* + * Fill in the pre_op attr for the wcc data + */ +void fill_pre_wcc(struct svc_fh *fhp) +{ + struct inode *inode; + struct kstat stat; + __be32 err; + + if (fhp->fh_pre_saved) + return; + + inode = d_inode(fhp->fh_dentry); + err = fh_getattr(fhp, &stat); + if (err) { + /* Grab the times from inode anyway */ + stat.mtime = inode->i_mtime; + stat.ctime = inode->i_ctime; + stat.size = inode->i_size; + } + + fhp->fh_pre_mtime = stat.mtime; + fhp->fh_pre_ctime = stat.ctime; + fhp->fh_pre_size = stat.size; + fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); + fhp->fh_pre_saved = true; +} + +/* * Fill in the post_op attr for the wcc data */ void fill_post_wcc(struct svc_fh *fhp) @@ -261,7 +289,8 @@ void fill_post_wcc(struct svc_fh *fhp) printk("nfsd: inode locked twice during operation.\n"); err = fh_getattr(fhp, &fhp->fh_post_attr); - fhp->fh_post_change = nfsd4_change_attribute(d_inode(fhp->fh_dentry)); + fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, + d_inode(fhp->fh_dentry)); if (err) { fhp->fh_post_saved = false; /* Grab the ctime anyway - set_change_info might use it */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 008ea0b627d0..a0bed2b2004d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1363,14 +1363,14 @@ nfsd4_layoutget(struct svc_rqst *rqstp, const struct nfsd4_layout_ops *ops; struct nfs4_layout_stateid *ls; __be32 nfserr; - int accmode; + int accmode = NFSD_MAY_READ_IF_EXEC; switch (lgp->lg_seg.iomode) { case IOMODE_READ: - accmode = NFSD_MAY_READ; + accmode |= NFSD_MAY_READ; break; case IOMODE_RW: - accmode = NFSD_MAY_READ | NFSD_MAY_WRITE; + accmode |= NFSD_MAY_READ | NFSD_MAY_WRITE; break; default: dprintk("%s: invalid iomode %d\n", @@ -1703,6 +1703,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) status = nfserr_minor_vers_mismatch; if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) goto out; + status = nfserr_resource; + if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND) + goto out; status = nfs41_check_op_ordering(args); if (status) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b29b5a185a2c..150521c9671b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3590,6 +3590,7 @@ nfsd4_verify_open_stid(struct nfs4_stid *s) switch (s->sc_type) { default: break; + case 0: case NFS4_CLOSED_STID: case NFS4_CLOSED_DELEG_STID: ret = nfserr_bad_stateid; @@ -5182,7 +5183,6 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s) lockowner(stp->st_stateowner))) goto out; - stp->st_stid.sc_type = NFS4_CLOSED_STID; release_lock_stateid(stp); ret = nfs_ok; @@ -6078,10 +6078,8 @@ out: * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. */ - if (status && new) { - lock_stp->st_stid.sc_type = NFS4_CLOSED_STID; + if (status && new) release_lock_stateid(lock_stp); - } mutex_unlock(&lock_stp->st_mutex); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2c61c6b8ae09..e502fd16246b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -455,8 +455,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, } label->len = 0; -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { + if (IS_ENABLED(CONFIG_NFSD_V4_SECURITY_LABEL) && + bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { READ_BUF(4); len += 4; dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ @@ -476,7 +476,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (!label->data) return nfserr_jukebox; } -#endif if (bmval[2] & FATTR4_WORD2_MODE_UMASK) { if (!umask) goto xdr_error; @@ -1918,8 +1917,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (argp->taglen > NFSD4_MAX_TAGLEN) goto xdr_error; - if (argp->opcnt > 100) - goto xdr_error; + /* + * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS + * here, so we return success at the xdr level so that + * nfsd4_proc can handle this is an NFS-level error. + */ + if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND) + return 0; if (argp->opcnt > ARRAY_SIZE(argp->iops)) { argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); @@ -1991,7 +1995,7 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); *p++ = 0; } else if (IS_I_VERSION(inode)) { - p = xdr_encode_hyper(p, nfsd4_change_attribute(inode)); + p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode)); } else { *p++ = cpu_to_be32(stat->ctime.tv_sec); *p++ = cpu_to_be32(stat->ctime.tv_nsec); diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index b8444189223b..755e256a9103 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -253,36 +253,20 @@ fh_clear_wcc(struct svc_fh *fhp) * By using both ctime and the i_version counter we guarantee that as * long as time doesn't go backwards we never reuse an old value. */ -static inline u64 nfsd4_change_attribute(struct inode *inode) +static inline u64 nfsd4_change_attribute(struct kstat *stat, + struct inode *inode) { u64 chattr; - chattr = inode->i_ctime.tv_sec; + chattr = stat->ctime.tv_sec; chattr <<= 30; - chattr += inode->i_ctime.tv_nsec; + chattr += stat->ctime.tv_nsec; chattr += inode_query_iversion(inode); return chattr; } -/* - * Fill in the pre_op attr for the wcc data - */ -static inline void -fill_pre_wcc(struct svc_fh *fhp) -{ - struct inode *inode; - - inode = d_inode(fhp->fh_dentry); - if (!fhp->fh_pre_saved) { - fhp->fh_pre_mtime = inode->i_mtime; - fhp->fh_pre_ctime = inode->i_ctime; - fhp->fh_pre_size = inode->i_size; - fhp->fh_pre_change = nfsd4_change_attribute(inode); - fhp->fh_pre_saved = true; - } -} - -extern void fill_post_wcc(struct svc_fh *); +extern void fill_pre_wcc(struct svc_fh *fhp); +extern void fill_post_wcc(struct svc_fh *fhp); #else #define fh_clear_wcc(ignored) #define fill_pre_wcc(ignored) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 644a0342f0e0..79b6064f8977 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -188,6 +188,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl((u32) stat->ino); *p++ = htonl((u32) stat->atime.tv_sec); *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0); + time = stat->mtime; lease_get_mtime(d_inode(dentry), &time); *p++ = htonl((u32) time.tv_sec); *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index ae782df5c063..fe484cf93e5c 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -33,7 +33,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) new_op->upcall.req.lookup.parent_refn = parent->refn; strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, - ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d interrupt flag [%d]\n", @@ -118,8 +118,12 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) return 0; /* We do not need to continue with negative dentries. */ - if (!dentry->d_inode) - goto out; + if (!dentry->d_inode) { + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: negative dentry or positive dentry and inode valid.\n", + __func__); + return 1; + } /* Now we must perform a getattr to validate the inode contents. */ @@ -129,14 +133,7 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) __FILE__, __func__, __LINE__); return 0; } - if (ret == 0) - return 0; - -out: - gossip_debug(GOSSIP_DCACHE_DEBUG, - "%s: negative dentry or positive dentry and inode valid.\n", - __func__); - return 1; + return !ret; } const struct dentry_operations orangefs_dentry_operations = { diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index c98bba2dbc94..6e3134e6d98a 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -41,7 +41,7 @@ static int orangefs_create(struct inode *dir, ORANGEFS_TYPE_METAFILE, mode); strncpy(new_op->upcall.req.create.d_name, - dentry->d_name.name, ORANGEFS_NAME_MAX); + dentry->d_name.name, ORANGEFS_NAME_MAX - 1); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -142,7 +142,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, new_op->upcall.req.lookup.parent_refn = parent->refn; strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, - ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); gossip_debug(GOSSIP_NAME_DEBUG, "%s: doing lookup on %s under %pU,%d\n", @@ -244,7 +244,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) new_op->upcall.req.remove.parent_refn = parent->refn; strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name, - ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); ret = service_operation(new_op, "orangefs_unlink", get_interruptible_flag(inode)); @@ -300,8 +300,8 @@ static int orangefs_symlink(struct inode *dir, strncpy(new_op->upcall.req.sym.entry_name, dentry->d_name.name, - ORANGEFS_NAME_MAX); - strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); + strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX - 1); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -372,7 +372,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode ORANGEFS_TYPE_DIRECTORY, mode); strncpy(new_op->upcall.req.mkdir.d_name, - dentry->d_name.name, ORANGEFS_NAME_MAX); + dentry->d_name.name, ORANGEFS_NAME_MAX - 1); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -453,10 +453,10 @@ static int orangefs_rename(struct inode *old_dir, strncpy(new_op->upcall.req.rename.d_old_name, old_dentry->d_name.name, - ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); strncpy(new_op->upcall.req.rename.d_new_name, new_dentry->d_name.name, - ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); ret = service_operation(new_op, "orangefs_rename", diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 1c59dff530de..6e35f2f3c897 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -328,7 +328,7 @@ static int help_show(struct seq_file *m, void *v) /* * initialize the client-debug file. */ -int orangefs_client_debug_init(void) +static int orangefs_client_debug_init(void) { int rc = -ENOMEM; @@ -1056,7 +1056,7 @@ int orangefs_debugfs_new_debug(void __user *arg) client_debug_string, llu(mask_info.mask_value)); } else { - gossip_lerr("Invalid mask type....\n"); + gossip_err("Invalid mask type....\n"); return -EINVAL; } diff --git a/fs/orangefs/orangefs-debugfs.h b/fs/orangefs/orangefs-debugfs.h index b5fd9cd4960f..51147f9ce3d6 100644 --- a/fs/orangefs/orangefs-debugfs.h +++ b/fs/orangefs/orangefs-debugfs.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ int orangefs_debugfs_init(int); void orangefs_debugfs_cleanup(void); -int orangefs_client_debug_init(void); int orangefs_prepare_debugfs_help_string(int); int orangefs_debugfs_new_client_mask(void __user *); int orangefs_debugfs_new_client_string(void __user *); diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 2595453fe737..eebbaece85ef 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -56,11 +56,7 @@ #include "orangefs-dev-proto.h" -#ifdef ORANGEFS_KERNEL_DEBUG -#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS 10 -#else #define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS 20 -#endif #define ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS 30 @@ -104,11 +100,11 @@ enum orangefs_vfs_op_states { * orangefs kernel memory related flags */ -#if ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) +#if (defined CONFIG_DEBUG_SLAB) #define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE #else #define ORANGEFS_CACHE_CREATE_FLAGS 0 -#endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */ +#endif extern int orangefs_init_acl(struct inode *inode, struct inode *dir); extern const struct xattr_handler *orangefs_xattr_handlers[]; @@ -471,8 +467,6 @@ int orangefs_inode_check_changed(struct inode *inode); int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); -void orangefs_make_bad_inode(struct inode *inode); - int orangefs_unmount_sb(struct super_block *sb); bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op); diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 97fe93129f38..ea6256d136d1 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -230,25 +230,42 @@ static int orangefs_inode_type(enum orangefs_ds_type objtype) return -1; } -static int orangefs_inode_is_stale(struct inode *inode, int new, +static void orangefs_make_bad_inode(struct inode *inode) +{ + if (is_root_handle(inode)) { + /* + * if this occurs, the pvfs2-client-core was killed but we + * can't afford to lose the inode operations and such + * associated with the root handle in any case. + */ + gossip_debug(GOSSIP_UTILS_DEBUG, + "*** NOT making bad root inode %pU\n", + get_khandle_from_ino(inode)); + } else { + gossip_debug(GOSSIP_UTILS_DEBUG, + "*** making bad inode %pU\n", + get_khandle_from_ino(inode)); + make_bad_inode(inode); + } +} + +static int orangefs_inode_is_stale(struct inode *inode, struct ORANGEFS_sys_attr_s *attrs, char *link_target) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); int type = orangefs_inode_type(attrs->objtype); - if (!new) { - /* - * If the inode type or symlink target have changed then this - * inode is stale. - */ - if (type == -1 || !(inode->i_mode & type)) { - orangefs_make_bad_inode(inode); - return 1; - } - if (type == S_IFLNK && strncmp(orangefs_inode->link_target, - link_target, ORANGEFS_NAME_MAX)) { - orangefs_make_bad_inode(inode); - return 1; - } + /* + * If the inode type or symlink target have changed then this + * inode is stale. + */ + if (type == -1 || !(inode->i_mode & type)) { + orangefs_make_bad_inode(inode); + return 1; + } + if (type == S_IFLNK && strncmp(orangefs_inode->link_target, + link_target, ORANGEFS_NAME_MAX)) { + orangefs_make_bad_inode(inode); + return 1; } return 0; } @@ -294,16 +311,18 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass, if (ret != 0) goto out; - type = orangefs_inode_type(new_op-> - downcall.resp.getattr.attributes.objtype); - ret = orangefs_inode_is_stale(inode, new, - &new_op->downcall.resp.getattr.attributes, - new_op->downcall.resp.getattr.link_target); - if (ret) { - ret = -ESTALE; - goto out; + if (!new) { + ret = orangefs_inode_is_stale(inode, + &new_op->downcall.resp.getattr.attributes, + new_op->downcall.resp.getattr.link_target); + if (ret) { + ret = -ESTALE; + goto out; + } } + type = orangefs_inode_type(new_op-> + downcall.resp.getattr.attributes.objtype); switch (type) { case S_IFREG: inode->i_flags = orangefs_inode_flags(&new_op-> @@ -348,6 +367,12 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass, inode->i_link = orangefs_inode->link_target; } break; + /* i.e. -1 */ + default: + /* XXX: ESTALE? This is what is done if it is not new. */ + orangefs_make_bad_inode(inode); + ret = -ESTALE; + goto out; } inode->i_uid = make_kuid(&init_user_ns, new_op-> @@ -401,7 +426,7 @@ int orangefs_inode_check_changed(struct inode *inode) if (ret != 0) goto out; - ret = orangefs_inode_is_stale(inode, 0, + ret = orangefs_inode_is_stale(inode, &new_op->downcall.resp.getattr.attributes, new_op->downcall.resp.getattr.link_target); out: @@ -444,25 +469,6 @@ int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr) return ret; } -void orangefs_make_bad_inode(struct inode *inode) -{ - if (is_root_handle(inode)) { - /* - * if this occurs, the pvfs2-client-core was killed but we - * can't afford to lose the inode operations and such - * associated with the root handle in any case. - */ - gossip_debug(GOSSIP_UTILS_DEBUG, - "*** NOT making bad root inode %pU\n", - get_khandle_from_ino(inode)); - } else { - gossip_debug(GOSSIP_UTILS_DEBUG, - "*** making bad inode %pU\n", - get_khandle_from_ino(inode)); - make_bad_inode(inode); - } -} - /* * The following is a very dirty hack that is now a permanent part of the * ORANGEFS protocol. See protocol.h for more error definitions. @@ -537,6 +543,7 @@ int orangefs_normalize_to_errno(__s32 error_code) */ } else { gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n"); + error_code = -EINVAL; } return error_code; } diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index e0bf5e4dce0d..dc6e3e6269c3 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -395,13 +395,6 @@ struct ORANGEFS_dev_map_desc { /* gossip.h *****************************************************************/ -#ifdef GOSSIP_DISABLE_DEBUG -#define gossip_debug(mask, fmt, ...) \ -do { \ - if (0) \ - printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ -} while (0) -#else extern __u64 orangefs_gossip_debug_mask; /* try to avoid function call overhead by checking masks in macro */ @@ -410,13 +403,5 @@ do { \ if (orangefs_gossip_debug_mask & (mask)) \ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ } while (0) -#endif /* GOSSIP_DISABLE_DEBUG */ - -/* do file and line number printouts w/ the GNU preprocessor */ -#define gossip_ldebug(mask, fmt, ...) \ - gossip_debug(mask, "%s: " fmt, __func__, ##__VA_ARGS__) #define gossip_err pr_err -#define gossip_lerr(fmt, ...) \ - gossip_err("%s line %d: " fmt, \ - __FILE__, __LINE__, ##__VA_ARGS__) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 62d49e53061c..3ae5fdba0225 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -335,7 +335,7 @@ static int orangefs_encode_fh(struct inode *inode, struct orangefs_object_kref refn; if (*max_len < len) { - gossip_lerr("fh buffer is too small for encoding\n"); + gossip_err("fh buffer is too small for encoding\n"); *max_len = len; type = 255; goto out; @@ -383,7 +383,7 @@ static int orangefs_unmount(int id, __s32 fs_id, const char *devname) op->upcall.req.fs_umount.id = id; op->upcall.req.fs_umount.fs_id = fs_id; strncpy(op->upcall.req.fs_umount.orangefs_config_server, - devname, ORANGEFS_MAX_SERVER_ADDR_LEN); + devname, ORANGEFS_MAX_SERVER_ADDR_LEN - 1); r = service_operation(op, "orangefs_fs_umount", 0); /* Not much to do about an error here. */ if (r) @@ -478,7 +478,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, devname, - ORANGEFS_MAX_SERVER_ADDR_LEN); + ORANGEFS_MAX_SERVER_ADDR_LEN - 1); gossip_debug(GOSSIP_SUPER_DEBUG, "Attempting ORANGEFS Mount via host %s\n", @@ -520,7 +520,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, */ strncpy(ORANGEFS_SB(sb)->devname, devname, - ORANGEFS_MAX_SERVER_ADDR_LEN); + ORANGEFS_MAX_SERVER_ADDR_LEN - 1); /* mount_pending must be cleared */ ORANGEFS_SB(sb)->mount_pending = 0; diff --git a/fs/seq_file.c b/fs/seq_file.c index 4be761c1a03d..eea09f6d8830 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -181,8 +181,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) * if request is to read from zero offset, reset iterator to first * record as it might have been already advanced by previous requests */ - if (*ppos == 0) + if (*ppos == 0) { m->index = 0; + m->version = 0; + m->count = 0; + } /* Don't assume *ppos is where we left it */ if (unlikely(*ppos != m->read_pos)) { |