summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/cell.c4
-rw-r--r--fs/afs/cmservice.c10
-rw-r--r--fs/afs/dir.c92
-rw-r--r--fs/afs/file.c12
-rw-r--r--fs/afs/vlclient.c11
-rw-r--r--fs/afs/yfsclient.c2
-rw-r--r--fs/block_dev.c64
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c71
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/ceph/addr.c5
-rw-r--r--fs/ceph/caps.c5
-rw-r--r--fs/ceph/inode.c7
-rw-r--r--fs/ceph/locks.c3
-rw-r--r--fs/ceph/snap.c4
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/ceph/xattr.c19
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsproto.h1
-rw-r--r--fs/cifs/cifssmb.c197
-rw-r--r--fs/cifs/connect.c30
-rw-r--r--fs/cifs/dir.c5
-rw-r--r--fs/cifs/misc.c22
-rw-r--r--fs/cifs/sess.c26
-rw-r--r--fs/cifs/smb2ops.c39
-rw-r--r--fs/cifs/smb2pdu.c7
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/dax.c2
-rw-r--r--fs/gfs2/bmap.c164
-rw-r--r--fs/io_uring.c86
-rw-r--r--fs/nfs/delegation.c25
-rw-r--r--fs/nfs/delegation.h2
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/direct.c27
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c28
-rw-r--r--fs/nfs/fscache.c7
-rw-r--r--fs/nfs/fscache.h2
-rw-r--r--fs/nfs/inode.c33
-rw-r--r--fs/nfs/internal.h10
-rw-r--r--fs/nfs/nfs4_fs.h3
-rw-r--r--fs/nfs/nfs4client.c5
-rw-r--r--fs/nfs/nfs4file.c12
-rw-r--r--fs/nfs/nfs4proc.c109
-rw-r--r--fs/nfs/nfs4state.c49
-rw-r--r--fs/nfs/pagelist.c19
-rw-r--r--fs/nfs/pnfs.c7
-rw-r--r--fs/nfs/pnfs_nfs.c15
-rw-r--r--fs/nfs/proc.c7
-rw-r--r--fs/nfs/read.c35
-rw-r--r--fs/nfs/super.c1
-rw-r--r--fs/nfs/write.c38
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/nfsd/nfsctl.c19
-rw-r--r--fs/read_write.c49
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/super.c4
-rw-r--r--fs/userfaultfd.c25
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c29
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c19
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c3
-rw-r--r--fs/xfs/xfs_ioctl32.c56
-rw-r--r--fs/xfs/xfs_iops.c1
-rw-r--r--fs/xfs/xfs_log.c5
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_reflink.c63
68 files changed, 914 insertions, 719 deletions
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index a2a87117d262..fd5133e26a38 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -74,6 +74,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
cell = rcu_dereference_raw(net->ws_cell);
if (cell) {
afs_get_cell(cell);
+ ret = 0;
break;
}
ret = -EDESTADDRREQ;
@@ -108,6 +109,9 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
done_seqretry(&net->cells_lock, seq);
+ if (ret != 0 && cell)
+ afs_put_cell(net, cell);
+
return ret == 0 ? cell : ERR_PTR(ret);
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 4f1b6f466ff5..b86195e4dc6c 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -505,18 +505,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
struct afs_call *call = container_of(work, struct afs_call, work);
struct afs_uuid *r = call->request;
- struct {
- __be32 match;
- } reply;
-
_enter("");
if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
- reply.match = htonl(0);
+ afs_send_empty_reply(call);
else
- reply.match = htonl(1);
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ 1, 1, "K-1");
- afs_send_simple_reply(call, &reply, sizeof(reply));
afs_put_call(call);
_leave("");
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e640d67274be..139b4e3cc946 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -440,7 +440,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
* iterate through the data blob that lists the contents of an AFS directory
*/
static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
- struct key *key)
+ struct key *key, afs_dataversion_t *_dir_version)
{
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct afs_xdr_dir_page *dbuf;
@@ -460,6 +460,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
req = afs_read_dir(dvnode, key);
if (IS_ERR(req))
return PTR_ERR(req);
+ *_dir_version = req->data_version;
/* round the file position up to the next entry boundary */
ctx->pos += sizeof(union afs_xdr_dirent) - 1;
@@ -514,7 +515,10 @@ out:
*/
static int afs_readdir(struct file *file, struct dir_context *ctx)
{
- return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file));
+ afs_dataversion_t dir_version;
+
+ return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file),
+ &dir_version);
}
/*
@@ -555,7 +559,8 @@ static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
* - just returns the FID the dentry name maps to if found
*/
static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
- struct afs_fid *fid, struct key *key)
+ struct afs_fid *fid, struct key *key,
+ afs_dataversion_t *_dir_version)
{
struct afs_super_info *as = dir->i_sb->s_fs_info;
struct afs_lookup_one_cookie cookie = {
@@ -568,7 +573,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
/* search the directory */
- ret = afs_dir_iterate(dir, &cookie.ctx, key);
+ ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version);
if (ret < 0) {
_leave(" = %d [iter]", ret);
return ret;
@@ -642,6 +647,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
struct afs_server *server;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
struct inode *inode = NULL, *ti;
+ afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
int ret, i;
_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
@@ -669,12 +675,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
cookie->fids[i].vid = as->volume->vid;
/* search the directory */
- ret = afs_dir_iterate(dir, &cookie->ctx, key);
+ ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version);
if (ret < 0) {
inode = ERR_PTR(ret);
goto out;
}
+ dentry->d_fsdata = (void *)(unsigned long)data_version;
+
inode = ERR_PTR(-ENOENT);
if (!cookie->found)
goto out;
@@ -951,7 +959,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
inode ? AFS_FS_I(inode) : NULL);
} else {
trace_afs_lookup(dvnode, &dentry->d_name,
- inode ? AFS_FS_I(inode) : NULL);
+ IS_ERR_OR_NULL(inode) ? NULL
+ : AFS_FS_I(inode));
}
return d;
}
@@ -968,7 +977,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
struct dentry *parent;
struct inode *inode;
struct key *key;
- long dir_version, de_version;
+ afs_dataversion_t dir_version;
+ long de_version;
int ret;
if (flags & LOOKUP_RCU)
@@ -1014,20 +1024,20 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
* on a 32-bit system, we only have 32 bits in the dentry to store the
* version.
*/
- dir_version = (long)dir->status.data_version;
+ dir_version = dir->status.data_version;
de_version = (long)dentry->d_fsdata;
- if (de_version == dir_version)
- goto out_valid;
+ if (de_version == (long)dir_version)
+ goto out_valid_noupdate;
- dir_version = (long)dir->invalid_before;
- if (de_version - dir_version >= 0)
+ dir_version = dir->invalid_before;
+ if (de_version - (long)dir_version >= 0)
goto out_valid;
_debug("dir modified");
afs_stat_v(dir, n_reval);
/* search the directory for this vnode */
- ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key);
+ ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key, &dir_version);
switch (ret) {
case 0:
/* the filename maps to something */
@@ -1080,7 +1090,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
}
out_valid:
- dentry->d_fsdata = (void *)dir_version;
+ dentry->d_fsdata = (void *)(unsigned long)dir_version;
+out_valid_noupdate:
dput(parent);
key_put(key);
_leave(" = 1 [valid]");
@@ -1186,6 +1197,20 @@ static void afs_prep_for_new_inode(struct afs_fs_cursor *fc,
}
/*
+ * Note that a dentry got changed. We need to set d_fsdata to the data version
+ * number derived from the result of the operation. It doesn't matter if
+ * d_fsdata goes backwards as we'll just revalidate.
+ */
+static void afs_update_dentry_version(struct afs_fs_cursor *fc,
+ struct dentry *dentry,
+ struct afs_status_cb *scb)
+{
+ if (fc->ac.error == 0)
+ dentry->d_fsdata =
+ (void *)(unsigned long)scb->status.data_version;
+}
+
+/*
* create a directory on an AFS filesystem
*/
static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
@@ -1227,6 +1252,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
afs_check_for_remote_deletion(&fc, dvnode);
afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
&data_version, &scb[0]);
+ afs_update_dentry_version(&fc, dentry, &scb[0]);
afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
@@ -1319,6 +1345,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
&data_version, scb);
+ afs_update_dentry_version(&fc, dentry, scb);
ret = afs_end_vnode_operation(&fc);
if (ret == 0) {
afs_dir_remove_subdir(dentry);
@@ -1458,6 +1485,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
&data_version, &scb[0]);
afs_vnode_commit_status(&fc, vnode, fc.cb_break_2,
&data_version_2, &scb[1]);
+ afs_update_dentry_version(&fc, dentry, &scb[0]);
ret = afs_end_vnode_operation(&fc);
if (ret == 0 && !(scb[1].have_status || scb[1].have_error))
ret = afs_dir_remove_link(dvnode, dentry, key);
@@ -1526,6 +1554,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
afs_check_for_remote_deletion(&fc, dvnode);
afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
&data_version, &scb[0]);
+ afs_update_dentry_version(&fc, dentry, &scb[0]);
afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
@@ -1607,6 +1636,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
afs_vnode_commit_status(&fc, vnode, fc.cb_break_2,
NULL, &scb[1]);
ihold(&vnode->vfs_inode);
+ afs_update_dentry_version(&fc, dentry, &scb[0]);
d_instantiate(dentry, &vnode->vfs_inode);
mutex_unlock(&vnode->io_lock);
@@ -1686,6 +1716,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
afs_check_for_remote_deletion(&fc, dvnode);
afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
&data_version, &scb[0]);
+ afs_update_dentry_version(&fc, dentry, &scb[0]);
afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
@@ -1791,6 +1822,17 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
+ /* This bit is potentially nasty as there's a potential race with
+ * afs_d_revalidate{,_rcu}(). We have to change d_fsdata on the dentry
+ * to reflect it's new parent's new data_version after the op, but
+ * d_revalidate may see old_dentry between the op having taken place
+ * and the version being updated.
+ *
+ * So drop the old_dentry for now to make other threads go through
+ * lookup instead - which we hold a lock against.
+ */
+ d_drop(old_dentry);
+
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) {
afs_dataversion_t orig_data_version;
@@ -1802,9 +1844,9 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (orig_dvnode != new_dvnode) {
if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc);
- goto error_rehash;
+ goto error_rehash_old;
}
- new_data_version = new_dvnode->status.data_version;
+ new_data_version = new_dvnode->status.data_version + 1;
} else {
new_data_version = orig_data_version;
new_scb = &scb[0];
@@ -1827,7 +1869,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
- goto error_rehash;
+ goto error_rehash_old;
}
if (ret == 0) {
@@ -1853,10 +1895,26 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
drop_nlink(new_inode);
spin_unlock(&new_inode->i_lock);
}
+
+ /* Now we can update d_fsdata on the dentries to reflect their
+ * new parent's data_version.
+ *
+ * Note that if we ever implement RENAME_EXCHANGE, we'll have
+ * to update both dentries with opposing dir versions.
+ */
+ if (new_dvnode != orig_dvnode) {
+ afs_update_dentry_version(&fc, old_dentry, &scb[1]);
+ afs_update_dentry_version(&fc, new_dentry, &scb[1]);
+ } else {
+ afs_update_dentry_version(&fc, old_dentry, &scb[0]);
+ afs_update_dentry_version(&fc, new_dentry, &scb[0]);
+ }
d_move(old_dentry, new_dentry);
goto error_tmp;
}
+error_rehash_old:
+ d_rehash(new_dentry);
error_rehash:
if (rehash)
d_rehash(rehash);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 56b69576274d..dd3c55c9101c 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -191,11 +191,13 @@ void afs_put_read(struct afs_read *req)
int i;
if (refcount_dec_and_test(&req->usage)) {
- for (i = 0; i < req->nr_pages; i++)
- if (req->pages[i])
- put_page(req->pages[i]);
- if (req->pages != req->array)
- kfree(req->pages);
+ if (req->pages) {
+ for (i = 0; i < req->nr_pages; i++)
+ if (req->pages[i])
+ put_page(req->pages[i]);
+ if (req->pages != req->array)
+ kfree(req->pages);
+ }
kfree(req);
}
}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index d7e0fd3c00df..cfb0ac4bd039 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -56,23 +56,24 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
struct afs_uuid__xdr *xdr;
struct afs_uuid *uuid;
int j;
+ int n = entry->nr_servers;
tmp = ntohl(uvldb->serverFlags[i]);
if (tmp & AFS_VLSF_DONTUSE ||
(new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
continue;
if (tmp & AFS_VLSF_RWVOL) {
- entry->fs_mask[i] |= AFS_VOL_VTM_RW;
+ entry->fs_mask[n] |= AFS_VOL_VTM_RW;
if (vlflags & AFS_VLF_BACKEXISTS)
- entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
+ entry->fs_mask[n] |= AFS_VOL_VTM_BAK;
}
if (tmp & AFS_VLSF_ROVOL)
- entry->fs_mask[i] |= AFS_VOL_VTM_RO;
- if (!entry->fs_mask[i])
+ entry->fs_mask[n] |= AFS_VOL_VTM_RO;
+ if (!entry->fs_mask[n])
continue;
xdr = &uvldb->serverNumber[i];
- uuid = (struct afs_uuid *)&entry->fs_server[i];
+ uuid = (struct afs_uuid *)&entry->fs_server[n];
uuid->time_low = xdr->time_low;
uuid->time_mid = htons(ntohl(xdr->time_mid));
uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version));
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 2575503170fc..ca2452806ebf 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -2171,7 +2171,7 @@ int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
size = round_up(acl->size, 4);
- call = afs_alloc_flat_call(net, &yfs_RXYFSStoreStatus,
+ call = afs_alloc_flat_call(net, &yfs_RXYFSStoreOpaqueACL2,
sizeof(__be32) * 2 +
sizeof(struct yfs_xdr_YFSFid) +
sizeof(__be32) + size,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a6f7c892cb4a..677cb364d33f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -345,24 +345,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
struct bio *bio;
bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
- bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
- gfp_t gfp;
- ssize_t ret;
+ int ret = 0;
if ((pos | iov_iter_alignment(iter)) &
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- if (nowait)
- gfp = GFP_NOWAIT;
- else
- gfp = GFP_KERNEL;
-
- bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool);
- if (!bio)
- return -EAGAIN;
+ bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
dio->is_sync = is_sync = is_sync_kiocb(iocb);
@@ -384,10 +375,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
if (!is_poll)
blk_start_plug(&plug);
- ret = 0;
for (;;) {
- int err;
-
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> 9;
bio->bi_write_hint = iocb->ki_hint;
@@ -395,10 +383,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
bio->bi_end_io = blkdev_bio_end_io;
bio->bi_ioprio = iocb->ki_ioprio;
- err = bio_iov_iter_get_pages(bio, iter);
- if (unlikely(err)) {
- if (!ret)
- ret = err;
+ ret = bio_iov_iter_get_pages(bio, iter);
+ if (unlikely(ret)) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
break;
@@ -413,14 +399,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
task_io_account_write(bio->bi_iter.bi_size);
}
- /*
- * Tell underlying layer to not block for resource shortage.
- * And if we would have blocked, return error inline instead
- * of through the bio->bi_end_io() callback.
- */
- if (nowait)
- bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE);
-
dio->size += bio->bi_iter.bi_size;
pos += bio->bi_iter.bi_size;
@@ -434,12 +412,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
}
qc = submit_bio(bio);
- if (qc == BLK_QC_T_EAGAIN) {
- if (!ret)
- ret = -EAGAIN;
- goto error;
- }
- ret = dio->size;
if (polled)
WRITE_ONCE(iocb->ki_cookie, qc);
@@ -460,20 +432,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
atomic_inc(&dio->ref);
}
- qc = submit_bio(bio);
- if (qc == BLK_QC_T_EAGAIN) {
- if (!ret)
- ret = -EAGAIN;
- goto error;
- }
- ret = dio->size;
-
- bio = bio_alloc(gfp, nr_pages);
- if (!bio) {
- if (!ret)
- ret = -EAGAIN;
- goto error;
- }
+ submit_bio(bio);
+ bio = bio_alloc(GFP_KERNEL, nr_pages);
}
if (!is_poll)
@@ -493,16 +453,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
}
__set_current_state(TASK_RUNNING);
-out:
if (!ret)
ret = blk_status_to_errno(dio->bio.bi_status);
+ if (likely(!ret))
+ ret = dio->size;
bio_put(&dio->bio);
return ret;
-error:
- if (!is_poll)
- blk_finish_plug(&plug);
- goto out;
}
static ssize_t
@@ -1754,7 +1711,10 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
/* finish claiming */
mutex_lock(&bdev->bd_mutex);
- bd_finish_claiming(bdev, whole, holder);
+ if (!res)
+ bd_finish_claiming(bdev, whole, holder);
+ else
+ bd_abort_claiming(bdev, whole, holder);
/*
* Block event polling for write claims if requested. Any
* write holder makes the write_holder state stick until
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 299e11e6c554..94660063a162 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -401,7 +401,6 @@ struct btrfs_dev_replace {
struct raid_kobject {
u64 flags;
struct kobject kobj;
- struct list_head list;
};
/*
@@ -915,8 +914,6 @@ struct btrfs_fs_info {
u32 thread_pool_size;
struct kobject *space_info_kobj;
- struct list_head pending_raid_kobjs;
- spinlock_t pending_raid_kobjs_lock; /* uncontended */
u64 total_pinned;
@@ -2698,7 +2695,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
u64 bytes_used, u64 type, u64 chunk_offset,
u64 size);
-void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
const u64 chunk_offset);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5f7ee70b3d1a..97beb351a10c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2683,8 +2683,6 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->delayed_iputs);
INIT_LIST_HEAD(&fs_info->delalloc_roots);
INIT_LIST_HEAD(&fs_info->caching_block_groups);
- INIT_LIST_HEAD(&fs_info->pending_raid_kobjs);
- spin_lock_init(&fs_info->pending_raid_kobjs_lock);
spin_lock_init(&fs_info->delalloc_root_lock);
spin_lock_init(&fs_info->trans_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d3b58e388535..8b7eb22d508a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4,6 +4,7 @@
*/
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
@@ -7888,33 +7889,6 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
return 0;
}
-/* link_block_group will queue up kobjects to add when we're reclaim-safe */
-void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_space_info *space_info;
- struct raid_kobject *rkobj;
- LIST_HEAD(list);
- int ret = 0;
-
- spin_lock(&fs_info->pending_raid_kobjs_lock);
- list_splice_init(&fs_info->pending_raid_kobjs, &list);
- spin_unlock(&fs_info->pending_raid_kobjs_lock);
-
- list_for_each_entry(rkobj, &list, list) {
- space_info = btrfs_find_space_info(fs_info, rkobj->flags);
-
- ret = kobject_add(&rkobj->kobj, &space_info->kobj,
- "%s", btrfs_bg_type_to_raid_name(rkobj->flags));
- if (ret) {
- kobject_put(&rkobj->kobj);
- break;
- }
- }
- if (ret)
- btrfs_warn(fs_info,
- "failed to add kobject for block cache, ignoring");
-}
-
static void link_block_group(struct btrfs_block_group_cache *cache)
{
struct btrfs_space_info *space_info = cache->space_info;
@@ -7929,18 +7903,36 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
up_write(&space_info->groups_sem);
if (first) {
- struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
+ struct raid_kobject *rkobj;
+ unsigned int nofs_flag;
+ int ret;
+
+ /*
+ * Setup a NOFS context because kobject_add(), deep in its call
+ * chain, does GFP_KERNEL allocations, and we are often called
+ * in a context where if reclaim is triggered we can deadlock
+ * (we are either holding a transaction handle or some lock
+ * required for a transaction commit).
+ */
+ nofs_flag = memalloc_nofs_save();
+ rkobj = kzalloc(sizeof(*rkobj), GFP_KERNEL);
if (!rkobj) {
+ memalloc_nofs_restore(nofs_flag);
btrfs_warn(cache->fs_info,
"couldn't alloc memory for raid level kobject");
return;
}
rkobj->flags = cache->flags;
kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
-
- spin_lock(&fs_info->pending_raid_kobjs_lock);
- list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
- spin_unlock(&fs_info->pending_raid_kobjs_lock);
+ ret = kobject_add(&rkobj->kobj, &space_info->kobj, "%s",
+ btrfs_bg_type_to_raid_name(rkobj->flags));
+ memalloc_nofs_restore(nofs_flag);
+ if (ret) {
+ kobject_put(&rkobj->kobj);
+ btrfs_warn(fs_info,
+ "failed to add kobject for block cache, ignoring");
+ return;
+ }
space_info->block_group_kobjs[index] = &rkobj->kobj;
}
}
@@ -8206,7 +8198,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
inc_block_group_ro(cache, 1);
}
- btrfs_add_raid_kobjects(info);
btrfs_init_global_block_rsv(info);
ret = check_chunk_block_group_mappings(info);
error:
@@ -8975,6 +8966,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
struct btrfs_device *device;
struct list_head *devices;
u64 group_trimmed;
+ u64 range_end = U64_MAX;
u64 start;
u64 end;
u64 trimmed = 0;
@@ -8984,16 +8976,23 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
int dev_ret = 0;
int ret = 0;
+ /*
+ * Check range overflow if range->len is set.
+ * The default range->len is U64_MAX.
+ */
+ if (range->len != U64_MAX &&
+ check_add_overflow(range->start, range->len, &range_end))
+ return -EINVAL;
+
cache = btrfs_lookup_first_block_group(fs_info, range->start);
for (; cache; cache = next_block_group(cache)) {
- if (cache->key.objectid >= (range->start + range->len)) {
+ if (cache->key.objectid >= range_end) {
btrfs_put_block_group(cache);
break;
}
start = max(range->start, cache->key.objectid);
- end = min(range->start + range->len,
- cache->key.objectid + cache->key.offset);
+ end = min(range_end, cache->key.objectid + cache->key.offset);
if (end - start >= range->minlen) {
if (!block_group_cache_done(cache)) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d74b74ca07af..a447d3ec48d5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3087,16 +3087,6 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
if (ret)
return ret;
- /*
- * We add the kobjects here (and after forcing data chunk creation)
- * since relocation is the only place we'll create chunks of a new
- * type at runtime. The only place where we'll remove the last
- * chunk of a type is the call immediately below this one. Even
- * so, we're protected against races with the cleaner thread since
- * we're covered by the delete_unused_bgs_mutex.
- */
- btrfs_add_raid_kobjects(fs_info);
-
trans = btrfs_start_trans_remove_block_group(root->fs_info,
chunk_offset);
if (IS_ERR(trans)) {
@@ -3223,9 +3213,6 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
btrfs_end_transaction(trans);
if (ret < 0)
return ret;
-
- btrfs_add_raid_kobjects(fs_info);
-
return 1;
}
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e078cc55b989..b3c8b886bf64 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -913,8 +913,9 @@ get_more_pages:
if (page_offset(page) >= ceph_wbc.i_size) {
dout("%p page eof %llu\n",
page, ceph_wbc.i_size);
- if (ceph_wbc.size_stable ||
- page_offset(page) >= i_size_read(inode))
+ if ((ceph_wbc.size_stable ||
+ page_offset(page) >= i_size_read(inode)) &&
+ clear_page_dirty_for_io(page))
mapping->a_ops->invalidatepage(page,
0, PAGE_SIZE);
unlock_page(page);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d98dcd976c80..ce0f5658720a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1301,6 +1301,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
{
struct ceph_inode_info *ci = cap->ci;
struct inode *inode = &ci->vfs_inode;
+ struct ceph_buffer *old_blob = NULL;
struct cap_msg_args arg;
int held, revoking;
int wake = 0;
@@ -1365,7 +1366,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
ci->i_requested_max_size = arg.max_size;
if (flushing & CEPH_CAP_XATTR_EXCL) {
- __ceph_build_xattrs_blob(ci);
+ old_blob = __ceph_build_xattrs_blob(ci);
arg.xattr_version = ci->i_xattrs.version;
arg.xattr_buf = ci->i_xattrs.blob;
} else {
@@ -1409,6 +1410,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
spin_unlock(&ci->i_ceph_lock);
+ ceph_buffer_put(old_blob);
+
ret = send_cap_msg(&arg);
if (ret < 0) {
dout("error sending cap msg, must requeue %p\n", inode);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 791f84a13bb8..18500edefc56 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -736,6 +736,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
int issued, new_issued, info_caps;
struct timespec64 mtime, atime, ctime;
struct ceph_buffer *xattr_blob = NULL;
+ struct ceph_buffer *old_blob = NULL;
struct ceph_string *pool_ns = NULL;
struct ceph_cap *new_cap = NULL;
int err = 0;
@@ -881,7 +882,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
if ((ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) &&
le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) {
if (ci->i_xattrs.blob)
- ceph_buffer_put(ci->i_xattrs.blob);
+ old_blob = ci->i_xattrs.blob;
ci->i_xattrs.blob = xattr_blob;
if (xattr_blob)
memcpy(ci->i_xattrs.blob->vec.iov_base,
@@ -1022,8 +1023,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
out:
if (new_cap)
ceph_put_cap(mdsc, new_cap);
- if (xattr_blob)
- ceph_buffer_put(xattr_blob);
+ ceph_buffer_put(old_blob);
+ ceph_buffer_put(xattr_blob);
ceph_put_string(pool_ns);
return err;
}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index ac9b53b89365..5083e238ad15 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -111,8 +111,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
req->r_wait_for_completion = ceph_lock_wait_for_completion;
err = ceph_mdsc_do_request(mdsc, inode, req);
-
- if (operation == CEPH_MDS_OP_GETFILELOCK) {
+ if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid);
if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
fl->fl_type = F_RDLCK;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4c6494eb02b5..ccfcc66aaf44 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -465,6 +465,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
struct inode *inode = &ci->vfs_inode;
struct ceph_cap_snap *capsnap;
struct ceph_snap_context *old_snapc, *new_snapc;
+ struct ceph_buffer *old_blob = NULL;
int used, dirty;
capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
@@ -541,7 +542,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
capsnap->gid = inode->i_gid;
if (dirty & CEPH_CAP_XATTR_EXCL) {
- __ceph_build_xattrs_blob(ci);
+ old_blob = __ceph_build_xattrs_blob(ci);
capsnap->xattr_blob =
ceph_buffer_get(ci->i_xattrs.blob);
capsnap->xattr_version = ci->i_xattrs.version;
@@ -584,6 +585,7 @@ update_snapc:
}
spin_unlock(&ci->i_ceph_lock);
+ ceph_buffer_put(old_blob);
kfree(capsnap);
ceph_put_snap_context(old_snapc);
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d2352fd95dbc..6b9f1ee7de85 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -926,7 +926,7 @@ extern int ceph_getattr(const struct path *path, struct kstat *stat,
int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int);
ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t);
extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
-extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci);
+extern struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci);
extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
extern const struct xattr_handler *ceph_xattr_handlers[];
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 37b458a9af3a..939eab7aa219 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -754,12 +754,15 @@ static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
/*
* If there are dirty xattrs, reencode xattrs into the prealloc_blob
- * and swap into place.
+ * and swap into place. It returns the old i_xattrs.blob (or NULL) so
+ * that it can be freed by the caller as the i_ceph_lock is likely to be
+ * held.
*/
-void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
+struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci)
{
struct rb_node *p;
struct ceph_inode_xattr *xattr = NULL;
+ struct ceph_buffer *old_blob = NULL;
void *dest;
dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
@@ -790,12 +793,14 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
if (ci->i_xattrs.blob)
- ceph_buffer_put(ci->i_xattrs.blob);
+ old_blob = ci->i_xattrs.blob;
ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
ci->i_xattrs.prealloc_blob = NULL;
ci->i_xattrs.dirty = false;
ci->i_xattrs.version++;
}
+
+ return old_blob;
}
static inline int __get_request_mask(struct inode *in) {
@@ -1036,6 +1041,7 @@ int __ceph_setxattr(struct inode *inode, const char *name,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_cap_flush *prealloc_cf = NULL;
+ struct ceph_buffer *old_blob = NULL;
int issued;
int err;
int dirty = 0;
@@ -1109,13 +1115,15 @@ retry:
struct ceph_buffer *blob;
spin_unlock(&ci->i_ceph_lock);
- dout(" preaallocating new blob size=%d\n", required_blob_size);
+ ceph_buffer_put(old_blob); /* Shouldn't be required */
+ dout(" pre-allocating new blob size=%d\n", required_blob_size);
blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
if (!blob)
goto do_sync_unlocked;
spin_lock(&ci->i_ceph_lock);
+ /* prealloc_blob can't be released while holding i_ceph_lock */
if (ci->i_xattrs.prealloc_blob)
- ceph_buffer_put(ci->i_xattrs.prealloc_blob);
+ old_blob = ci->i_xattrs.prealloc_blob;
ci->i_xattrs.prealloc_blob = blob;
goto retry;
}
@@ -1131,6 +1139,7 @@ retry:
}
spin_unlock(&ci->i_ceph_lock);
+ ceph_buffer_put(old_blob);
if (lock_snap_rwsem)
up_read(&mdsc->snap_rwsem);
if (dirty)
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 4b21a90015a9..99caf77df4a2 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -152,5 +152,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.21"
+#define CIFS_VERSION "2.22"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e23234207fc2..592a6cea2b79 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -579,6 +579,7 @@ extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page,
unsigned int *len, unsigned int *offset);
void extract_unc_hostname(const char *unc, const char **h, size_t *len);
+int copy_path_name(char *dst, const char *src);
#ifdef CONFIG_CIFS_DFS_UPCALL
static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index e2f95965065d..3907653e63c7 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -942,10 +942,8 @@ PsxDelete:
PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB add path length overrun check */
- name_len = strnlen(fileName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, fileName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, fileName);
}
params = 6 + name_len;
@@ -1015,10 +1013,8 @@ DelFileRetry:
remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve check for buffer overruns BB */
- name_len = strnlen(name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->fileName, name, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->fileName, name);
}
pSMB->SearchAttributes =
cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
@@ -1062,10 +1058,8 @@ RmDirRetry:
remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve check for buffer overruns BB */
- name_len = strnlen(name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->DirName, name, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->DirName, name);
}
pSMB->BufferFormat = 0x04;
@@ -1107,10 +1101,8 @@ MkDirRetry:
remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve check for buffer overruns BB */
- name_len = strnlen(name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->DirName, name, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->DirName, name);
}
pSMB->BufferFormat = 0x04;
@@ -1157,10 +1149,8 @@ PsxCreat:
PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, name, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, name);
}
params = 6 + name_len;
@@ -1324,11 +1314,9 @@ OldOpenRetry:
fileName, PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve check for buffer overruns BB */
+ } else {
count = 0; /* no pad */
- name_len = strnlen(fileName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->fileName, fileName, name_len);
+ name_len = copy_path_name(pSMB->fileName, fileName);
}
if (*pOplock & REQ_OPLOCK)
pSMB->OpenFlags = cpu_to_le16(REQ_OPLOCK);
@@ -1442,11 +1430,8 @@ openRetry:
/* BB improve check for buffer overruns BB */
/* no pad */
count = 0;
- name_len = strnlen(path, PATH_MAX);
- /* trailing null */
- name_len++;
+ name_len = copy_path_name(req->fileName, path);
req->NameLength = cpu_to_le16(name_len);
- strncpy(req->fileName, path, name_len);
}
if (*oplock & REQ_OPLOCK)
@@ -2812,15 +2797,10 @@ renameRetry:
remap);
name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ;
name_len2 *= 2; /* convert to bytes */
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(from_name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->OldFileName, from_name, name_len);
- name_len2 = strnlen(to_name, PATH_MAX);
- name_len2++; /* trailing null */
+ } else {
+ name_len = copy_path_name(pSMB->OldFileName, from_name);
+ name_len2 = copy_path_name(pSMB->OldFileName+name_len+1, to_name);
pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */
- strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2);
- name_len2++; /* trailing null */
name_len2++; /* signature byte */
}
@@ -2962,15 +2942,10 @@ copyRetry:
toName, PATH_MAX, nls_codepage, remap);
name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ;
name_len2 *= 2; /* convert to bytes */
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(fromName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->OldFileName, fromName, name_len);
- name_len2 = strnlen(toName, PATH_MAX);
- name_len2++; /* trailing null */
+ } else {
+ name_len = copy_path_name(pSMB->OldFileName, fromName);
pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */
- strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2);
- name_len2++; /* trailing null */
+ name_len2 = copy_path_name(pSMB->OldFileName+name_len+1, toName);
name_len2++; /* signature byte */
}
@@ -3021,10 +2996,8 @@ createSymLinkRetry:
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(fromName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, fromName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, fromName);
}
params = 6 + name_len;
pSMB->MaxSetupCount = 0;
@@ -3044,10 +3017,8 @@ createSymLinkRetry:
PATH_MAX, nls_codepage, remap);
name_len_target++; /* trailing null */
name_len_target *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len_target = strnlen(toName, PATH_MAX);
- name_len_target++; /* trailing null */
- strncpy(data_offset, toName, name_len_target);
+ } else {
+ name_len_target = copy_path_name(data_offset, toName);
}
pSMB->MaxParameterCount = cpu_to_le16(2);
@@ -3109,10 +3080,8 @@ createHardLinkRetry:
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(toName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, toName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, toName);
}
params = 6 + name_len;
pSMB->MaxSetupCount = 0;
@@ -3131,10 +3100,8 @@ createHardLinkRetry:
PATH_MAX, nls_codepage, remap);
name_len_target++; /* trailing null */
name_len_target *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len_target = strnlen(fromName, PATH_MAX);
- name_len_target++; /* trailing null */
- strncpy(data_offset, fromName, name_len_target);
+ } else {
+ name_len_target = copy_path_name(data_offset, fromName);
}
pSMB->MaxParameterCount = cpu_to_le16(2);
@@ -3213,15 +3180,10 @@ winCreateHardLinkRetry:
remap);
name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ;
name_len2 *= 2; /* convert to bytes */
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(from_name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->OldFileName, from_name, name_len);
- name_len2 = strnlen(to_name, PATH_MAX);
- name_len2++; /* trailing null */
+ } else {
+ name_len = copy_path_name(pSMB->OldFileName, from_name);
pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */
- strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2);
- name_len2++; /* trailing null */
+ name_len2 = copy_path_name(pSMB->OldFileName+name_len+1, to_name);
name_len2++; /* signature byte */
}
@@ -3271,10 +3233,8 @@ querySymLinkRetry:
remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(searchName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, searchName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, searchName);
}
params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ;
@@ -3691,10 +3651,8 @@ queryAclRetry:
name_len *= 2;
pSMB->FileName[name_len] = 0;
pSMB->FileName[name_len+1] = 0;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(searchName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, searchName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, searchName);
}
params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ;
@@ -3776,10 +3734,8 @@ setAclRetry:
PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(fileName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, fileName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, fileName);
}
params = 6 + name_len;
pSMB->MaxParameterCount = cpu_to_le16(2);
@@ -4184,9 +4140,7 @@ QInfRetry:
name_len++; /* trailing null */
name_len *= 2;
} else {
- name_len = strnlen(search_name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, search_name, name_len);
+ name_len = copy_path_name(pSMB->FileName, search_name);
}
pSMB->BufferFormat = 0x04;
name_len++; /* account for buffer type byte */
@@ -4321,10 +4275,8 @@ QPathInfoRetry:
PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(search_name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, search_name, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, search_name);
}
params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */;
@@ -4490,10 +4442,8 @@ UnixQPathInfoRetry:
PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(searchName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, searchName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, searchName);
}
params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */;
@@ -4593,17 +4543,16 @@ findFirstRetry:
pSMB->FileName[name_len+1] = 0;
name_len += 2;
}
- } else { /* BB add check for overrun of SMB buf BB */
- name_len = strnlen(searchName, PATH_MAX);
-/* BB fix here and in unicode clause above ie
- if (name_len > buffersize-header)
- free buffer exit; BB */
- strncpy(pSMB->FileName, searchName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, searchName);
if (msearch) {
- pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb);
- pSMB->FileName[name_len+1] = '*';
- pSMB->FileName[name_len+2] = 0;
- name_len += 3;
+ if (WARN_ON_ONCE(name_len > PATH_MAX-2))
+ name_len = PATH_MAX-2;
+ /* overwrite nul byte */
+ pSMB->FileName[name_len-1] = CIFS_DIR_SEP(cifs_sb);
+ pSMB->FileName[name_len] = '*';
+ pSMB->FileName[name_len+1] = 0;
+ name_len += 2;
}
}
@@ -4898,10 +4847,8 @@ GetInodeNumberRetry:
remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(search_name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, search_name, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, search_name);
}
params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ;
@@ -5008,9 +4955,7 @@ getDFSRetry:
name_len++; /* trailing null */
name_len *= 2;
} else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(search_name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->RequestFileName, search_name, name_len);
+ name_len = copy_path_name(pSMB->RequestFileName, search_name);
}
if (ses->server->sign)
@@ -5663,10 +5608,8 @@ SetEOFRetry:
PATH_MAX, cifs_sb->local_nls, remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(file_name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, file_name, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, file_name);
}
params = 6 + name_len;
data_count = sizeof(struct file_end_of_file_info);
@@ -5959,10 +5902,8 @@ SetTimesRetry:
PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(fileName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, fileName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, fileName);
}
params = 6 + name_len;
@@ -6040,10 +5981,8 @@ SetAttrLgcyRetry:
PATH_MAX, nls_codepage);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(fileName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->fileName, fileName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->fileName, fileName);
}
pSMB->attr = cpu_to_le16(dos_attrs);
pSMB->BufferFormat = 0x04;
@@ -6203,10 +6142,8 @@ setPermsRetry:
PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(file_name, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, file_name, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, file_name);
}
params = 6 + name_len;
@@ -6298,10 +6235,8 @@ QAllEAsRetry:
PATH_MAX, nls_codepage, remap);
list_len++; /* trailing null */
list_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- list_len = strnlen(searchName, PATH_MAX);
- list_len++; /* trailing null */
- strncpy(pSMB->FileName, searchName, list_len);
+ } else {
+ list_len = copy_path_name(pSMB->FileName, searchName);
}
params = 2 /* level */ + 4 /* reserved */ + list_len /* includes NUL */;
@@ -6480,10 +6415,8 @@ SetEARetry:
PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
- } else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(fileName, PATH_MAX);
- name_len++; /* trailing null */
- strncpy(pSMB->FileName, fileName, name_len);
+ } else {
+ name_len = copy_path_name(pSMB->FileName, fileName);
}
params = 6 + name_len;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a4830ced0f98..5299effa6f7d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1113,6 +1113,7 @@ cifs_demultiplex_thread(void *p)
mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
set_freezable();
+ allow_kernel_signal(SIGKILL);
while (server->tcpStatus != CifsExiting) {
if (try_to_freeze())
continue;
@@ -2980,6 +2981,7 @@ static int
cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
{
int rc = 0;
+ int is_domain = 0;
const char *delim, *payload;
char *desc;
ssize_t len;
@@ -3027,6 +3029,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
rc = PTR_ERR(key);
goto out_err;
}
+ is_domain = 1;
}
down_read(&key->sem);
@@ -3084,6 +3087,26 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
goto out_key_put;
}
+ /*
+ * If we have a domain key then we must set the domainName in the
+ * for the request.
+ */
+ if (is_domain && ses->domainName) {
+ vol->domainname = kstrndup(ses->domainName,
+ strlen(ses->domainName),
+ GFP_KERNEL);
+ if (!vol->domainname) {
+ cifs_dbg(FYI, "Unable to allocate %zd bytes for "
+ "domain\n", len);
+ rc = -ENOMEM;
+ kfree(vol->username);
+ vol->username = NULL;
+ kzfree(vol->password);
+ vol->password = NULL;
+ goto out_key_put;
+ }
+ }
+
out_key_put:
up_read(&key->sem);
key_put(key);
@@ -4208,16 +4231,19 @@ build_unc_path_to_root(const struct smb_vol *vol,
strlen(vol->prepath) + 1 : 0;
unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1);
+ if (unc_len > MAX_TREE_SIZE)
+ return ERR_PTR(-EINVAL);
+
full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL);
if (full_path == NULL)
return ERR_PTR(-ENOMEM);
- strncpy(full_path, vol->UNC, unc_len);
+ memcpy(full_path, vol->UNC, unc_len);
pos = full_path + unc_len;
if (pplen) {
*pos = CIFS_DIR_SEP(cifs_sb);
- strncpy(pos + 1, vol->prepath, pplen);
+ memcpy(pos + 1, vol->prepath, pplen);
pos += pplen;
}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index f26a48dd2e39..be424e81e3ad 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -69,11 +69,10 @@ cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
return full_path;
if (dfsplen)
- strncpy(full_path, tcon->treeName, dfsplen);
+ memcpy(full_path, tcon->treeName, dfsplen);
full_path[dfsplen] = CIFS_DIR_SEP(cifs_sb);
- strncpy(full_path + dfsplen + 1, vol->prepath, pplen);
+ memcpy(full_path + dfsplen + 1, vol->prepath, pplen);
convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
- full_path[dfsplen + pplen] = 0; /* add trailing null */
return full_path;
}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index f383877a6511..5ad83bdb9bea 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1011,3 +1011,25 @@ void extract_unc_hostname(const char *unc, const char **h, size_t *len)
*h = unc;
*len = end - unc;
}
+
+/**
+ * copy_path_name - copy src path to dst, possibly truncating
+ *
+ * returns number of bytes written (including trailing nul)
+ */
+int copy_path_name(char *dst, const char *src)
+{
+ int name_len;
+
+ /*
+ * PATH_MAX includes nul, so if strlen(src) >= PATH_MAX it
+ * will truncate and strlen(dst) will be PATH_MAX-1
+ */
+ name_len = strscpy(dst, src, PATH_MAX);
+ if (WARN_ON_ONCE(name_len < 0))
+ name_len = PATH_MAX-1;
+
+ /* we count the trailing nul */
+ name_len++;
+ return name_len;
+}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index dcd49ad60c83..4c764ff7edd2 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -159,13 +159,16 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
const struct nls_table *nls_cp)
{
char *bcc_ptr = *pbcc_area;
+ int len;
/* copy user */
/* BB what about null user mounts - check that we do this BB */
/* copy user */
if (ses->user_name != NULL) {
- strncpy(bcc_ptr, ses->user_name, CIFS_MAX_USERNAME_LEN);
- bcc_ptr += strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
+ len = strscpy(bcc_ptr, ses->user_name, CIFS_MAX_USERNAME_LEN);
+ if (WARN_ON_ONCE(len < 0))
+ len = CIFS_MAX_USERNAME_LEN - 1;
+ bcc_ptr += len;
}
/* else null user mount */
*bcc_ptr = 0;
@@ -173,8 +176,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
/* copy domain */
if (ses->domainName != NULL) {
- strncpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
- bcc_ptr += strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+ len = strscpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+ if (WARN_ON_ONCE(len < 0))
+ len = CIFS_MAX_DOMAINNAME_LEN - 1;
+ bcc_ptr += len;
} /* else we will send a null domain name
so the server will default to its own domain */
*bcc_ptr = 0;
@@ -242,9 +247,10 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
kfree(ses->serverOS);
- ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
+ ses->serverOS = kmalloc(len + 1, GFP_KERNEL);
if (ses->serverOS) {
- strncpy(ses->serverOS, bcc_ptr, len);
+ memcpy(ses->serverOS, bcc_ptr, len);
+ ses->serverOS[len] = 0;
if (strncmp(ses->serverOS, "OS/2", 4) == 0)
cifs_dbg(FYI, "OS/2 server\n");
}
@@ -258,9 +264,11 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
kfree(ses->serverNOS);
- ses->serverNOS = kzalloc(len + 1, GFP_KERNEL);
- if (ses->serverNOS)
- strncpy(ses->serverNOS, bcc_ptr, len);
+ ses->serverNOS = kmalloc(len + 1, GFP_KERNEL);
+ if (ses->serverNOS) {
+ memcpy(ses->serverNOS, bcc_ptr, len);
+ ses->serverNOS[len] = 0;
+ }
bcc_ptr += len + 1;
bleft -= len + 1;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index a5bc1b671c12..64a5864127be 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3489,7 +3489,15 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
unsigned int buflen)
{
- sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+ void *addr;
+ /*
+ * VMAP_STACK (at least) puts stack into the vmalloc address space
+ */
+ if (is_vmalloc_addr(buf))
+ addr = vmalloc_to_page(buf);
+ else
+ addr = virt_to_page(buf);
+ sg_set_page(sg, addr, buflen, offset_in_page(buf));
}
/* Assumes the first rqst has a transform header as the first iov.
@@ -4070,7 +4078,6 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
{
int ret, length;
char *buf = server->smallbuf;
- char *tmpbuf;
struct smb2_sync_hdr *shdr;
unsigned int pdu_length = server->pdu_size;
unsigned int buf_size;
@@ -4100,18 +4107,15 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
return length;
next_is_large = server->large_buf;
- one_more:
+one_more:
shdr = (struct smb2_sync_hdr *)buf;
if (shdr->NextCommand) {
- if (next_is_large) {
- tmpbuf = server->bigbuf;
+ if (next_is_large)
next_buffer = (char *)cifs_buf_get();
- } else {
- tmpbuf = server->smallbuf;
+ else
next_buffer = (char *)cifs_small_buf_get();
- }
memcpy(next_buffer,
- tmpbuf + le32_to_cpu(shdr->NextCommand),
+ buf + le32_to_cpu(shdr->NextCommand),
pdu_length - le32_to_cpu(shdr->NextCommand));
}
@@ -4140,12 +4144,21 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
pdu_length -= le32_to_cpu(shdr->NextCommand);
server->large_buf = next_is_large;
if (next_is_large)
- server->bigbuf = next_buffer;
+ server->bigbuf = buf = next_buffer;
else
- server->smallbuf = next_buffer;
-
- buf += le32_to_cpu(shdr->NextCommand);
+ server->smallbuf = buf = next_buffer;
goto one_more;
+ } else if (ret != 0) {
+ /*
+ * ret != 0 here means that we didn't get to handle_mid() thus
+ * server->smallbuf and server->bigbuf are still valid. We need
+ * to free next_buffer because it is not going to be used
+ * anywhere.
+ */
+ if (next_is_large)
+ free_rsp_buf(CIFS_LARGE_BUFFER, next_buffer);
+ else
+ free_rsp_buf(CIFS_SMALL_BUFFER, next_buffer);
}
return ret;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index c8cd7b6cdda2..31e4a1b0b170 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -252,7 +252,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
if (tcon == NULL)
return 0;
- if (smb2_command == SMB2_TREE_CONNECT)
+ if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL)
return 0;
if (tcon->tidStatus == CifsExiting) {
@@ -1196,7 +1196,12 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
else
req->SecurityMode = 0;
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS);
+#else
req->Capabilities = 0;
+#endif /* DFS_UPCALL */
+
req->Channel = 0; /* MBZ */
sess_data->iov[0].iov_base = (char *)req;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6e30949d9f77..a7ec2d3dff92 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -638,9 +638,6 @@ COMPATIBLE_IOCTL(PPPIOCDISCONN)
COMPATIBLE_IOCTL(PPPIOCATTCHAN)
COMPATIBLE_IOCTL(PPPIOCGCHAN)
COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS)
-/* PPPOX */
-COMPATIBLE_IOCTL(PPPOEIOCSFWD)
-COMPATIBLE_IOCTL(PPPOEIOCDFWD)
/* Big A */
/* sparc only */
/* Big Q for sound/OSS */
diff --git a/fs/dax.c b/fs/dax.c
index b64964ef44f6..6bf81f931de3 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -600,7 +600,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
* guaranteed to either see new references or prevent new
* references from being established.
*/
- unmap_mapping_range(mapping, 0, 0, 1);
+ unmap_mapping_range(mapping, 0, 0, 0);
xas_lock_irq(&xas);
xas_for_each(&xas, entry, ULONG_MAX) {
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 4df26ef2b2b1..4f8b5fd6c81f 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -390,6 +390,19 @@ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
return mp->mp_aheight - x - 1;
}
+static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
+{
+ sector_t factor = 1, block = 0;
+ int hgt;
+
+ for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
+ if (hgt < mp->mp_aheight)
+ block += mp->mp_list[hgt] * factor;
+ factor *= sdp->sd_inptrs;
+ }
+ return block;
+}
+
static void release_metapath(struct metapath *mp)
{
int i;
@@ -430,60 +443,84 @@ static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *pt
return ptr - first;
}
-typedef const __be64 *(*gfs2_metadata_walker)(
- struct metapath *mp,
- const __be64 *start, const __be64 *end,
- u64 factor, void *data);
+enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };
-#define WALK_STOP ((__be64 *)0)
-#define WALK_NEXT ((__be64 *)1)
+/*
+ * gfs2_metadata_walker - walk an indirect block
+ * @mp: Metapath to indirect block
+ * @ptrs: Number of pointers to look at
+ *
+ * When returning WALK_FOLLOW, the walker must update @mp to point at the right
+ * indirect block to follow.
+ */
+typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
+ unsigned int ptrs);
-static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
- u64 len, struct metapath *mp, gfs2_metadata_walker walker,
- void *data)
+/*
+ * gfs2_walk_metadata - walk a tree of indirect blocks
+ * @inode: The inode
+ * @mp: Starting point of walk
+ * @max_len: Maximum number of blocks to walk
+ * @walker: Called during the walk
+ *
+ * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
+ * past the end of metadata, and a negative error code otherwise.
+ */
+
+static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
+ u64 max_len, gfs2_metadata_walker walker)
{
- struct metapath clone;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- const __be64 *start, *end, *ptr;
u64 factor = 1;
unsigned int hgt;
- int ret = 0;
+ int ret;
- for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
+ /*
+ * The walk starts in the lowest allocated indirect block, which may be
+ * before the position indicated by @mp. Adjust @max_len accordingly
+ * to avoid a short walk.
+ */
+ for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
+ max_len += mp->mp_list[hgt] * factor;
+ mp->mp_list[hgt] = 0;
factor *= sdp->sd_inptrs;
+ }
for (;;) {
- u64 step;
+ u16 start = mp->mp_list[hgt];
+ enum walker_status status;
+ unsigned int ptrs;
+ u64 len;
/* Walk indirect block. */
- start = metapointer(hgt, mp);
- end = metaend(hgt, mp);
-
- step = (end - start) * factor;
- if (step > len)
- end = start + DIV_ROUND_UP_ULL(len, factor);
-
- ptr = walker(mp, start, end, factor, data);
- if (ptr == WALK_STOP)
+ ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
+ len = ptrs * factor;
+ if (len > max_len)
+ ptrs = DIV_ROUND_UP_ULL(max_len, factor);
+ status = walker(mp, ptrs);
+ switch (status) {
+ case WALK_STOP:
+ return 1;
+ case WALK_FOLLOW:
+ BUG_ON(mp->mp_aheight == mp->mp_fheight);
+ ptrs = mp->mp_list[hgt] - start;
+ len = ptrs * factor;
break;
- if (step >= len)
+ case WALK_CONTINUE:
break;
- len -= step;
- if (ptr != WALK_NEXT) {
- BUG_ON(!*ptr);
- mp->mp_list[hgt] += ptr - start;
- goto fill_up_metapath;
}
+ if (len >= max_len)
+ break;
+ max_len -= len;
+ if (status == WALK_FOLLOW)
+ goto fill_up_metapath;
lower_metapath:
/* Decrease height of metapath. */
- if (mp != &clone) {
- clone_metapath(&clone, mp);
- mp = &clone;
- }
brelse(mp->mp_bh[hgt]);
mp->mp_bh[hgt] = NULL;
+ mp->mp_list[hgt] = 0;
if (!hgt)
break;
hgt--;
@@ -491,10 +528,7 @@ lower_metapath:
/* Advance in metadata tree. */
(mp->mp_list[hgt])++;
- start = metapointer(hgt, mp);
- end = metaend(hgt, mp);
- if (start >= end) {
- mp->mp_list[hgt] = 0;
+ if (mp->mp_list[hgt] >= sdp->sd_inptrs) {
if (!hgt)
break;
goto lower_metapath;
@@ -502,44 +536,36 @@ lower_metapath:
fill_up_metapath:
/* Increase height of metapath. */
- if (mp != &clone) {
- clone_metapath(&clone, mp);
- mp = &clone;
- }
ret = fillup_metapath(ip, mp, ip->i_height - 1);
if (ret < 0)
- break;
+ return ret;
hgt += ret;
for (; ret; ret--)
do_div(factor, sdp->sd_inptrs);
mp->mp_aheight = hgt + 1;
}
- if (mp == &clone)
- release_metapath(mp);
- return ret;
+ return 0;
}
-struct gfs2_hole_walker_args {
- u64 blocks;
-};
-
-static const __be64 *gfs2_hole_walker(struct metapath *mp,
- const __be64 *start, const __be64 *end,
- u64 factor, void *data)
+static enum walker_status gfs2_hole_walker(struct metapath *mp,
+ unsigned int ptrs)
{
- struct gfs2_hole_walker_args *args = data;
- const __be64 *ptr;
+ const __be64 *start, *ptr, *end;
+ unsigned int hgt;
+
+ hgt = mp->mp_aheight - 1;
+ start = metapointer(hgt, mp);
+ end = start + ptrs;
for (ptr = start; ptr < end; ptr++) {
if (*ptr) {
- args->blocks += (ptr - start) * factor;
+ mp->mp_list[hgt] += ptr - start;
if (mp->mp_aheight == mp->mp_fheight)
return WALK_STOP;
- return ptr; /* increase height */
+ return WALK_FOLLOW;
}
}
- args->blocks += (end - start) * factor;
- return WALK_NEXT;
+ return WALK_CONTINUE;
}
/**
@@ -557,12 +583,24 @@ static const __be64 *gfs2_hole_walker(struct metapath *mp,
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
struct metapath *mp, struct iomap *iomap)
{
- struct gfs2_hole_walker_args args = { };
- int ret = 0;
+ struct metapath clone;
+ u64 hole_size;
+ int ret;
- ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
- if (!ret)
- iomap->length = args.blocks << inode->i_blkbits;
+ clone_metapath(&clone, mp);
+ ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
+ if (ret < 0)
+ goto out;
+
+ if (ret == 1)
+ hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
+ else
+ hole_size = len;
+ iomap->length = hole_size << inode->i_blkbits;
+ ret = 0;
+
+out:
+ release_metapath(&clone);
return ret;
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d542f1cf4428..cfb48bd088e1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -679,6 +679,13 @@ static void io_put_req(struct io_kiocb *req)
io_free_req(req);
}
+static unsigned io_cqring_events(struct io_cq_ring *ring)
+{
+ /* See comment at the top of this file */
+ smp_rmb();
+ return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
+}
+
/*
* Find and free completed poll iocbs
*/
@@ -771,7 +778,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
long min)
{
- while (!list_empty(&ctx->poll_list)) {
+ while (!list_empty(&ctx->poll_list) && !need_resched()) {
int ret;
ret = io_do_iopoll(ctx, nr_events, min);
@@ -798,6 +805,12 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
unsigned int nr_events = 0;
io_iopoll_getevents(ctx, &nr_events, 1);
+
+ /*
+ * Ensure we allow local-to-the-cpu processing to take place,
+ * in this case we need to ensure that we reap all events.
+ */
+ cond_resched();
}
mutex_unlock(&ctx->uring_lock);
}
@@ -805,11 +818,42 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
long min)
{
- int ret = 0;
+ int iters, ret = 0;
+ /*
+ * We disallow the app entering submit/complete with polling, but we
+ * still need to lock the ring to prevent racing with polled issue
+ * that got punted to a workqueue.
+ */
+ mutex_lock(&ctx->uring_lock);
+
+ iters = 0;
do {
int tmin = 0;
+ /*
+ * Don't enter poll loop if we already have events pending.
+ * If we do, we can potentially be spinning for commands that
+ * already triggered a CQE (eg in error).
+ */
+ if (io_cqring_events(ctx->cq_ring))
+ break;
+
+ /*
+ * If a submit got punted to a workqueue, we can have the
+ * application entering polling for a command before it gets
+ * issued. That app will hold the uring_lock for the duration
+ * of the poll right here, so we need to take a breather every
+ * now and then to ensure that the issue has a chance to add
+ * the poll to the issued list. Otherwise we can spin here
+ * forever, while the workqueue is stuck trying to acquire the
+ * very same mutex.
+ */
+ if (!(++iters & 7)) {
+ mutex_unlock(&ctx->uring_lock);
+ mutex_lock(&ctx->uring_lock);
+ }
+
if (*nr_events < min)
tmin = min - *nr_events;
@@ -819,6 +863,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
ret = 0;
} while (min && !*nr_events && !need_resched());
+ mutex_unlock(&ctx->uring_lock);
return ret;
}
@@ -1097,10 +1142,8 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
iter->bvec = bvec + seg_skip;
iter->nr_segs -= seg_skip;
- iter->count -= (seg_skip << PAGE_SHIFT);
+ iter->count -= bvec->bv_len + offset;
iter->iov_offset = offset & ~PAGE_MASK;
- if (iter->iov_offset)
- iter->count -= iter->iov_offset;
}
}
@@ -2025,6 +2068,15 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
{
int ret;
+ ret = io_req_defer(ctx, req, s->sqe);
+ if (ret) {
+ if (ret != -EIOCBQUEUED) {
+ io_free_req(req);
+ io_cqring_add_event(ctx, s->sqe->user_data, ret);
+ }
+ return 0;
+ }
+
ret = __io_submit_sqe(ctx, req, s, true);
if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
struct io_uring_sqe *sqe_copy;
@@ -2097,13 +2149,6 @@ err:
return;
}
- ret = io_req_defer(ctx, req, s->sqe);
- if (ret) {
- if (ret != -EIOCBQUEUED)
- goto err_req;
- return;
- }
-
/*
* If we already have a head request, queue this one for async
* submittal once the head completes. If we don't have a head but
@@ -2280,15 +2325,7 @@ static int io_sq_thread(void *data)
unsigned nr_events = 0;
if (ctx->flags & IORING_SETUP_IOPOLL) {
- /*
- * We disallow the app entering submit/complete
- * with polling, but we still need to lock the
- * ring to prevent racing with polled issue
- * that got punted to a workqueue.
- */
- mutex_lock(&ctx->uring_lock);
io_iopoll_check(ctx, &nr_events, 0);
- mutex_unlock(&ctx->uring_lock);
} else {
/*
* Normal IO, just pretend everything completed.
@@ -2433,13 +2470,6 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
return submit;
}
-static unsigned io_cqring_events(struct io_cq_ring *ring)
-{
- /* See comment at the top of this file */
- smp_rmb();
- return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
-}
-
/*
* Wait until events become available, if we don't already have some. The
* application must reap them itself, as they reside on the shared cq ring.
@@ -3190,9 +3220,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
min_complete = min(min_complete, ctx->cq_entries);
if (ctx->flags & IORING_SETUP_IOPOLL) {
- mutex_lock(&ctx->uring_lock);
ret = io_iopoll_check(ctx, &nr_events, min_complete);
- mutex_unlock(&ctx->uring_lock);
} else {
ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 0ff3facf81da..071b90a45933 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -153,7 +153,7 @@ again:
/* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex);
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
- err = nfs4_open_delegation_recall(ctx, state, stateid, type);
+ err = nfs4_open_delegation_recall(ctx, state, stateid);
if (!err)
err = nfs_delegation_claim_locks(state, stateid);
if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
@@ -1046,6 +1046,22 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
+static void
+nfs_delegation_test_free_expired(struct inode *inode,
+ nfs4_stateid *stateid,
+ const struct cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ const struct nfs4_minor_version_ops *ops = server->nfs_client->cl_mvops;
+ int status;
+
+ if (!cred)
+ return;
+ status = ops->test_and_free_expired(server, stateid, cred);
+ if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID)
+ nfs_remove_bad_delegation(inode, stateid);
+}
+
/**
* nfs_reap_expired_delegations - reap expired delegations
* @clp: nfs_client to process
@@ -1057,7 +1073,6 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp)
*/
void nfs_reap_expired_delegations(struct nfs_client *clp)
{
- const struct nfs4_minor_version_ops *ops = clp->cl_mvops;
struct nfs_delegation *delegation;
struct nfs_server *server;
struct inode *inode;
@@ -1088,11 +1103,7 @@ restart:
nfs4_stateid_copy(&stateid, &delegation->stateid);
clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
rcu_read_unlock();
- if (cred != NULL &&
- ops->test_and_free_expired(server, &stateid, cred) < 0) {
- nfs_revoke_delegation(inode, &stateid);
- nfs_inode_find_state_and_recover(inode, &stateid);
- }
+ nfs_delegation_test_free_expired(inode, &stateid, cred);
put_cred(cred);
if (nfs4_server_rebooted(clp)) {
nfs_inode_mark_test_expired_delegation(server,inode);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 5799777df5ec..9eb87ae4c982 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -63,7 +63,7 @@ void nfs_reap_expired_delegations(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync);
-int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred);
bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8d501093660f..0adfd8840110 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1487,7 +1487,7 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
nfs_file_set_open_context(file, ctx);
else
- err = -ESTALE;
+ err = -EOPENSTALE;
out:
return err;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0cb442406168..222d7115db71 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -401,15 +401,21 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
unsigned long bytes = 0;
struct nfs_direct_req *dreq = hdr->dreq;
- if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
- goto out_put;
-
spin_lock(&dreq->lock);
- if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
dreq->error = hdr->error;
- else
+
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ spin_unlock(&dreq->lock);
+ goto out_put;
+ }
+
+ if (hdr->good_bytes != 0)
nfs_direct_good_bytes(dreq, hdr);
+ if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
+ dreq->error = 0;
+
spin_unlock(&dreq->lock);
while (!list_empty(&hdr->pages)) {
@@ -782,16 +788,19 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
bool request_commit = false;
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
- if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
- goto out_put;
-
nfs_init_cinfo_from_dreq(&cinfo, dreq);
spin_lock(&dreq->lock);
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
dreq->error = hdr->error;
- if (dreq->error == 0) {
+
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ spin_unlock(&dreq->lock);
+ goto out_put;
+ }
+
+ if (hdr->good_bytes != 0) {
nfs_direct_good_bytes(dreq, hdr);
if (nfs_write_need_commit(hdr)) {
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index b04e20d28162..5657b7f2611f 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -8,6 +8,7 @@
*/
#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include <linux/sched/mm.h>
@@ -928,7 +929,9 @@ retry:
pgm = &pgio->pg_mirrors[0];
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
- pgio->pg_maxretrans = io_maxretrans;
+ if (NFS_SERVER(pgio->pg_inode)->flags &
+ (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
+ pgio->pg_maxretrans = io_maxretrans;
return;
out_nolseg:
if (pgio->pg_error < 0)
@@ -940,6 +943,7 @@ out_mds:
pgio->pg_lseg);
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
+ pgio->pg_maxretrans = 0;
nfs_pageio_reset_read_mds(pgio);
}
@@ -1000,7 +1004,9 @@ retry:
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
}
- pgio->pg_maxretrans = io_maxretrans;
+ if (NFS_SERVER(pgio->pg_inode)->flags &
+ (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
+ pgio->pg_maxretrans = io_maxretrans;
return;
out_mds:
@@ -1010,6 +1016,7 @@ out_mds:
pgio->pg_lseg);
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
+ pgio->pg_maxretrans = 0;
nfs_pageio_reset_write_mds(pgio);
}
@@ -1148,8 +1155,6 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
break;
case -NFS4ERR_RETRY_UNCACHED_REP:
break;
- case -EAGAIN:
- return -NFS4ERR_RESET_TO_PNFS;
/* Invalidate Layout errors */
case -NFS4ERR_PNFS_NO_LAYOUT:
case -ESTALE: /* mapped NFS4ERR_STALE */
@@ -1210,7 +1215,6 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
case -EBADHANDLE:
case -ELOOP:
case -ENOSPC:
- case -EAGAIN:
break;
case -EJUKEBOX:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
@@ -1445,16 +1449,6 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
ff_layout_read_prepare_common(task, hdr);
}
-static void
-ff_layout_io_prepare_transmit(struct rpc_task *task,
- void *data)
-{
- struct nfs_pgio_header *hdr = data;
-
- if (!pnfs_is_valid_lseg(hdr->lseg))
- rpc_exit(task, -EAGAIN);
-}
-
static void ff_layout_read_call_done(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
@@ -1740,7 +1734,6 @@ static void ff_layout_commit_release(void *data)
static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
.rpc_call_prepare = ff_layout_read_prepare_v3,
- .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
.rpc_call_done = ff_layout_read_call_done,
.rpc_count_stats = ff_layout_read_count_stats,
.rpc_release = ff_layout_read_release,
@@ -1748,7 +1741,6 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
.rpc_call_prepare = ff_layout_read_prepare_v4,
- .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
.rpc_call_done = ff_layout_read_call_done,
.rpc_count_stats = ff_layout_read_count_stats,
.rpc_release = ff_layout_read_release,
@@ -1756,7 +1748,6 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
.rpc_call_prepare = ff_layout_write_prepare_v3,
- .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
.rpc_call_done = ff_layout_write_call_done,
.rpc_count_stats = ff_layout_write_count_stats,
.rpc_release = ff_layout_write_release,
@@ -1764,7 +1755,6 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
.rpc_call_prepare = ff_layout_write_prepare_v4,
- .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
.rpc_call_done = ff_layout_write_call_done,
.rpc_count_stats = ff_layout_write_count_stats,
.rpc_release = ff_layout_write_release,
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 53507aa96b0b..3800ab6f08fa 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -114,6 +114,10 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int
struct rb_node **p, *parent;
int diff;
+ nfss->fscache_key = NULL;
+ nfss->fscache = NULL;
+ if (!(nfss->options & NFS_OPTION_FSCACHE))
+ return;
if (!uniq) {
uniq = "";
ulen = 1;
@@ -226,10 +230,11 @@ void nfs_fscache_release_super_cookie(struct super_block *sb)
void nfs_fscache_init_inode(struct inode *inode)
{
struct nfs_fscache_inode_auxdata auxdata;
+ struct nfs_server *nfss = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
nfsi->fscache = NULL;
- if (!S_ISREG(inode->i_mode))
+ if (!(nfss->fscache && S_ISREG(inode->i_mode)))
return;
memset(&auxdata, 0, sizeof(auxdata));
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 25a75e40d91d..ad041cfbf9ec 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -182,7 +182,7 @@ static inline void nfs_fscache_wait_on_invalidate(struct inode *inode)
*/
static inline const char *nfs_server_fscache_state(struct nfs_server *server)
{
- if (server->fscache && (server->options & NFS_OPTION_FSCACHE))
+ if (server->fscache)
return "yes";
return "no ";
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8a1758200b57..c764cfe456e5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1403,12 +1403,21 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
return 0;
+ /* No fileid? Just exit */
+ if (!(fattr->valid & NFS_ATTR_FATTR_FILEID))
+ return 0;
/* Has the inode gone and changed behind our back? */
- if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
+ if (nfsi->fileid != fattr->fileid) {
+ /* Is this perhaps the mounted-on fileid? */
+ if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) &&
+ nfsi->fileid == fattr->mounted_on_fileid)
+ return 0;
return -ESTALE;
+ }
if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
return -ESTALE;
+
if (!nfs_file_has_buffered_writers(nfsi)) {
/* Verify a few of the more important attributes */
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr))
@@ -1768,18 +1777,6 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc);
-static inline bool nfs_fileid_valid(struct nfs_inode *nfsi,
- struct nfs_fattr *fattr)
-{
- bool ret1 = true, ret2 = true;
-
- if (fattr->valid & NFS_ATTR_FATTR_FILEID)
- ret1 = (nfsi->fileid == fattr->fileid);
- if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
- ret2 = (nfsi->fileid == fattr->mounted_on_fileid);
- return ret1 || ret2;
-}
-
/*
* Many nfs protocol calls return the new file attributes after
* an operation. Here we update the inode to reflect the state
@@ -1810,7 +1807,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_display_fhandle_hash(NFS_FH(inode)),
atomic_read(&inode->i_count), fattr->valid);
- if (!nfs_fileid_valid(nfsi, fattr)) {
+ /* No fileid? Just exit */
+ if (!(fattr->valid & NFS_ATTR_FATTR_FILEID))
+ return 0;
+ /* Has the inode gone and changed behind our back? */
+ if (nfsi->fileid != fattr->fileid) {
+ /* Is this perhaps the mounted-on fileid? */
+ if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) &&
+ nfsi->fileid == fattr->mounted_on_fileid)
+ return 0;
printk(KERN_ERR "NFS: server %s error: fileid changed\n"
"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
NFS_SERVER(inode)->nfs_client->cl_hostname,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index a2346a2f8361..e64f810223be 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -775,3 +775,13 @@ static inline bool nfs_error_is_fatal(int err)
}
}
+static inline bool nfs_error_is_fatal_on_server(int err)
+{
+ switch (err) {
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
+ return false;
+ }
+ return nfs_error_is_fatal(err);
+}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index d778dad9a75e..3564da1ba8a1 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -465,7 +465,8 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session,
extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, const struct cred *, gfp_t);
extern void nfs4_put_state_owner(struct nfs4_state_owner *);
-extern void nfs4_purge_state_owners(struct nfs_server *);
+extern void nfs4_purge_state_owners(struct nfs_server *, struct list_head *);
+extern void nfs4_free_state_owners(struct list_head *head);
extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct nfs4_state *, fmode_t);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 616393a01c06..da6204025a2d 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -758,9 +758,12 @@ out:
static void nfs4_destroy_server(struct nfs_server *server)
{
+ LIST_HEAD(freeme);
+
nfs_server_return_all_delegations(server);
unset_pnfs_layoutdriver(server);
- nfs4_purge_state_owners(server);
+ nfs4_purge_state_owners(server, &freeme);
+ nfs4_free_state_owners(&freeme);
}
/*
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 96db471ca2e5..339663d04bf8 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -73,13 +73,13 @@ nfs4_file_open(struct inode *inode, struct file *filp)
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
switch (err) {
- case -EPERM:
- case -EACCES:
- case -EDQUOT:
- case -ENOSPC:
- case -EROFS:
- goto out_put_ctx;
default:
+ goto out_put_ctx;
+ case -ENOENT:
+ case -ESTALE:
+ case -EISDIR:
+ case -ENOTDIR:
+ case -ELOOP:
goto out_drop;
}
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 39896afc6edf..1406858bae6c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1683,6 +1683,14 @@ static void nfs_state_set_open_stateid(struct nfs4_state *state,
write_sequnlock(&state->seqlock);
}
+static void nfs_state_clear_open_state_flags(struct nfs4_state *state)
+{
+ clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ clear_bit(NFS_OPEN_STATE, &state->flags);
+}
+
static void nfs_state_set_delegation(struct nfs4_state *state,
const nfs4_stateid *deleg_stateid,
fmode_t fmode)
@@ -1907,8 +1915,9 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
update:
- update_open_stateid(state, &data->o_res.stateid, NULL,
- data->o_arg.fmode);
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode))
+ return ERR_PTR(-EAGAIN);
refcount_inc(&state->count);
return state;
@@ -1973,8 +1982,11 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
- update_open_stateid(state, &data->o_res.stateid, NULL,
- data->o_arg.fmode);
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode)) {
+ nfs4_put_open_state(state);
+ state = ERR_PTR(-EAGAIN);
+ }
out:
nfs_release_seqid(data->o_arg.seqid);
return state;
@@ -2074,13 +2086,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
{
int ret;
- /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
- clear_bit(NFS_O_RDWR_STATE, &state->flags);
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
/* memory barrier prior to reading state->n_* */
- clear_bit(NFS_DELEGATED_STATE, &state->flags);
- clear_bit(NFS_OPEN_STATE, &state->flags);
smp_rmb();
ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
if (ret != 0)
@@ -2156,6 +2162,8 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
ctx = nfs4_state_find_open_context(state);
if (IS_ERR(ctx))
return -EAGAIN;
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ nfs_state_clear_open_state_flags(state);
ret = nfs4_do_open_reclaim(ctx, state);
put_nfs_open_context(ctx);
return ret;
@@ -2171,18 +2179,17 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
case -ENOENT:
case -EAGAIN:
case -ESTALE:
+ case -ETIMEDOUT:
break;
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- set_bit(NFS_DELEGATED_STATE, &state->flags);
nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
return -EAGAIN;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
- set_bit(NFS_DELEGATED_STATE, &state->flags);
/* Don't recall a delegation if it was lost */
nfs4_schedule_lease_recovery(server->nfs_client);
return -EAGAIN;
@@ -2203,7 +2210,6 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
return -EAGAIN;
case -NFS4ERR_DELAY:
case -NFS4ERR_GRACE:
- set_bit(NFS_DELEGATED_STATE, &state->flags);
ssleep(1);
return -EAGAIN;
case -ENOMEM:
@@ -2219,8 +2225,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
}
int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
- struct nfs4_state *state, const nfs4_stateid *stateid,
- fmode_t type)
+ struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_opendata *opendata;
@@ -2231,20 +2236,23 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
if (IS_ERR(opendata))
return PTR_ERR(opendata);
nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
- nfs_state_clear_delegation(state);
- switch (type & (FMODE_READ|FMODE_WRITE)) {
- case FMODE_READ|FMODE_WRITE:
- case FMODE_WRITE:
+ if (!test_bit(NFS_O_RDWR_STATE, &state->flags)) {
err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
if (err)
- break;
+ goto out;
+ }
+ if (!test_bit(NFS_O_WRONLY_STATE, &state->flags)) {
err = nfs4_open_recover_helper(opendata, FMODE_WRITE);
if (err)
- break;
- /* Fall through */
- case FMODE_READ:
+ goto out;
+ }
+ if (!test_bit(NFS_O_RDONLY_STATE, &state->flags)) {
err = nfs4_open_recover_helper(opendata, FMODE_READ);
+ if (err)
+ goto out;
}
+ nfs_state_clear_delegation(state);
+out:
nfs4_opendata_put(opendata);
return nfs4_handle_delegation_recall_error(server, state, stateid, NULL, err);
}
@@ -2492,6 +2500,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data,
if (!ctx) {
nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 1);
data->is_recover = true;
+ task_setup_data.flags |= RPC_TASK_TIMEOUT;
} else {
nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 0);
pnfs_lgopen_prepare(data, ctx);
@@ -2698,6 +2707,7 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
{
/* NFSv4.0 doesn't allow for delegation recovery on open expire */
nfs40_clear_delegation_stateid(state);
+ nfs_state_clear_open_state_flags(state);
return nfs4_open_expired(sp, state);
}
@@ -2740,13 +2750,13 @@ out_free:
return -NFS4ERR_EXPIRED;
}
-static void nfs41_check_delegation_stateid(struct nfs4_state *state)
+static int nfs41_check_delegation_stateid(struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(state->inode);
nfs4_stateid stateid;
struct nfs_delegation *delegation;
const struct cred *cred = NULL;
- int status;
+ int status, ret = NFS_OK;
/* Get the delegation credential for use by test/free_stateid */
rcu_read_lock();
@@ -2754,20 +2764,15 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
if (delegation == NULL) {
rcu_read_unlock();
nfs_state_clear_delegation(state);
- return;
+ return NFS_OK;
}
nfs4_stateid_copy(&stateid, &delegation->stateid);
- if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
- rcu_read_unlock();
- nfs_state_clear_delegation(state);
- return;
- }
if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
&delegation->flags)) {
rcu_read_unlock();
- return;
+ return NFS_OK;
}
if (delegation->cred)
@@ -2777,9 +2782,24 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
trace_nfs4_test_delegation_stateid(state, NULL, status);
if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID)
nfs_finish_clear_delegation_stateid(state, &stateid);
+ else
+ ret = status;
- if (delegation->cred)
- put_cred(cred);
+ put_cred(cred);
+ return ret;
+}
+
+static void nfs41_delegation_recover_stateid(struct nfs4_state *state)
+{
+ nfs4_stateid tmp;
+
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) &&
+ nfs4_copy_delegation_stateid(state->inode, state->state,
+ &tmp, NULL) &&
+ nfs4_stateid_match_other(&state->stateid, &tmp))
+ nfs_state_set_delegation(state, &tmp, state->state);
+ else
+ nfs_state_clear_delegation(state);
}
/**
@@ -2849,21 +2869,12 @@ static int nfs41_check_open_stateid(struct nfs4_state *state)
const struct cred *cred = state->owner->so_cred;
int status;
- if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) {
- if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) {
- if (nfs4_have_delegation(state->inode, state->state))
- return NFS_OK;
- return -NFS4ERR_OPENMODE;
- }
+ if (test_bit(NFS_OPEN_STATE, &state->flags) == 0)
return -NFS4ERR_BAD_STATEID;
- }
status = nfs41_test_and_free_expired_stateid(server, stateid, cred);
trace_nfs4_test_open_stateid(state, NULL, status);
if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) {
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
- clear_bit(NFS_O_RDWR_STATE, &state->flags);
- clear_bit(NFS_OPEN_STATE, &state->flags);
+ nfs_state_clear_open_state_flags(state);
stateid->type = NFS4_INVALID_STATEID_TYPE;
return status;
}
@@ -2876,7 +2887,11 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
{
int status;
- nfs41_check_delegation_stateid(state);
+ status = nfs41_check_delegation_stateid(state);
+ if (status != NFS_OK)
+ return status;
+ nfs41_delegation_recover_stateid(state);
+
status = nfs41_check_expired_locks(state);
if (status != NFS_OK)
return status;
@@ -3201,7 +3216,7 @@ static int _nfs4_do_setattr(struct inode *inode,
if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) {
/* Use that stateid */
- } else if (ctx != NULL) {
+ } else if (ctx != NULL && ctx->state) {
struct nfs_lock_context *l_ctx;
if (!nfs4_valid_open_stateid(ctx->state))
return -EBADF;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9afd051a4876..cad4e064b328 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -624,24 +624,39 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
/**
* nfs4_purge_state_owners - Release all cached state owners
* @server: nfs_server with cached state owners to release
+ * @head: resulting list of state owners
*
* Called at umount time. Remaining state owners will be on
* the LRU with ref count of zero.
+ * Note that the state owners are not freed, but are added
+ * to the list @head, which can later be used as an argument
+ * to nfs4_free_state_owners.
*/
-void nfs4_purge_state_owners(struct nfs_server *server)
+void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp, *tmp;
- LIST_HEAD(doomed);
spin_lock(&clp->cl_lock);
list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
- list_move(&sp->so_lru, &doomed);
+ list_move(&sp->so_lru, head);
nfs4_remove_state_owner_locked(sp);
}
spin_unlock(&clp->cl_lock);
+}
- list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+/**
+ * nfs4_purge_state_owners - Release all cached state owners
+ * @head: resulting list of state owners
+ *
+ * Frees a list of state owners that was generated by
+ * nfs4_purge_state_owners
+ */
+void nfs4_free_state_owners(struct list_head *head)
+{
+ struct nfs4_state_owner *sp, *tmp;
+
+ list_for_each_entry_safe(sp, tmp, head, so_lru) {
list_del(&sp->so_lru);
nfs4_free_state_owner(sp);
}
@@ -1463,7 +1478,7 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
nfs4_schedule_state_manager(clp);
}
-static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
+static void nfs4_state_mark_open_context_bad(struct nfs4_state *state, int err)
{
struct inode *inode = state->inode;
struct nfs_inode *nfsi = NFS_I(inode);
@@ -1474,6 +1489,8 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
if (ctx->state != state)
continue;
set_bit(NFS_CONTEXT_BAD, &ctx->flags);
+ pr_warn("NFSv4: state recovery failed for open file %pd2, "
+ "error = %d\n", ctx->dentry, err);
}
rcu_read_unlock();
}
@@ -1481,7 +1498,7 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
{
set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags);
- nfs4_state_mark_open_context_bad(state);
+ nfs4_state_mark_open_context_bad(state, error);
}
@@ -1512,6 +1529,7 @@ restart:
switch (status) {
case 0:
break;
+ case -ETIMEDOUT:
case -ESTALE:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
@@ -1605,6 +1623,7 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st
static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops)
{
struct nfs4_state *state;
+ unsigned int loop = 0;
int status = 0;
/* Note: we rely on the sp->so_states list being ordered
@@ -1631,8 +1650,10 @@ restart:
switch (status) {
default:
- if (status >= 0)
+ if (status >= 0) {
+ loop = 0;
break;
+ }
printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status);
/* Fall through */
case -ENOENT:
@@ -1646,6 +1667,10 @@ restart:
break;
case -EAGAIN:
ssleep(1);
+ if (loop++ < 10) {
+ set_bit(ops->state_flag_bit, &state->flags);
+ break;
+ }
/* Fall through */
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
@@ -1658,11 +1683,13 @@ restart:
case -NFS4ERR_EXPIRED:
case -NFS4ERR_NO_GRACE:
nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ /* Fall through */
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ case -ETIMEDOUT:
goto out_err;
}
nfs4_put_open_state(state);
@@ -1856,12 +1883,13 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
struct nfs4_state_owner *sp;
struct nfs_server *server;
struct rb_node *pos;
+ LIST_HEAD(freeme);
int status = 0;
restart:
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- nfs4_purge_state_owners(server);
+ nfs4_purge_state_owners(server, &freeme);
spin_lock(&clp->cl_lock);
for (pos = rb_first(&server->state_owners);
pos != NULL;
@@ -1890,6 +1918,7 @@ restart:
spin_unlock(&clp->cl_lock);
}
rcu_read_unlock();
+ nfs4_free_state_owners(&freeme);
return 0;
}
@@ -1945,7 +1974,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
return -EPERM;
case -EACCES:
case -NFS4ERR_DELAY:
- case -ETIMEDOUT:
case -EAGAIN:
ssleep(1);
break;
@@ -2574,7 +2602,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
}
/* Now recover expired state... */
- if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
+ if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
section = "reclaim nograce";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->nograce_recovery_ops);
@@ -2582,6 +2610,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
continue;
if (status < 0)
goto out_error;
+ clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
}
nfs4_end_drain_session(clp);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ed4e1b07447b..20b3717cd7ca 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -590,7 +590,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
}
hdr->res.fattr = &hdr->fattr;
- hdr->res.count = count;
+ hdr->res.count = 0;
hdr->res.eof = 0;
hdr->res.verf = &hdr->verf;
nfs_fattr_init(&hdr->fattr);
@@ -1251,20 +1251,23 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr)
{
- LIST_HEAD(failed);
+ LIST_HEAD(pages);
desc->pg_io_completion = hdr->io_completion;
desc->pg_dreq = hdr->dreq;
- while (!list_empty(&hdr->pages)) {
- struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ list_splice_init(&hdr->pages, &pages);
+ while (!list_empty(&pages)) {
+ struct nfs_page *req = nfs_list_entry(pages.next);
if (!nfs_pageio_add_request(desc, req))
- nfs_list_move_request(req, &failed);
+ break;
}
nfs_pageio_complete(desc);
- if (!list_empty(&failed)) {
- list_move(&failed, &hdr->pages);
- return desc->pg_error < 0 ? desc->pg_error : -EIO;
+ if (!list_empty(&pages)) {
+ int err = desc->pg_error < 0 ? desc->pg_error : -EIO;
+ hdr->completion_ops->error_cleanup(&pages, err);
+ nfs_set_pgio_error(hdr, err, hdr->io_start);
+ return err;
}
return 0;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 75bd5b552ba4..4525d5acae38 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1903,12 +1903,6 @@ lookup_again:
goto out_unlock;
}
- if (!nfs4_valid_open_stateid(ctx->state)) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
- PNFS_UPDATE_LAYOUT_INVALID_OPEN);
- goto out_unlock;
- }
-
/*
* Choose a stateid for the LAYOUTGET. If we don't have a layout
* stateid, or it has been invalidated, then we must use the open
@@ -1939,6 +1933,7 @@ lookup_again:
iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ,
NULL, &stateid, NULL);
if (status != 0) {
+ lseg = ERR_PTR(status);
trace_pnfs_update_layout(ino, pos, count,
iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_INVALID_OPEN);
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index c0046c348910..82af4809b869 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -627,11 +627,16 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
/* Add this address as an alias */
rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
rpc_clnt_test_and_add_xprt, NULL);
- } else
- clp = get_v3_ds_connect(mds_srv,
- (struct sockaddr *)&da->da_addr,
- da->da_addrlen, IPPROTO_TCP,
- timeo, retrans);
+ continue;
+ }
+ clp = get_v3_ds_connect(mds_srv,
+ (struct sockaddr *)&da->da_addr,
+ da->da_addrlen, IPPROTO_TCP,
+ timeo, retrans);
+ if (IS_ERR(clp))
+ continue;
+ clp->cl_rpcclient->cl_softerr = 0;
+ clp->cl_rpcclient->cl_softrtry = 0;
}
if (IS_ERR(clp)) {
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 5552fa8b6e12..0f7288b94633 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -594,7 +594,8 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
/* Emulate the eof flag, which isn't normally needed in NFSv2
* as it is guaranteed to always return the file attributes
*/
- if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size)
+ if ((hdr->res.count == 0 && hdr->args.count > 0) ||
+ hdr->args.offset + hdr->res.count >= hdr->res.fattr->size)
hdr->res.eof = 1;
}
return 0;
@@ -615,8 +616,10 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- if (task->tk_status >= 0)
+ if (task->tk_status >= 0) {
+ hdr->res.count = hdr->args.count;
nfs_writeback_update_inode(hdr);
+ }
return 0;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c19841c82b6a..cfe0b586eadd 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -91,19 +91,25 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
-static void nfs_readpage_release(struct nfs_page *req)
+static void nfs_readpage_release(struct nfs_page *req, int error)
{
struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
+ struct page *page = req->wb_page;
dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode), req->wb_bytes,
(long long)req_offset(req));
+ if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
+ SetPageError(page);
if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
- if (PageUptodate(req->wb_page))
- nfs_readpage_to_fscache(inode, req->wb_page, 0);
+ struct address_space *mapping = page_file_mapping(page);
- unlock_page(req->wb_page);
+ if (PageUptodate(page))
+ nfs_readpage_to_fscache(inode, page, 0);
+ else if (!PageError(page) && !PagePrivate(page))
+ generic_error_remove_page(mapping, page);
+ unlock_page(page);
}
nfs_release_request(req);
}
@@ -131,7 +137,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
&nfs_async_read_completion_ops);
if (!nfs_pageio_add_request(&pgio, new)) {
nfs_list_remove_request(new);
- nfs_readpage_release(new);
+ nfs_readpage_release(new, pgio.pg_error);
}
nfs_pageio_complete(&pgio);
@@ -153,6 +159,7 @@ static void nfs_page_group_set_uptodate(struct nfs_page *req)
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
unsigned long bytes = 0;
+ int error;
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out;
@@ -179,14 +186,19 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
zero_user_segment(page, start, end);
}
}
+ error = 0;
bytes += req->wb_bytes;
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
if (bytes <= hdr->good_bytes)
nfs_page_group_set_uptodate(req);
+ else {
+ error = hdr->error;
+ xchg(&nfs_req_openctx(req)->error, error);
+ }
} else
nfs_page_group_set_uptodate(req);
nfs_list_remove_request(req);
- nfs_readpage_release(req);
+ nfs_readpage_release(req, error);
}
out:
hdr->release(hdr);
@@ -213,7 +225,7 @@ nfs_async_read_error(struct list_head *head, int error)
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- nfs_readpage_release(req);
+ nfs_readpage_release(req, error);
}
}
@@ -337,8 +349,13 @@ int nfs_readpage(struct file *file, struct page *page)
goto out;
}
+ xchg(&ctx->error, 0);
error = nfs_readpage_async(ctx, inode, page);
-
+ if (!error) {
+ error = wait_on_page_locked_killable(page);
+ if (!PageUptodate(page) && !error)
+ error = xchg(&ctx->error, 0);
+ }
out:
put_nfs_open_context(ctx);
return error;
@@ -372,8 +389,8 @@ readpage_async_filler(void *data, struct page *page)
zero_user_segment(page, len, PAGE_SIZE);
if (!nfs_pageio_add_request(desc->pgio, new)) {
nfs_list_remove_request(new);
- nfs_readpage_release(new);
error = desc->pgio->pg_error;
+ nfs_readpage_release(new, error);
goto out;
}
return 0;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 628631e2e34f..703f595dce90 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2260,6 +2260,7 @@ nfs_compare_remount_data(struct nfs_server *nfss,
data->acdirmin != nfss->acdirmin / HZ ||
data->acdirmax != nfss->acdirmax / HZ ||
data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) ||
+ (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) ||
data->nfs_server.port != nfss->port ||
data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen ||
!rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 92d9cadc6102..85ca49549b39 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -57,6 +57,7 @@ static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
+static void nfs_inode_remove_request(struct nfs_page *req);
static void nfs_clear_request_commit(struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
struct inode *inode);
@@ -591,23 +592,13 @@ release_request:
static void nfs_write_error(struct nfs_page *req, int error)
{
+ nfs_set_pageerror(page_file_mapping(req->wb_page));
nfs_mapping_set_error(req->wb_page, error);
+ nfs_inode_remove_request(req);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
-static bool
-nfs_error_is_fatal_on_server(int err)
-{
- switch (err) {
- case 0:
- case -ERESTARTSYS:
- case -EINTR:
- return false;
- }
- return nfs_error_is_fatal(err);
-}
-
/*
* Find an associated nfs write request, and prepare to flush it out
* May return an error if the user signalled nfs_wait_on_request().
@@ -615,7 +606,6 @@ nfs_error_is_fatal_on_server(int err)
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct page *page)
{
- struct address_space *mapping;
struct nfs_page *req;
int ret = 0;
@@ -630,12 +620,11 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
/* If there is a fatal error that covers this write, just exit */
- ret = 0;
- mapping = page_file_mapping(page);
- if (test_bit(AS_ENOSPC, &mapping->flags) ||
- test_bit(AS_EIO, &mapping->flags))
+ ret = pgio->pg_error;
+ if (nfs_error_is_fatal_on_server(ret))
goto out_launder;
+ ret = 0;
if (!nfs_pageio_add_request(pgio, req)) {
ret = pgio->pg_error;
/*
@@ -647,6 +636,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
} else
ret = -EAGAIN;
nfs_redirty_request(req);
+ pgio->pg_error = 0;
} else
nfs_add_stats(page_file_mapping(page)->host,
NFSIOS_WRITEPAGES, 1);
@@ -666,7 +656,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
ret = nfs_page_async_flush(pgio, page);
if (ret == -EAGAIN) {
redirty_page_for_writepage(wbc, page);
- ret = 0;
+ ret = AOP_WRITEPAGE_ACTIVATE;
}
return ret;
}
@@ -685,10 +675,11 @@ static int nfs_writepage_locked(struct page *page,
nfs_pageio_init_write(&pgio, inode, 0,
false, &nfs_async_write_completion_ops);
err = nfs_do_writepage(page, wbc, &pgio);
+ pgio.pg_error = 0;
nfs_pageio_complete(&pgio);
if (err < 0)
return err;
- if (pgio.pg_error < 0)
+ if (nfs_error_is_fatal(pgio.pg_error))
return pgio.pg_error;
return 0;
}
@@ -698,7 +689,8 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
int ret;
ret = nfs_writepage_locked(page, wbc);
- unlock_page(page);
+ if (ret != AOP_WRITEPAGE_ACTIVATE)
+ unlock_page(page);
return ret;
}
@@ -707,7 +699,8 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
int ret;
ret = nfs_do_writepage(page, wbc, data);
- unlock_page(page);
+ if (ret != AOP_WRITEPAGE_ACTIVATE)
+ unlock_page(page);
return ret;
}
@@ -733,13 +726,14 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
&nfs_async_write_completion_ops);
pgio.pg_io_completion = ioc;
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
+ pgio.pg_error = 0;
nfs_pageio_complete(&pgio);
nfs_io_completion_put(ioc);
if (err < 0)
goto out_err;
err = pgio.pg_error;
- if (err < 0)
+ if (nfs_error_is_fatal(err))
goto out_err;
return 0;
out_err:
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 26ad75ae2be0..96352ab7bd81 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -571,7 +571,7 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
*/
static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{
- struct nfsd_net *nn = v;
+ struct nfsd_net *nn = m->private;
seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
seq_printf(m, "num entries: %u\n",
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 13c548733860..3cf4f6aa48d6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1171,13 +1171,17 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
return inode;
}
-static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, struct nfsdfs_client *ncl)
{
struct inode *inode;
inode = nfsd_get_inode(dir->i_sb, mode);
if (!inode)
return -ENOMEM;
+ if (ncl) {
+ inode->i_private = ncl;
+ kref_get(&ncl->cl_ref);
+ }
d_add(dentry, inode);
inc_nlink(dir);
fsnotify_mkdir(dir, dentry);
@@ -1194,17 +1198,14 @@ static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *nc
dentry = d_alloc_name(parent, name);
if (!dentry)
goto out_err;
- ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600);
+ ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600, ncl);
if (ret)
goto out_err;
- if (ncl) {
- d_inode(dentry)->i_private = ncl;
- kref_get(&ncl->cl_ref);
- }
out:
inode_unlock(dir);
return dentry;
out_err:
+ dput(dentry);
dentry = ERR_PTR(ret);
goto out;
}
@@ -1214,11 +1215,9 @@ static void clear_ncl(struct inode *inode)
struct nfsdfs_client *ncl = inode->i_private;
inode->i_private = NULL;
- synchronize_rcu();
kref_put(&ncl->cl_ref, ncl->cl_release);
}
-
static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode)
{
struct nfsdfs_client *nc = inode->i_private;
@@ -1232,9 +1231,9 @@ struct nfsdfs_client *get_nfsdfs_client(struct inode *inode)
{
struct nfsdfs_client *nc;
- rcu_read_lock();
+ inode_lock_shared(inode);
nc = __get_nfsdfs_client(inode);
- rcu_read_unlock();
+ inode_unlock_shared(inode);
return nc;
}
/* from __rpc_unlink */
diff --git a/fs/read_write.c b/fs/read_write.c
index 1f5088dec566..5bbf587f5bc1 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1811,10 +1811,7 @@ static int generic_remap_check_len(struct inode *inode_in,
return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
}
-/*
- * Read a page's worth of file data into the page cache. Return the page
- * locked.
- */
+/* Read a page's worth of file data into the page cache. */
static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
{
struct page *page;
@@ -1826,11 +1823,33 @@ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
put_page(page);
return ERR_PTR(-EIO);
}
- lock_page(page);
return page;
}
/*
+ * Lock two pages, ensuring that we lock in offset order if the pages are from
+ * the same file.
+ */
+static void vfs_lock_two_pages(struct page *page1, struct page *page2)
+{
+ /* Always lock in order of increasing index. */
+ if (page1->index > page2->index)
+ swap(page1, page2);
+
+ lock_page(page1);
+ if (page1 != page2)
+ lock_page(page2);
+}
+
+/* Unlock two pages, being careful not to unlock the same page twice. */
+static void vfs_unlock_two_pages(struct page *page1, struct page *page2)
+{
+ unlock_page(page1);
+ if (page1 != page2)
+ unlock_page(page2);
+}
+
+/*
* Compare extents of two files to see if they are the same.
* Caller must have locked both inodes to prevent write races.
*/
@@ -1867,10 +1886,24 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
dest_page = vfs_dedupe_get_page(dest, destoff);
if (IS_ERR(dest_page)) {
error = PTR_ERR(dest_page);
- unlock_page(src_page);
put_page(src_page);
goto out_error;
}
+
+ vfs_lock_two_pages(src_page, dest_page);
+
+ /*
+ * Now that we've locked both pages, make sure they're still
+ * mapped to the file data we're interested in. If not,
+ * someone is invalidating pages on us and we lose.
+ */
+ if (!PageUptodate(src_page) || !PageUptodate(dest_page) ||
+ src_page->mapping != src->i_mapping ||
+ dest_page->mapping != dest->i_mapping) {
+ same = false;
+ goto unlock;
+ }
+
src_addr = kmap_atomic(src_page);
dest_addr = kmap_atomic(dest_page);
@@ -1882,8 +1915,8 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
kunmap_atomic(dest_addr);
kunmap_atomic(src_addr);
- unlock_page(dest_page);
- unlock_page(src_page);
+unlock:
+ vfs_unlock_two_pages(src_page, dest_page);
put_page(dest_page);
put_page(src_page);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 04f09689cd6d..1600034a929b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -119,6 +119,7 @@ static int traverse(struct seq_file *m, loff_t offset)
}
if (seq_has_overflowed(m))
goto Eoverflow;
+ p = m->op->next(m, p, &m->index);
if (pos + m->count > offset) {
m->from = offset - pos;
m->count -= m->from;
@@ -126,7 +127,6 @@ static int traverse(struct seq_file *m, loff_t offset)
}
pos += m->count;
m->count = 0;
- p = m->op->next(m, p, &m->index);
if (pos == offset)
break;
}
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 80d7301ab76d..c0b84e960b20 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -51,7 +51,7 @@
static void shrink_liability(struct ubifs_info *c, int nr_to_write)
{
down_read(&c->vfs_sb->s_umount);
- writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE);
+ writeback_inodes_sb_nr(c->vfs_sb, nr_to_write, WB_REASON_FS_FREE_SPACE);
up_read(&c->vfs_sb->s_umount);
}
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index b52624e28fa1..3b4b4114f208 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -129,7 +129,6 @@ static void __orphan_drop(struct ubifs_info *c, struct ubifs_orphan *o)
static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph)
{
if (orph->del) {
- spin_unlock(&c->orphan_lock);
dbg_gen("deleted twice ino %lu", orph->inum);
return;
}
@@ -138,7 +137,6 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph)
orph->del = 1;
orph->dnext = c->orph_dnext;
c->orph_dnext = orph;
- spin_unlock(&c->orphan_lock);
dbg_gen("delete later ino %lu", orph->inum);
return;
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 2c0803b0ac3a..8c1d571334bc 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -609,6 +609,10 @@ static int init_constants_early(struct ubifs_info *c)
c->max_bu_buf_len = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ;
if (c->max_bu_buf_len > c->leb_size)
c->max_bu_buf_len = c->leb_size;
+
+ /* Log is ready, preserve one LEB for commits. */
+ c->min_log_bytes = c->leb_size;
+
return 0;
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ccbdbd62f0d8..fe6d804a38dc 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -880,6 +880,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
/* len == 0 means wake all */
struct userfaultfd_wake_range range = { .len = 0, };
unsigned long new_flags;
+ bool still_valid;
WRITE_ONCE(ctx->released, true);
@@ -895,8 +896,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
* taking the mmap_sem for writing.
*/
down_write(&mm->mmap_sem);
- if (!mmget_still_valid(mm))
- goto skip_mm;
+ still_valid = mmget_still_valid(mm);
prev = NULL;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
cond_resched();
@@ -907,19 +907,20 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
continue;
}
new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
- prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
- new_flags, vma->anon_vma,
- vma->vm_file, vma->vm_pgoff,
- vma_policy(vma),
- NULL_VM_UFFD_CTX);
- if (prev)
- vma = prev;
- else
- prev = vma;
+ if (still_valid) {
+ prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
+ new_flags, vma->anon_vma,
+ vma->vm_file, vma->vm_pgoff,
+ vma_policy(vma),
+ NULL_VM_UFFD_CTX);
+ if (prev)
+ vma = prev;
+ else
+ prev = vma;
+ }
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
-skip_mm:
up_write(&mm->mmap_sem);
mmput(mm);
wakeup:
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index baf0b72c0a37..07aad70f3931 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3835,15 +3835,28 @@ xfs_bmapi_read(
XFS_STATS_INC(mp, xs_blk_mapr);
ifp = XFS_IFORK_PTR(ip, whichfork);
+ if (!ifp) {
+ /* No CoW fork? Return a hole. */
+ if (whichfork == XFS_COW_FORK) {
+ mval->br_startoff = bno;
+ mval->br_startblock = HOLESTARTBLOCK;
+ mval->br_blockcount = len;
+ mval->br_state = XFS_EXT_NORM;
+ *nmap = 1;
+ return 0;
+ }
- /* No CoW fork? Return a hole. */
- if (whichfork == XFS_COW_FORK && !ifp) {
- mval->br_startoff = bno;
- mval->br_startblock = HOLESTARTBLOCK;
- mval->br_blockcount = len;
- mval->br_state = XFS_EXT_NORM;
- *nmap = 1;
- return 0;
+ /*
+ * A missing attr ifork implies that the inode says we're in
+ * extents or btree format but failed to pass the inode fork
+ * verifier while trying to load it. Treat that as a file
+ * corruption too.
+ */
+#ifdef DEBUG
+ xfs_alert(mp, "%s: inode %llu missing fork %d",
+ __func__, ip->i_ino, whichfork);
+#endif /* DEBUG */
+ return -EFSCORRUPTED;
}
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index d1c77fd0815d..0bf56e94bfe9 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -487,10 +487,8 @@ xfs_da3_split(
ASSERT(state->path.active == 0);
oldblk = &state->path.blk[0];
error = xfs_da3_root_split(state, oldblk, addblk);
- if (error) {
- addblk->bp = NULL;
- return error; /* GROT: dir is inconsistent */
- }
+ if (error)
+ goto out;
/*
* Update pointers to the node which used to be block 0 and just got
@@ -505,7 +503,10 @@ xfs_da3_split(
*/
node = oldblk->bp->b_addr;
if (node->hdr.info.forw) {
- ASSERT(be32_to_cpu(node->hdr.info.forw) == addblk->blkno);
+ if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
node = addblk->bp->b_addr;
node->hdr.info.back = cpu_to_be32(oldblk->blkno);
xfs_trans_log_buf(state->args->trans, addblk->bp,
@@ -514,15 +515,19 @@ xfs_da3_split(
}
node = oldblk->bp->b_addr;
if (node->hdr.info.back) {
- ASSERT(be32_to_cpu(node->hdr.info.back) == addblk->blkno);
+ if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
node = addblk->bp->b_addr;
node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
xfs_trans_log_buf(state->args->trans, addblk->bp,
XFS_DA_LOGRANGE(node, &node->hdr.info,
sizeof(node->hdr.info)));
}
+out:
addblk->bp = NULL;
- return 0;
+ return error;
}
/*
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index afcc6642690a..1fc44efc344d 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -741,7 +741,8 @@ xfs_dir2_leafn_lookup_for_entry(
ents = dp->d_ops->leaf_ents_p(leaf);
xfs_dir3_leaf_check(dp, bp);
- ASSERT(leafhdr.count > 0);
+ if (leafhdr.count <= 0)
+ return -EFSCORRUPTED;
/*
* Look up the hash value in the leaf entries.
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 7fcf7569743f..7bd7534f5051 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -547,63 +547,12 @@ xfs_file_compat_ioctl(
struct inode *inode = file_inode(filp);
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- void __user *arg = (void __user *)p;
+ void __user *arg = compat_ptr(p);
int error;
trace_xfs_file_compat_ioctl(ip);
switch (cmd) {
- /* No size or alignment issues on any arch */
- case XFS_IOC_DIOINFO:
- case XFS_IOC_FSGEOMETRY_V4:
- case XFS_IOC_FSGEOMETRY:
- case XFS_IOC_AG_GEOMETRY:
- case XFS_IOC_FSGETXATTR:
- case XFS_IOC_FSSETXATTR:
- case XFS_IOC_FSGETXATTRA:
- case XFS_IOC_FSSETDM:
- case XFS_IOC_GETBMAP:
- case XFS_IOC_GETBMAPA:
- case XFS_IOC_GETBMAPX:
- case XFS_IOC_FSCOUNTS:
- case XFS_IOC_SET_RESBLKS:
- case XFS_IOC_GET_RESBLKS:
- case XFS_IOC_FSGROWFSLOG:
- case XFS_IOC_GOINGDOWN:
- case XFS_IOC_ERROR_INJECTION:
- case XFS_IOC_ERROR_CLEARALL:
- case FS_IOC_GETFSMAP:
- case XFS_IOC_SCRUB_METADATA:
- case XFS_IOC_BULKSTAT:
- case XFS_IOC_INUMBERS:
- return xfs_file_ioctl(filp, cmd, p);
-#if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32)
- /*
- * These are handled fine if no alignment issues. To support x32
- * which uses native 64-bit alignment we must emit these cases in
- * addition to the ia-32 compat set below.
- */
- case XFS_IOC_ALLOCSP:
- case XFS_IOC_FREESP:
- case XFS_IOC_RESVSP:
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP64:
- case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP64:
- case XFS_IOC_FSGEOMETRY_V1:
- case XFS_IOC_FSGROWFSDATA:
- case XFS_IOC_FSGROWFSRT:
- case XFS_IOC_ZERO_RANGE:
-#ifdef CONFIG_X86_X32
- /*
- * x32 special: this gets a different cmd number from the ia-32 compat
- * case below; the associated data will match native 64-bit alignment.
- */
- case XFS_IOC_SWAPEXT:
-#endif
- return xfs_file_ioctl(filp, cmd, p);
-#endif
#if defined(BROKEN_X86_ALIGNMENT)
case XFS_IOC_ALLOCSP_32:
case XFS_IOC_FREESP_32:
@@ -705,6 +654,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_FSSETDM_BY_HANDLE_32:
return xfs_compat_fssetdm_by_handle(filp, arg);
default:
- return -ENOIOCTLCMD;
+ /* try the native version */
+ return xfs_file_ioctl(filp, cmd, (unsigned long)arg);
}
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ff3c1fae5357..fe285d123d69 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -793,6 +793,7 @@ xfs_setattr_nonsize(
out_cancel:
xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_dqrele:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 00e9f5c388d3..7fc3c1ad36bc 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -429,10 +429,7 @@ xfs_log_reserve(
ASSERT(*ticp == NULL);
tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
- KM_SLEEP | KM_MAYFAIL);
- if (!tic)
- return -ENOMEM;
-
+ KM_SLEEP);
*ticp = tic;
xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 0c954cad7449..a339bd5fa260 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -32,7 +32,7 @@ xfs_break_leased_layouts(
struct xfs_inode *ip = XFS_I(inode);
int error;
- while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
+ while ((error = break_layout(inode, false)) == -EWOULDBLOCK) {
xfs_iunlock(ip, *iolock);
*did_unlock = true;
error = break_layout(inode, true);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index c4ec7afd1170..edbe37b7f636 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1190,11 +1190,11 @@ xfs_reflink_remap_blocks(
}
/*
- * Grab the exclusive iolock for a data copy from src to dest, making
- * sure to abide vfs locking order (lowest pointer value goes first) and
- * breaking the pnfs layout leases on dest before proceeding. The loop
- * is needed because we cannot call the blocking break_layout() with the
- * src iolock held, and therefore have to back out both locks.
+ * Grab the exclusive iolock for a data copy from src to dest, making sure to
+ * abide vfs locking order (lowest pointer value goes first) and breaking the
+ * layout leases before proceeding. The loop is needed because we cannot call
+ * the blocking break_layout() with the iolocks held, and therefore have to
+ * back out both locks.
*/
static int
xfs_iolock_two_inodes_and_break_layout(
@@ -1203,33 +1203,44 @@ xfs_iolock_two_inodes_and_break_layout(
{
int error;
-retry:
- if (src < dest) {
- inode_lock_shared(src);
- inode_lock_nested(dest, I_MUTEX_NONDIR2);
- } else {
- /* src >= dest */
- inode_lock(dest);
- }
+ if (src > dest)
+ swap(src, dest);
- error = break_layout(dest, false);
- if (error == -EWOULDBLOCK) {
- inode_unlock(dest);
- if (src < dest)
- inode_unlock_shared(src);
+retry:
+ /* Wait to break both inodes' layouts before we start locking. */
+ error = break_layout(src, true);
+ if (error)
+ return error;
+ if (src != dest) {
error = break_layout(dest, true);
if (error)
return error;
- goto retry;
}
+
+ /* Lock one inode and make sure nobody got in and leased it. */
+ inode_lock(src);
+ error = break_layout(src, false);
if (error) {
+ inode_unlock(src);
+ if (error == -EWOULDBLOCK)
+ goto retry;
+ return error;
+ }
+
+ if (src == dest)
+ return 0;
+
+ /* Lock the other inode and make sure nobody got in and leased it. */
+ inode_lock_nested(dest, I_MUTEX_NONDIR2);
+ error = break_layout(dest, false);
+ if (error) {
+ inode_unlock(src);
inode_unlock(dest);
- if (src < dest)
- inode_unlock_shared(src);
+ if (error == -EWOULDBLOCK)
+ goto retry;
return error;
}
- if (src > dest)
- inode_lock_shared_nested(src, I_MUTEX_NONDIR2);
+
return 0;
}
@@ -1247,10 +1258,10 @@ xfs_reflink_remap_unlock(
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
if (!same_inode)
- xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
+ xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
inode_unlock(inode_out);
if (!same_inode)
- inode_unlock_shared(inode_in);
+ inode_unlock(inode_in);
}
/*
@@ -1325,7 +1336,7 @@ xfs_reflink_remap_prep(
if (same_inode)
xfs_ilock(src, XFS_MMAPLOCK_EXCL);
else
- xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest,
+ xfs_lock_two_inodes(src, XFS_MMAPLOCK_EXCL, dest,
XFS_MMAPLOCK_EXCL);
/* Check file eligibility and prepare for block sharing. */