summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/disk-io.c4
-rw-r--r--fs/btrfs/inode.c70
-rw-r--r--fs/btrfs/raid56.c34
-rw-r--r--fs/btrfs/volumes.c10
-rw-r--r--fs/btrfs/volumes.h6
-rw-r--r--fs/ceph/addr.c24
-rw-r--r--fs/ceph/cache.c12
-rw-r--r--fs/char_dev.c58
-rw-r--r--fs/cifs/connect.c22
-rw-r--r--fs/cifs/dir.c18
-rw-r--r--fs/cifs/smb2pdu.c11
-rw-r--r--fs/cifs/smb2pdu.h4
-rw-r--r--fs/dax.c29
-rw-r--r--fs/devpts/inode.c69
-rw-r--r--fs/eventpoll.c42
-rw-r--r--fs/ext4/mballoc.c7
-rw-r--r--fs/ext4/xattr.c6
-rw-r--r--fs/gfs2/acl.c30
-rw-r--r--fs/gfs2/aops.c14
-rw-r--r--fs/gfs2/bmap.c24
-rw-r--r--fs/gfs2/dir.c4
-rw-r--r--fs/gfs2/file.c3
-rw-r--r--fs/gfs2/glock.c137
-rw-r--r--fs/gfs2/glock.h36
-rw-r--r--fs/gfs2/glops.c30
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c17
-rw-r--r--fs/gfs2/lock_dlm.c5
-rw-r--r--fs/gfs2/log.c13
-rw-r--r--fs/gfs2/lops.c7
-rw-r--r--fs/gfs2/meta_io.c9
-rw-r--r--fs/gfs2/ops_fstype.c7
-rw-r--r--fs/gfs2/quota.c7
-rw-r--r--fs/gfs2/rgrp.c3
-rw-r--r--fs/gfs2/super.c71
-rw-r--r--fs/gfs2/util.h1
-rw-r--r--fs/gfs2/xattr.c9
-rw-r--r--fs/jfs/super.c12
-rw-r--r--fs/kernfs/file.c2
-rw-r--r--fs/nfsd/nfs4xdr.c6
-rw-r--r--fs/nfsd/vfs.c2
-rw-r--r--fs/overlayfs/readdir.c4
-rw-r--r--fs/proc/base.c3
-rw-r--r--fs/proc/devices.c8
-rw-r--r--fs/read_write.c50
-rw-r--r--fs/select.c6
-rw-r--r--fs/userfaultfd.c25
47 files changed, 645 insertions, 330 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 080e2ebb8aa0..f45b61fe9a9a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3516,7 +3516,7 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device)
struct bio *bio = device->flush_bio;
if (!device->flush_bio_sent)
- return 0;
+ return BLK_STS_OK;
device->flush_bio_sent = 0;
wait_for_completion_io(&device->flush_wait);
@@ -3563,7 +3563,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
continue;
write_dev_flush(dev);
- dev->last_flush_error = 0;
+ dev->last_flush_error = BLK_STS_OK;
}
/* wait for all the barriers */
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 95c212037095..24bcd5cd9cf2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7924,11 +7924,12 @@ err:
return ret;
}
-static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
- int mirror_num)
+static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
+ struct bio *bio,
+ int mirror_num)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int ret;
+ blk_status_t ret;
BUG_ON(bio_op(bio) == REQ_OP_WRITE);
@@ -7980,10 +7981,10 @@ static int btrfs_check_dio_repairable(struct inode *inode,
return 1;
}
-static int dio_read_error(struct inode *inode, struct bio *failed_bio,
- struct page *page, unsigned int pgoff,
- u64 start, u64 end, int failed_mirror,
- bio_end_io_t *repair_endio, void *repair_arg)
+static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
+ struct page *page, unsigned int pgoff,
+ u64 start, u64 end, int failed_mirror,
+ bio_end_io_t *repair_endio, void *repair_arg)
{
struct io_failure_record *failrec;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -7993,18 +7994,19 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
int read_mode = 0;
int segs;
int ret;
+ blk_status_t status;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
if (ret)
- return ret;
+ return errno_to_blk_status(ret);
ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
failed_mirror);
if (!ret) {
free_io_failure(failure_tree, io_tree, failrec);
- return -EIO;
+ return BLK_STS_IOERR;
}
segs = bio_segments(failed_bio);
@@ -8022,13 +8024,13 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
"Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
read_mode, failrec->this_mirror, failrec->in_validation);
- ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
- if (ret) {
+ status = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
+ if (status) {
free_io_failure(failure_tree, io_tree, failrec);
bio_put(bio);
}
- return ret;
+ return status;
}
struct btrfs_retry_complete {
@@ -8065,8 +8067,8 @@ end:
bio_put(bio);
}
-static int __btrfs_correct_data_nocsum(struct inode *inode,
- struct btrfs_io_bio *io_bio)
+static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode,
+ struct btrfs_io_bio *io_bio)
{
struct btrfs_fs_info *fs_info;
struct bio_vec bvec;
@@ -8076,8 +8078,8 @@ static int __btrfs_correct_data_nocsum(struct inode *inode,
unsigned int pgoff;
u32 sectorsize;
int nr_sectors;
- int ret;
- int err = 0;
+ blk_status_t ret;
+ blk_status_t err = BLK_STS_OK;
fs_info = BTRFS_I(inode)->root->fs_info;
sectorsize = fs_info->sectorsize;
@@ -8183,11 +8185,12 @@ static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
int csum_pos;
bool uptodate = (err == 0);
int ret;
+ blk_status_t status;
fs_info = BTRFS_I(inode)->root->fs_info;
sectorsize = fs_info->sectorsize;
- err = 0;
+ err = BLK_STS_OK;
start = io_bio->logical;
done.inode = inode;
io_bio->bio.bi_iter = io_bio->iter;
@@ -8209,12 +8212,12 @@ try_again:
done.start = start;
init_completion(&done.done);
- ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
- pgoff, start, start + sectorsize - 1,
- io_bio->mirror_num,
- btrfs_retry_endio, &done);
- if (ret) {
- err = errno_to_blk_status(ret);
+ status = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
+ pgoff, start, start + sectorsize - 1,
+ io_bio->mirror_num, btrfs_retry_endio,
+ &done);
+ if (status) {
+ err = status;
goto next;
}
@@ -8250,7 +8253,7 @@ static blk_status_t btrfs_subio_endio_read(struct inode *inode,
if (unlikely(err))
return __btrfs_correct_data_nocsum(inode, io_bio);
else
- return 0;
+ return BLK_STS_OK;
} else {
return __btrfs_subio_endio_read(inode, io_bio, err);
}
@@ -8423,9 +8426,9 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
return 0;
}
-static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
- u64 file_offset, int skip_sum,
- int async_submit)
+static inline blk_status_t
+__btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
+ int skip_sum, int async_submit)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_dio_private *dip = bio->bi_private;
@@ -8488,6 +8491,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
int clone_offset = 0;
int clone_len;
int ret;
+ blk_status_t status;
map_length = orig_bio->bi_iter.bi_size;
submit_len = map_length;
@@ -8537,9 +8541,9 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
*/
atomic_inc(&dip->pending_bios);
- ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
- async_submit);
- if (ret) {
+ status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
+ async_submit);
+ if (status) {
bio_put(bio);
atomic_dec(&dip->pending_bios);
goto out_err;
@@ -8557,9 +8561,9 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
} while (submit_len > 0);
submit:
- ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
- async_submit);
- if (!ret)
+ status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
+ async_submit);
+ if (!status)
return 0;
bio_put(bio);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 208638384cd2..2cf6ba40f7c4 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -905,7 +905,7 @@ static void raid_write_end_io(struct bio *bio)
if (!atomic_dec_and_test(&rbio->stripes_pending))
return;
- err = 0;
+ err = BLK_STS_OK;
/* OK, we have read all the stripes we need to. */
max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
@@ -1324,7 +1324,7 @@ write_data:
return;
cleanup:
- rbio_orig_end_io(rbio, -EIO);
+ rbio_orig_end_io(rbio, BLK_STS_IOERR);
}
/*
@@ -1475,7 +1475,7 @@ static void raid_rmw_end_io(struct bio *bio)
cleanup:
- rbio_orig_end_io(rbio, -EIO);
+ rbio_orig_end_io(rbio, BLK_STS_IOERR);
}
static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
@@ -1579,7 +1579,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
return 0;
cleanup:
- rbio_orig_end_io(rbio, -EIO);
+ rbio_orig_end_io(rbio, BLK_STS_IOERR);
return -EIO;
finish:
@@ -1795,12 +1795,12 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
void **pointers;
int faila = -1, failb = -1;
struct page *page;
- int err;
+ blk_status_t err;
int i;
pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
if (!pointers) {
- err = -ENOMEM;
+ err = BLK_STS_RESOURCE;
goto cleanup_io;
}
@@ -1856,7 +1856,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
* a bad data or Q stripe.
* TODO, we should redo the xor here.
*/
- err = -EIO;
+ err = BLK_STS_IOERR;
goto cleanup;
}
/*
@@ -1882,7 +1882,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
if (rbio->bbio->raid_map[faila] ==
RAID5_P_STRIPE) {
- err = -EIO;
+ err = BLK_STS_IOERR;
goto cleanup;
}
/*
@@ -1954,13 +1954,13 @@ pstripe:
}
}
- err = 0;
+ err = BLK_STS_OK;
cleanup:
kfree(pointers);
cleanup_io:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
- if (err == 0)
+ if (err == BLK_STS_OK)
cache_rbio_pages(rbio);
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -1968,7 +1968,7 @@ cleanup_io:
rbio_orig_end_io(rbio, err);
} else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
rbio_orig_end_io(rbio, err);
- } else if (err == 0) {
+ } else if (err == BLK_STS_OK) {
rbio->faila = -1;
rbio->failb = -1;
@@ -2005,7 +2005,7 @@ static void raid_recover_end_io(struct bio *bio)
return;
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
- rbio_orig_end_io(rbio, -EIO);
+ rbio_orig_end_io(rbio, BLK_STS_IOERR);
else
__raid_recover_end_io(rbio);
}
@@ -2104,7 +2104,7 @@ out:
cleanup:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
- rbio_orig_end_io(rbio, -EIO);
+ rbio_orig_end_io(rbio, BLK_STS_IOERR);
return -EIO;
}
@@ -2431,7 +2431,7 @@ submit_write:
nr_data = bio_list_size(&bio_list);
if (!nr_data) {
/* Every parity is right */
- rbio_orig_end_io(rbio, 0);
+ rbio_orig_end_io(rbio, BLK_STS_OK);
return;
}
@@ -2451,7 +2451,7 @@ submit_write:
return;
cleanup:
- rbio_orig_end_io(rbio, -EIO);
+ rbio_orig_end_io(rbio, BLK_STS_IOERR);
}
static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
@@ -2519,7 +2519,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
return;
cleanup:
- rbio_orig_end_io(rbio, -EIO);
+ rbio_orig_end_io(rbio, BLK_STS_IOERR);
}
/*
@@ -2633,7 +2633,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
return;
cleanup:
- rbio_orig_end_io(rbio, -EIO);
+ rbio_orig_end_io(rbio, BLK_STS_IOERR);
return;
finish:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e8b9a269fdde..bd679bc7a1a9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6212,8 +6212,8 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
}
}
-int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
- int mirror_num, int async_submit)
+blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ int mirror_num, int async_submit)
{
struct btrfs_device *dev;
struct bio *first_bio = bio;
@@ -6233,7 +6233,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
&map_length, &bbio, mirror_num, 1);
if (ret) {
btrfs_bio_counter_dec(fs_info);
- return ret;
+ return errno_to_blk_status(ret);
}
total_devs = bbio->num_stripes;
@@ -6256,7 +6256,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
}
btrfs_bio_counter_dec(fs_info);
- return ret;
+ return errno_to_blk_status(ret);
}
if (map_length < length) {
@@ -6283,7 +6283,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
dev_nr, async_submit);
}
btrfs_bio_counter_dec(fs_info);
- return 0;
+ return BLK_STS_OK;
}
struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 6f45fd60d15a..93277fc60930 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -74,7 +74,7 @@ struct btrfs_device {
int missing;
int can_discard;
int is_tgtdev_for_dev_replace;
- int last_flush_error;
+ blk_status_t last_flush_error;
int flush_bio_sent;
#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
@@ -416,8 +416,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 type);
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
-int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
- int mirror_num, int async_submit);
+blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ int mirror_num, int async_submit);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder);
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 50836280a6f8..1bc709fe330a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -189,7 +189,7 @@ static int ceph_releasepage(struct page *page, gfp_t g)
/*
* read a single page, without unlocking it.
*/
-static int readpage_nounlock(struct file *filp, struct page *page)
+static int ceph_do_readpage(struct file *filp, struct page *page)
{
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -219,7 +219,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
err = ceph_readpage_from_fscache(inode, page);
if (err == 0)
- goto out;
+ return -EINPROGRESS;
dout("readpage inode %p file %p page %p index %lu\n",
inode, filp, page, page->index);
@@ -249,8 +249,11 @@ out:
static int ceph_readpage(struct file *filp, struct page *page)
{
- int r = readpage_nounlock(filp, page);
- unlock_page(page);
+ int r = ceph_do_readpage(filp, page);
+ if (r != -EINPROGRESS)
+ unlock_page(page);
+ else
+ r = 0;
return r;
}
@@ -1237,7 +1240,7 @@ retry_locked:
goto retry_locked;
r = writepage_nounlock(page, NULL);
if (r < 0)
- goto fail_nosnap;
+ goto fail_unlock;
goto retry_locked;
}
@@ -1265,11 +1268,14 @@ retry_locked:
}
/* we need to read it. */
- r = readpage_nounlock(file, page);
- if (r < 0)
- goto fail_nosnap;
+ r = ceph_do_readpage(file, page);
+ if (r < 0) {
+ if (r == -EINPROGRESS)
+ return -EAGAIN;
+ goto fail_unlock;
+ }
goto retry_locked;
-fail_nosnap:
+fail_unlock:
unlock_page(page);
return r;
}
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index fd1172823f86..337f88673ed9 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -297,13 +297,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
}
}
-static void ceph_vfs_readpage_complete(struct page *page, void *data, int error)
-{
- if (!error)
- SetPageUptodate(page);
-}
-
-static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int error)
+static void ceph_readpage_from_fscache_complete(struct page *page, void *data, int error)
{
if (!error)
SetPageUptodate(page);
@@ -331,7 +325,7 @@ int ceph_readpage_from_fscache(struct inode *inode, struct page *page)
return -ENOBUFS;
ret = fscache_read_or_alloc_page(ci->fscache, page,
- ceph_vfs_readpage_complete, NULL,
+ ceph_readpage_from_fscache_complete, NULL,
GFP_KERNEL);
switch (ret) {
@@ -360,7 +354,7 @@ int ceph_readpages_from_fscache(struct inode *inode,
return -ENOBUFS;
ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages,
- ceph_vfs_readpage_complete_unlock,
+ ceph_readpage_from_fscache_complete,
NULL, mapping_gfp_mask(mapping));
switch (ret) {
diff --git a/fs/char_dev.c b/fs/char_dev.c
index fb8507f521b2..ebcc8fb3fa66 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -28,6 +28,8 @@ static struct kobj_map *cdev_map;
static DEFINE_MUTEX(chrdevs_lock);
+#define CHRDEV_MAJOR_HASH_SIZE 255
+
static struct char_device_struct {
struct char_device_struct *next;
unsigned int major;
@@ -49,16 +51,39 @@ void chrdev_show(struct seq_file *f, off_t offset)
{
struct char_device_struct *cd;
- if (offset < CHRDEV_MAJOR_HASH_SIZE) {
- mutex_lock(&chrdevs_lock);
- for (cd = chrdevs[offset]; cd; cd = cd->next)
+ mutex_lock(&chrdevs_lock);
+ for (cd = chrdevs[major_to_index(offset)]; cd; cd = cd->next) {
+ if (cd->major == offset)
seq_printf(f, "%3d %s\n", cd->major, cd->name);
- mutex_unlock(&chrdevs_lock);
}
+ mutex_unlock(&chrdevs_lock);
}
#endif /* CONFIG_PROC_FS */
+static int find_dynamic_major(void)
+{
+ int i;
+ struct char_device_struct *cd;
+
+ for (i = ARRAY_SIZE(chrdevs)-1; i > CHRDEV_MAJOR_DYN_END; i--) {
+ if (chrdevs[i] == NULL)
+ return i;
+ }
+
+ for (i = CHRDEV_MAJOR_DYN_EXT_START;
+ i > CHRDEV_MAJOR_DYN_EXT_END; i--) {
+ for (cd = chrdevs[major_to_index(i)]; cd; cd = cd->next)
+ if (cd->major == i)
+ break;
+
+ if (cd == NULL || cd->major != i)
+ return i;
+ }
+
+ return -EBUSY;
+}
+
/*
* Register a single major with a specified minor range.
*
@@ -84,22 +109,21 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
mutex_lock(&chrdevs_lock);
- /* temporary */
if (major == 0) {
- for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) {
- if (chrdevs[i] == NULL)
- break;
- }
-
- if (i < CHRDEV_MAJOR_DYN_END)
- pr_warn("CHRDEV \"%s\" major number %d goes below the dynamic allocation range\n",
- name, i);
-
- if (i == 0) {
- ret = -EBUSY;
+ ret = find_dynamic_major();
+ if (ret < 0) {
+ pr_err("CHRDEV \"%s\" dynamic allocation region is full\n",
+ name);
goto out;
}
- major = i;
+ major = ret;
+ }
+
+ if (major >= CHRDEV_MAJOR_MAX) {
+ pr_err("CHRDEV \"%s\" major requested (%d) is greater than the maximum (%d)\n",
+ name, major, CHRDEV_MAJOR_MAX);
+ ret = -EINVAL;
+ goto out;
}
cd->major = major;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 59647eb72c5f..83a8f52cd879 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1223,6 +1223,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
char *tmp_end, *value;
char delim;
bool got_ip = false;
+ bool got_version = false;
unsigned short port = 0;
struct sockaddr *dstaddr = (struct sockaddr *)&vol->dstaddr;
@@ -1874,24 +1875,35 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
pr_warn("CIFS: server netbiosname longer than 15 truncated.\n");
break;
case Opt_ver:
+ /* version of mount userspace tools, not dialect */
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
+ /* If interface changes in mount.cifs bump to new ver */
if (strncasecmp(string, "1", 1) == 0) {
+ if (strlen(string) > 1) {
+ pr_warn("Bad mount helper ver=%s. Did "
+ "you want SMB1 (CIFS) dialect "
+ "and mean to type vers=1.0 "
+ "instead?\n", string);
+ goto cifs_parse_mount_err;
+ }
/* This is the default */
break;
}
/* For all other value, error */
- pr_warn("CIFS: Invalid version specified\n");
+ pr_warn("CIFS: Invalid mount helper version specified\n");
goto cifs_parse_mount_err;
case Opt_vers:
+ /* protocol version (dialect) */
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
if (cifs_parse_smb_version(string, vol) != 0)
goto cifs_parse_mount_err;
+ got_version = true;
break;
case Opt_sec:
string = match_strdup(args);
@@ -1973,6 +1985,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
else if (override_gid == 1)
pr_notice("CIFS: ignoring forcegid mount option specified with no gid= option.\n");
+ if (got_version == false)
+ pr_warn("No dialect specified on mount. Default has changed to "
+ "a more secure dialect, SMB3 (vers=3.0), from CIFS "
+ "(SMB1). To use the less secure SMB1 dialect to access "
+ "old servers which do not support SMB3 specify vers=1.0"
+ " on mount. For somewhat newer servers such as Windows "
+ "7 try vers=2.1.\n");
+
kfree(mountdata_copy);
return 0;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 56366e984076..e702d48bd023 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -194,15 +194,20 @@ cifs_bp_rename_retry:
}
/*
+ * Don't allow path components longer than the server max.
* Don't allow the separator character in a path component.
* The VFS will not allow "/", but "\" is allowed by posix.
*/
static int
-check_name(struct dentry *direntry)
+check_name(struct dentry *direntry, struct cifs_tcon *tcon)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
int i;
+ if (unlikely(direntry->d_name.len >
+ le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength)))
+ return -ENAMETOOLONG;
+
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
for (i = 0; i < direntry->d_name.len; i++) {
if (direntry->d_name.name[i] == '\\') {
@@ -500,10 +505,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
return finish_no_open(file, res);
}
- rc = check_name(direntry);
- if (rc)
- return rc;
-
xid = get_xid();
cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
@@ -516,6 +517,11 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
}
tcon = tlink_tcon(tlink);
+
+ rc = check_name(direntry, tcon);
+ if (rc)
+ goto out_free_xid;
+
server = tcon->ses->server;
if (server->ops->new_lease_key)
@@ -776,7 +782,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
}
pTcon = tlink_tcon(tlink);
- rc = check_name(direntry);
+ rc = check_name(direntry, pTcon);
if (rc)
goto lookup_out;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5fb2fc2d0080..7aa67206f6da 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -514,7 +514,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
* No tcon so can't do
* cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]);
*/
- if (rc != 0)
+ if (rc == -EOPNOTSUPP) {
+ cifs_dbg(VFS, "Dialect not supported by server. Consider "
+ "specifying vers=1.0 or vers=2.1 on mount for accessing"
+ " older servers\n");
+ goto neg_exit;
+ } else if (rc != 0)
goto neg_exit;
cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode);
@@ -3219,8 +3224,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) *
le32_to_cpu(pfs_inf->SectorsPerAllocationUnit);
kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits);
- kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits);
- kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits);
+ kst->f_bfree = kst->f_bavail =
+ le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits);
return;
}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 18700fd25a0b..2826882c81d1 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -84,8 +84,8 @@
#define NUMBER_OF_SMB2_COMMANDS 0x0013
-/* BB FIXME - analyze following length BB */
-#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
+/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */
+#define MAX_SMB2_HDR_SIZE 0x00b0
#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
diff --git a/fs/dax.c b/fs/dax.c
index 306c2b603fb8..ab925dc6647a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -646,11 +646,10 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
pte_t pte, *ptep = NULL;
pmd_t *pmdp = NULL;
spinlock_t *ptl;
- bool changed;
i_mmap_lock_read(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
- unsigned long address;
+ unsigned long address, start, end;
cond_resched();
@@ -658,8 +657,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
continue;
address = pgoff_address(index, vma);
- changed = false;
- if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
+
+ /*
+ * Note because we provide start/end to follow_pte_pmd it will
+ * call mmu_notifier_invalidate_range_start() on our behalf
+ * before taking any lock.
+ */
+ if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl))
continue;
if (pmdp) {
@@ -676,7 +680,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
pmd = pmd_wrprotect(pmd);
pmd = pmd_mkclean(pmd);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
- changed = true;
+ mmu_notifier_invalidate_range(vma->vm_mm, start, end);
unlock_pmd:
spin_unlock(ptl);
#endif
@@ -691,13 +695,12 @@ unlock_pmd:
pte = pte_wrprotect(pte);
pte = pte_mkclean(pte);
set_pte_at(vma->vm_mm, address, ptep, pte);
- changed = true;
+ mmu_notifier_invalidate_range(vma->vm_mm, start, end);
unlock_pte:
pte_unmap_unlock(ptep, ptl);
}
- if (changed)
- mmu_notifier_invalidate_page(vma->vm_mm, address);
+ mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
}
i_mmap_unlock_read(mapping);
}
@@ -1383,6 +1386,16 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
+ /*
+ * Make sure that the faulting address's PMD offset (color) matches
+ * the PMD offset from the start of the file. This is necessary so
+ * that a PMD range in the page table overlaps exactly with a PMD
+ * range in the radix tree.
+ */
+ if ((vmf->pgoff & PG_PMD_COLOUR) !=
+ ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
+ goto fallback;
+
/* Fall back to PTEs if we're going to COW */
if (write && !(vma->vm_flags & VM_SHARED))
goto fallback;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 44dfbca9306f..7eae33ffa3fc 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -133,7 +133,51 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
return sb->s_fs_info;
}
-struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt)
+static int devpts_ptmx_path(struct path *path)
+{
+ struct super_block *sb;
+ int err;
+
+ /* Has the devpts filesystem already been found? */
+ if (path->mnt->mnt_sb->s_magic == DEVPTS_SUPER_MAGIC)
+ return 0;
+
+ /* Is a devpts filesystem at "pts" in the same directory? */
+ err = path_pts(path);
+ if (err)
+ return err;
+
+ /* Is the path the root of a devpts filesystem? */
+ sb = path->mnt->mnt_sb;
+ if ((sb->s_magic != DEVPTS_SUPER_MAGIC) ||
+ (path->mnt->mnt_root != sb->s_root))
+ return -ENODEV;
+
+ return 0;
+}
+
+struct vfsmount *devpts_mntget(struct file *filp, struct pts_fs_info *fsi)
+{
+ struct path path;
+ int err;
+
+ path = filp->f_path;
+ path_get(&path);
+
+ err = devpts_ptmx_path(&path);
+ dput(path.dentry);
+ if (err) {
+ mntput(path.mnt);
+ path.mnt = ERR_PTR(err);
+ }
+ if (DEVPTS_SB(path.mnt->mnt_sb) != fsi) {
+ mntput(path.mnt);
+ path.mnt = ERR_PTR(-ENODEV);
+ }
+ return path.mnt;
+}
+
+struct pts_fs_info *devpts_acquire(struct file *filp)
{
struct pts_fs_info *result;
struct path path;
@@ -142,31 +186,18 @@ struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt)
path = filp->f_path;
path_get(&path);
- *ptsmnt = NULL;
- /* Has the devpts filesystem already been found? */
- sb = path.mnt->mnt_sb;
- if (sb->s_magic != DEVPTS_SUPER_MAGIC) {
- /* Is a devpts filesystem at "pts" in the same directory? */
- err = path_pts(&path);
- if (err) {
- result = ERR_PTR(err);
- goto out;
- }
-
- /* Is the path the root of a devpts filesystem? */
- result = ERR_PTR(-ENODEV);
- sb = path.mnt->mnt_sb;
- if ((sb->s_magic != DEVPTS_SUPER_MAGIC) ||
- (path.mnt->mnt_root != sb->s_root))
- goto out;
+ err = devpts_ptmx_path(&path);
+ if (err) {
+ result = ERR_PTR(err);
+ goto out;
}
/*
* pty code needs to hold extra references in case of last /dev/tty close
*/
+ sb = path.mnt->mnt_sb;
atomic_inc(&sb->s_active);
- *ptsmnt = mntget(path.mnt);
result = DEVPTS_SB(sb);
out:
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index e767e4389cb1..adbe328b957c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -600,8 +600,13 @@ static void ep_remove_wait_queue(struct eppoll_entry *pwq)
wait_queue_head_t *whead;
rcu_read_lock();
- /* If it is cleared by POLLFREE, it should be rcu-safe */
- whead = rcu_dereference(pwq->whead);
+ /*
+ * If it is cleared by POLLFREE, it should be rcu-safe.
+ * If we read NULL we need a barrier paired with
+ * smp_store_release() in ep_poll_callback(), otherwise
+ * we rely on whead->lock.
+ */
+ whead = smp_load_acquire(&pwq->whead);
if (whead)
remove_wait_queue(whead, &pwq->wait);
rcu_read_unlock();
@@ -1134,17 +1139,6 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
struct eventpoll *ep = epi->ep;
int ewake = 0;
- if ((unsigned long)key & POLLFREE) {
- ep_pwq_from_wait(wait)->whead = NULL;
- /*
- * whead = NULL above can race with ep_remove_wait_queue()
- * which can do another remove_wait_queue() after us, so we
- * can't use __remove_wait_queue(). whead->lock is held by
- * the caller.
- */
- list_del_init(&wait->entry);
- }
-
spin_lock_irqsave(&ep->lock, flags);
ep_set_busy_poll_napi_id(epi);
@@ -1228,10 +1222,26 @@ out_unlock:
if (pwake)
ep_poll_safewake(&ep->poll_wait);
- if (epi->event.events & EPOLLEXCLUSIVE)
- return ewake;
+ if (!(epi->event.events & EPOLLEXCLUSIVE))
+ ewake = 1;
+
+ if ((unsigned long)key & POLLFREE) {
+ /*
+ * If we race with ep_remove_wait_queue() it can miss
+ * ->whead = NULL and do another remove_wait_queue() after
+ * us, so we can't use __remove_wait_queue().
+ */
+ list_del_init(&wait->entry);
+ /*
+ * ->whead != NULL protects us from the race with ep_free()
+ * or ep_remove(), ep_remove_wait_queue() takes whead->lock
+ * held by the caller. Once we nullify it, nothing protects
+ * ep/epi or even wait.
+ */
+ smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL);
+ }
- return 1;
+ return ewake;
}
/*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5a1052627a81..701085620cd8 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2300,7 +2300,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
EXT4_MAX_BLOCK_LOG_SIZE);
struct sg {
struct ext4_group_info info;
- ext4_grpblk_t counters[blocksize_bits + 2];
+ ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
} sg;
group--;
@@ -2309,6 +2309,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
" 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
" 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
+ i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
+ sizeof(struct ext4_group_info);
+
grinfo = ext4_get_group_info(sb, group);
/* Load the group info in memory only if not already loaded. */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
@@ -2320,7 +2323,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
buddy_loaded = 1;
}
- memcpy(&sg, ext4_get_group_info(sb, group), sizeof(sg));
+ memcpy(&sg, ext4_get_group_info(sb, group), i);
if (buddy_loaded)
ext4_mb_unload_buddy(&e4b);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 82a5af9f6668..3dd970168448 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1543,7 +1543,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
/* Clear padding bytes. */
memset(val + i->value_len, 0, new_size - i->value_len);
}
- return 0;
+ goto update_hash;
}
/* Compute min_offs and last. */
@@ -1707,6 +1707,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
here->e_value_size = cpu_to_le32(i->value_len);
}
+update_hash:
if (i->value) {
__le32 hash = 0;
@@ -1725,7 +1726,8 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
here->e_name_len,
&crc32c_hash, 1);
} else if (is_block) {
- __le32 *value = s->base + min_offs - new_size;
+ __le32 *value = s->base + le16_to_cpu(
+ here->e_value_offs);
hash = ext4_xattr_hash_entry(here->e_name,
here->e_name_len, value,
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 2524807ee070..9d5eecb123de 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -86,19 +86,6 @@ int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
char *data;
const char *name = gfs2_acl_name(type);
- if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
- return -E2BIG;
-
- if (type == ACL_TYPE_ACCESS) {
- umode_t mode = inode->i_mode;
-
- error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
- if (error)
- return error;
- if (mode != inode->i_mode)
- mark_inode_dirty(inode);
- }
-
if (acl) {
len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0);
if (len == 0)
@@ -129,6 +116,10 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
struct gfs2_holder gh;
bool need_unlock = false;
int ret;
+ umode_t mode;
+
+ if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
+ return -E2BIG;
ret = gfs2_rsqa_alloc(ip);
if (ret)
@@ -140,7 +131,20 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
return ret;
need_unlock = true;
}
+
+ mode = inode->i_mode;
+ if (type == ACL_TYPE_ACCESS && acl) {
+ ret = posix_acl_update_mode(inode, &mode, &acl);
+ if (ret)
+ goto unlock;
+ }
+
ret = __gfs2_set_acl(inode, acl, type);
+ if (!ret && mode != inode->i_mode) {
+ inode->i_mode = mode;
+ mark_inode_dirty(inode);
+ }
+unlock:
if (need_unlock)
gfs2_glock_dq_uninit(&gh);
return ret;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ed7a2e252ad8..68ed06962537 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -234,7 +234,19 @@ out:
static int gfs2_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
+ struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
+ int ret = mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
+
+ /*
+ * Even if we didn't write any pages here, we might still be holding
+ * dirty pages in the ail. We forcibly flush the ail because we don't
+ * want balance_dirty_pages() to loop indefinitely trying to write out
+ * pages held in the ail that it can't find.
+ */
+ if (ret == 0)
+ set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
+
+ return ret;
}
/**
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 9fa3aef9a5b3..3dd0cceefa43 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -291,8 +291,9 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl,
if (trylock_buffer(rabh)) {
if (!buffer_uptodate(rabh)) {
rabh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META,
- rabh);
+ submit_bh(REQ_OP_READ,
+ REQ_RAHEAD | REQ_META | REQ_PRIO,
+ rabh);
continue;
}
unlock_buffer(rabh);
@@ -1103,8 +1104,15 @@ static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
while (true) {
ptr = metapointer(h, mp);
- if (*ptr) /* if we have a non-null pointer */
+ if (*ptr) { /* if we have a non-null pointer */
+ /* Now zero the metapath after the current height. */
+ h++;
+ if (h < GFS2_MAX_META_HEIGHT)
+ memset(&mp->mp_list[h], 0,
+ (GFS2_MAX_META_HEIGHT - h) *
+ sizeof(mp->mp_list[0]));
return true;
+ }
if (mp->mp_list[h] < ptrs)
mp->mp_list[h]++;
@@ -1120,6 +1128,13 @@ enum dealloc_states {
DEALLOC_DONE = 3, /* process complete */
};
+static bool mp_eq_to_hgt(struct metapath *mp, __u16 *nbof, unsigned int h)
+{
+ if (memcmp(mp->mp_list, nbof, h * sizeof(mp->mp_list[0])))
+ return false;
+ return true;
+}
+
/**
* trunc_dealloc - truncate a file down to a desired size
* @ip: inode to truncate
@@ -1197,8 +1212,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
/* If we're truncating to a non-zero size and the mp is
at the beginning of file for the strip height, we
need to preserve the first metadata pointer. */
- preserve1 = (newsize &&
- (mp.mp_list[mp_h] == nbof[mp_h]));
+ preserve1 = (newsize && mp_eq_to_hgt(&mp, nbof, mp_h));
bh = mp.mp_bh[mp_h];
gfs2_assert_withdraw(sdp, bh);
if (gfs2_assert_withdraw(sdp,
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5ee2e2f8576c..06a0d1947c77 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1513,7 +1513,9 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
continue;
}
bh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, bh);
+ submit_bh(REQ_OP_READ,
+ REQ_RAHEAD | REQ_META | REQ_PRIO,
+ bh);
continue;
}
brelse(bh);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c2062a108d19..bb48074be019 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1030,8 +1030,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
mutex_lock(&fp->f_fl_mutex);
- gl = fl_gh->gh_gl;
- if (gl) {
+ if (gfs2_holder_initialized(fl_gh)) {
if (fl_gh->gh_state == state)
goto out;
locks_lock_file_wait(file,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c38ab6c81898..98e845b7841b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -15,6 +15,7 @@
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/sort.h>
+#include <linux/hash.h>
#include <linux/jhash.h>
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
@@ -71,7 +72,7 @@ static DEFINE_SPINLOCK(lru_lock);
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT)
-static struct rhashtable_params ht_parms = {
+static const struct rhashtable_params ht_parms = {
.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
.key_len = offsetofend(struct lm_lockname, ln_type),
.key_offset = offsetof(struct gfs2_glock, gl_name),
@@ -80,6 +81,49 @@ static struct rhashtable_params ht_parms = {
static struct rhashtable gl_hash_table;
+#define GLOCK_WAIT_TABLE_BITS 12
+#define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
+static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;
+
+struct wait_glock_queue {
+ struct lm_lockname *name;
+ wait_queue_entry_t wait;
+};
+
+static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
+ int sync, void *key)
+{
+ struct wait_glock_queue *wait_glock =
+ container_of(wait, struct wait_glock_queue, wait);
+ struct lm_lockname *wait_name = wait_glock->name;
+ struct lm_lockname *wake_name = key;
+
+ if (wake_name->ln_sbd != wait_name->ln_sbd ||
+ wake_name->ln_number != wait_name->ln_number ||
+ wake_name->ln_type != wait_name->ln_type)
+ return 0;
+ return autoremove_wake_function(wait, mode, sync, key);
+}
+
+static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
+{
+ u32 hash = jhash2((u32 *)name, sizeof(*name) / 4, 0);
+
+ return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
+}
+
+/**
+ * wake_up_glock - Wake up waiters on a glock
+ * @gl: the glock
+ */
+static void wake_up_glock(struct gfs2_glock *gl)
+{
+ wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);
+
+ if (waitqueue_active(wq))
+ __wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
+}
+
static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
@@ -96,6 +140,9 @@ void gfs2_glock_free(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
+ smp_mb();
+ wake_up_glock(gl);
call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
wake_up(&sdp->sd_glock_wait);
@@ -107,7 +154,7 @@ void gfs2_glock_free(struct gfs2_glock *gl)
*
*/
-static void gfs2_glock_hold(struct gfs2_glock *gl)
+void gfs2_glock_hold(struct gfs2_glock *gl)
{
GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
lockref_get(&gl->gl_lockref);
@@ -150,6 +197,9 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
{
+ if (!(gl->gl_ops->go_flags & GLOF_LRU))
+ return;
+
spin_lock(&lru_lock);
if (!list_empty(&gl->gl_lru)) {
list_del_init(&gl->gl_lru);
@@ -191,13 +241,20 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
gfs2_glock_remove_from_lru(gl);
spin_unlock(&gl->gl_lockref.lock);
- rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}
+/*
+ * Cause the glock to be put in work queue context.
+ */
+void gfs2_glock_queue_put(struct gfs2_glock *gl)
+{
+ gfs2_glock_queue_work(gl, 0);
+}
+
/**
* gfs2_glock_put() - Decrement reference count on glock
* @gl: The glock to put
@@ -676,6 +733,40 @@ static void glock_work_func(struct work_struct *work)
spin_unlock(&gl->gl_lockref.lock);
}
+static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
+ struct gfs2_glock *new)
+{
+ struct wait_glock_queue wait;
+ wait_queue_head_t *wq = glock_waitqueue(name);
+ struct gfs2_glock *gl;
+
+ wait.name = name;
+ init_wait(&wait.wait);
+ wait.wait.func = glock_wake_function;
+
+again:
+ prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ rcu_read_lock();
+ if (new) {
+ gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
+ &new->gl_node, ht_parms);
+ if (IS_ERR(gl))
+ goto out;
+ } else {
+ gl = rhashtable_lookup_fast(&gl_hash_table,
+ name, ht_parms);
+ }
+ if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
+ rcu_read_unlock();
+ schedule();
+ goto again;
+ }
+out:
+ rcu_read_unlock();
+ finish_wait(wq, &wait.wait);
+ return gl;
+}
+
/**
* gfs2_glock_get() - Get a glock, or create one if one doesn't exist
* @sdp: The GFS2 superblock
@@ -702,15 +793,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
struct kmem_cache *cachep;
int ret = 0;
- rcu_read_lock();
- gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
- if (gl && !lockref_get_not_dead(&gl->gl_lockref))
- gl = NULL;
- rcu_read_unlock();
-
- *glp = gl;
- if (gl)
+ gl = find_insert_glock(&name, NULL);
+ if (gl) {
+ *glp = gl;
return 0;
+ }
if (!create)
return -ENOENT;
@@ -764,10 +851,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping->writeback_index = 0;
}
-again:
- rcu_read_lock();
- tmp = rhashtable_lookup_get_insert_fast(&gl_hash_table, &gl->gl_node,
- ht_parms);
+ tmp = find_insert_glock(&name, gl);
if (!tmp) {
*glp = gl;
goto out;
@@ -776,13 +860,7 @@ again:
ret = PTR_ERR(tmp);
goto out_free;
}
- if (lockref_get_not_dead(&tmp->gl_lockref)) {
- *glp = tmp;
- goto out_free;
- }
- rcu_read_unlock();
- cond_resched();
- goto again;
+ *glp = tmp;
out_free:
kfree(gl->gl_lksb.sb_lvbptr);
@@ -790,7 +868,6 @@ out_free:
atomic_dec(&sdp->sd_glock_disposal);
out:
- rcu_read_unlock();
return ret;
}
@@ -1473,14 +1550,15 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
do {
gl = ERR_PTR(rhashtable_walk_start(&iter));
- if (gl)
- continue;
+ if (IS_ERR(gl))
+ goto walk_stop;
while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
- if ((gl->gl_name.ln_sbd == sdp) &&
+ if (gl->gl_name.ln_sbd == sdp &&
lockref_get_not_dead(&gl->gl_lockref))
examiner(gl);
+walk_stop:
rhashtable_walk_stop(&iter);
} while (cond_resched(), gl == ERR_PTR(-EAGAIN));
@@ -1803,7 +1881,7 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
int __init gfs2_glock_init(void)
{
- int ret;
+ int i, ret;
ret = rhashtable_init(&gl_hash_table, &ht_parms);
if (ret < 0)
@@ -1832,6 +1910,9 @@ int __init gfs2_glock_init(void)
return ret;
}
+ for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
+ init_waitqueue_head(glock_wait_table + i);
+
return 0;
}
@@ -1860,6 +1941,7 @@ static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
}
static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
{
struct gfs2_glock_iter *gi = seq->private;
loff_t n = *pos;
@@ -1892,6 +1974,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
}
static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
+ __releases(RCU)
{
struct gfs2_glock_iter *gi = seq->private;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 9ad4a6ac6c84..5e12220cc0c2 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -13,6 +13,7 @@
#include <linux/sched.h>
#include <linux/parser.h>
#include "incore.h"
+#include "util.h"
/* Options for hostdata parser */
@@ -181,7 +182,9 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
int create, struct gfs2_glock **glp);
+extern void gfs2_glock_hold(struct gfs2_glock *gl);
extern void gfs2_glock_put(struct gfs2_glock *gl);
+extern void gfs2_glock_queue_put(struct gfs2_glock *gl);
extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
u16 flags, struct gfs2_holder *gh);
extern void gfs2_holder_reinit(unsigned int state, u16 flags,
@@ -257,11 +260,44 @@ static inline bool gfs2_holder_initialized(struct gfs2_holder *gh)
return gh->gh_gl;
}
+/**
+ * glock_set_object - set the gl_object field of a glock
+ * @gl: the glock
+ * @object: the object
+ */
static inline void glock_set_object(struct gfs2_glock *gl, void *object)
{
spin_lock(&gl->gl_lockref.lock);
+ if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL))
+ gfs2_dump_glock(NULL, gl);
gl->gl_object = object;
spin_unlock(&gl->gl_lockref.lock);
}
+/**
+ * glock_clear_object - clear the gl_object field of a glock
+ * @gl: the glock
+ * @object: the object
+ *
+ * I'd love to similarly add this:
+ * else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object))
+ * gfs2_dump_glock(NULL, gl);
+ * Unfortunately, that's not possible because as soon as gfs2_delete_inode
+ * frees the block in the rgrp, another process can reassign it for an I_NEW
+ * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget.
+ * That means gfs2_delete_inode may subsequently try to call this function
+ * for a glock that's already pointing to a brand new inode. If we clear the
+ * new inode's gl_object, we'll introduce metadata corruption. Function
+ * gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also
+ * tries to clear gl_object, so it's more than just gfs2_delete_inode.
+ *
+ */
+static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
+{
+ spin_lock(&gl->gl_lockref.lock);
+ if (gl->gl_object == object)
+ gl->gl_object = NULL;
+ spin_unlock(&gl->gl_lockref.lock);
+}
+
#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5e69636d4dd3..dac6559e2195 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -329,32 +329,6 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
return 1;
}
-/**
- * gfs2_set_nlink - Set the inode's link count based on on-disk info
- * @inode: The inode in question
- * @nlink: The link count
- *
- * If the link count has hit zero, it must never be raised, whatever the
- * on-disk inode might say. When new struct inodes are created the link
- * count is set to 1, so that we can safely use this test even when reading
- * in on disk information for the first time.
- */
-
-static void gfs2_set_nlink(struct inode *inode, u32 nlink)
-{
- /*
- * We will need to review setting the nlink count here in the
- * light of the forthcoming ro bind mount work. This is a reminder
- * to do that.
- */
- if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
- if (nlink == 0)
- clear_nlink(inode);
- else
- set_nlink(inode, nlink);
- }
-}
-
static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
const struct gfs2_dinode *str = buf;
@@ -376,7 +350,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
- gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
+ set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
atime.tv_sec = be64_to_cpu(str->di_atime);
@@ -470,7 +444,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
(gh->gh_state == LM_ST_EXCLUSIVE)) {
spin_lock(&sdp->sd_trunc_lock);
if (list_empty(&ip->i_trunc_list))
- list_add(&sdp->sd_trunc_list, &ip->i_trunc_list);
+ list_add(&ip->i_trunc_list, &sdp->sd_trunc_list);
spin_unlock(&sdp->sd_trunc_lock);
wake_up(&sdp->sd_quota_wait);
return 1;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 73fce76e67ee..6e18e9793ec4 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -606,6 +606,7 @@ enum {
SDF_NOJOURNALID = 6,
SDF_RORECOVERY = 7, /* read only recovery */
SDF_SKIP_DLM_UNLOCK = 8,
+ SDF_FORCE_AIL_FLUSH = 9,
};
enum gfs2_freeze_state {
@@ -816,6 +817,7 @@ struct gfs2_sbd {
atomic_t sd_log_in_flight;
struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
+ int sd_log_error;
atomic_t sd_reserving_log;
wait_queue_head_t sd_reserving_log_wait;
@@ -831,7 +833,7 @@ struct gfs2_sbd {
atomic_t sd_freeze_state;
struct mutex sd_freeze_mutex;
- char sd_fsname[GFS2_FSNAME_LEN];
+ char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
char sd_table_name[GFS2_FSNAME_LEN];
char sd_proto_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index acca501f8110..863749e29bf9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -109,7 +109,7 @@ static void gfs2_set_iop(struct inode *inode)
* @no_addr: The inode number
* @no_formal_ino: The inode generation number
* @blktype: Requested block type (GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED;
- * GFS2_BLKST_FREE do indicate not to verify)
+ * GFS2_BLKST_FREE to indicate not to verify)
*
* If @type is DT_UNKNOWN, the inode type is fetched from disk.
*
@@ -145,7 +145,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
if (unlikely(error))
goto fail;
flush_delayed_work(&ip->i_gl->gl_work);
- glock_set_object(ip->i_gl, ip);
error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (unlikely(error))
@@ -170,11 +169,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
}
}
+ glock_set_object(ip->i_gl, ip);
set_bit(GIF_INVALID, &ip->i_flags);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail_put;
- flush_delayed_work(&ip->i_iopen_gh.gh_gl->gl_work);
glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_glock_put(io_gl);
io_gl = NULL;
@@ -202,14 +201,14 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
fail_refresh:
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- glock_set_object(ip->i_iopen_gh.gh_gl, NULL);
+ glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_put:
if (io_gl)
gfs2_glock_put(io_gl);
+ glock_clear_object(ip->i_gl, ip);
if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
- glock_set_object(ip->i_gl, NULL);
fail:
iget_failed(inode);
return ERR_PTR(error);
@@ -706,8 +705,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
if (error)
goto fail_free_inode;
-
+ flush_delayed_work(&ip->i_gl->gl_work);
glock_set_object(ip->i_gl, ip);
+
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
if (error)
goto fail_free_inode;
@@ -775,14 +775,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
return error;
fail_gunlock3:
+ glock_clear_object(io_gl, ip);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
gfs2_glock_put(io_gl);
fail_gunlock2:
if (io_gl)
clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
fail_free_inode:
- if (ip->i_gl)
+ if (ip->i_gl) {
+ glock_clear_object(ip->i_gl, ip);
gfs2_glock_put(ip->i_gl);
+ }
gfs2_rsqa_delete(ip, NULL);
fail_free_acls:
if (default_acl)
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 0515f0a68637..65f33a0ac190 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -23,8 +23,6 @@
#include "sys.h"
#include "trace_gfs2.h"
-extern struct workqueue_struct *gfs2_control_wq;
-
/**
* gfs2_update_stats - Update time based stats
* @mv: Pointer to mean/variance structure to update
@@ -1059,6 +1057,7 @@ static void free_recover_size(struct lm_lockstruct *ls)
ls->ls_recover_submit = NULL;
ls->ls_recover_result = NULL;
ls->ls_recover_size = 0;
+ ls->ls_lvb_bits = NULL;
}
/* dlm calls before it does lock recovery */
@@ -1175,7 +1174,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
spin_unlock(&ls->ls_recover_spin);
}
-const struct dlm_lockspace_ops gdlm_lockspace_ops = {
+static const struct dlm_lockspace_ops gdlm_lockspace_ops = {
.recover_prep = gdlm_recover_prep,
.recover_slot = gdlm_recover_slot,
.recover_done = gdlm_recover_done,
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 9a624f694400..f72c44231406 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -898,6 +898,10 @@ static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
{
unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
+
+ if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
+ return 1;
+
return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
atomic_read(&sdp->sd_log_thresh2);
}
@@ -919,6 +923,15 @@ int gfs2_logd(void *data)
while (!kthread_should_stop()) {
+ /* Check for errors writing to the journal */
+ if (sdp->sd_log_error) {
+ gfs2_lm_withdraw(sdp,
+ "GFS2: fsid=%s: error %d: "
+ "withdrawing the file system to "
+ "prevent further damage.\n",
+ sdp->sd_fsname, sdp->sd_log_error);
+ }
+
did_flush = false;
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
gfs2_ail1_empty(sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 3010f9edd177..7dabbe721dba 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -207,8 +207,11 @@ static void gfs2_end_log_write(struct bio *bio)
struct page *page;
int i;
- if (bio->bi_status)
- fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
+ if (bio->bi_status) {
+ fs_err(sdp, "Error %d writing to journal, jid=%u\n",
+ bio->bi_status, sdp->sd_jdesc->jd_jid);
+ wake_up(&sdp->sd_logd_waitq);
+ }
bio_for_each_segment_all(bvec, bio, i) {
page = bvec->bv_page;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index fabe1614f879..61ef6c9be816 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -419,8 +419,9 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
brelse(bh);
ret = -EIO;
+ } else {
+ *bhp = bh;
}
- *bhp = bh;
return ret;
}
@@ -452,7 +453,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
if (buffer_uptodate(first_bh))
goto out;
if (!buffer_locked(first_bh))
- ll_rw_block(REQ_OP_READ, REQ_META, 1, &first_bh);
+ ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &first_bh);
dblock++;
extlen--;
@@ -461,7 +462,9 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
bh = gfs2_getbuf(gl, dblock, CREATE);
if (!buffer_uptodate(bh) && !buffer_locked(bh))
- ll_rw_block(REQ_OP_READ, REQ_RAHEAD | REQ_META, 1, &bh);
+ ll_rw_block(REQ_OP_READ,
+ REQ_RAHEAD | REQ_META | REQ_PRIO,
+ 1, &bh);
brelse(bh);
dblock++;
extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e76058d34b74..c0a4b3778f3f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1113,7 +1113,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
return error;
}
- snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
+ snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);
error = gfs2_sys_fs_add(sdp);
/*
@@ -1159,10 +1159,10 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
}
if (sdp->sd_args.ar_spectator)
- snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s",
+ snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s.s",
sdp->sd_table_name);
else
- snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u",
+ snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s.%u",
sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);
error = init_inodes(sdp, DO);
@@ -1388,7 +1388,6 @@ static void gfs2_kill_sb(struct super_block *sb)
sdp->sd_root_dir = NULL;
sdp->sd_master_dir = NULL;
shrink_dcache_sb(sb);
- gfs2_delete_debugfs_file(sdp);
free_percpu(sdp->sd_lkstats);
kill_block_super(sb);
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c2ca9566b764..e647938432bd 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -730,7 +730,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
if (PageUptodate(page))
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh)) {
- ll_rw_block(REQ_OP_READ, REQ_META, 1, &bh);
+ ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
goto unlock_out;
@@ -1474,8 +1474,11 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
{
if (error == 0 || error == -EROFS)
return;
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
+ sdp->sd_log_error = error;
+ wake_up(&sdp->sd_logd_waitq);
+ }
}
static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 836e38ba5d0a..95b2a57ded33 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -705,8 +705,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
rb_erase(n, &sdp->sd_rindex_tree);
if (gl) {
- glock_set_object(gl, NULL);
- gfs2_glock_add_to_lru(gl);
+ glock_clear_object(gl, rgd);
gfs2_glock_put(gl);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fdedec379b78..769841185ce5 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -924,6 +924,7 @@ restart:
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
gfs2_gl_hash_clear(sdp);
+ gfs2_delete_debugfs_file(sdp);
/* Unmount the locking protocol */
gfs2_lm_unmount(sdp);
@@ -943,9 +944,9 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
struct gfs2_sbd *sdp = sb->s_fs_info;
gfs2_quota_sync(sb, -1);
- if (wait && sdp)
+ if (wait)
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
- return 0;
+ return sdp->sd_log_error;
}
void gfs2_freeze_func(struct work_struct *work)
@@ -1295,7 +1296,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
* gfs2_drop_inode - Drop an inode (test for remote unlink)
* @inode: The inode to drop
*
- * If we've received a callback on an iopen lock then its because a
+ * If we've received a callback on an iopen lock then it's because a
* remote node tried to deallocate the inode but failed due to this node
* still having the inode open. Here we mark the link count zero
* since we know that it must have reached zero if the GLF_DEMOTE flag
@@ -1317,6 +1318,23 @@ static int gfs2_drop_inode(struct inode *inode)
if (test_bit(GLF_DEMOTE, &gl->gl_flags))
clear_nlink(inode);
}
+
+ /*
+ * When under memory pressure when an inode's link count has dropped to
+ * zero, defer deleting the inode to the delete workqueue. This avoids
+ * calling into DLM under memory pressure, which can deadlock.
+ */
+ if (!inode->i_nlink &&
+ unlikely(current->flags & PF_MEMALLOC) &&
+ gfs2_holder_initialized(&ip->i_iopen_gh)) {
+ struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
+
+ gfs2_glock_hold(gl);
+ if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+ gfs2_glock_queue_put(gl);
+ return false;
+ }
+
return generic_drop_inode(inode);
}
@@ -1501,6 +1519,22 @@ out_qs:
}
/**
+ * gfs2_glock_put_eventually
+ * @gl: The glock to put
+ *
+ * When under memory pressure, trigger a deferred glock put to make sure we
+ * won't call into DLM and deadlock. Otherwise, put the glock directly.
+ */
+
+static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
+{
+ if (current->flags & PF_MEMALLOC)
+ gfs2_glock_queue_put(gl);
+ else
+ gfs2_glock_put(gl);
+}
+
+/**
* gfs2_evict_inode - Remove an inode from cache
* @inode: The inode to evict
*
@@ -1544,9 +1578,14 @@ static void gfs2_evict_inode(struct inode *inode)
goto alloc_failed;
}
+ /* Deletes should never happen under memory pressure anymore. */
+ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
+ goto out;
+
/* Must not read inode block until block type has been verified */
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
if (unlikely(error)) {
+ glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
goto out;
@@ -1562,6 +1601,12 @@ static void gfs2_evict_inode(struct inode *inode)
goto out_truncate;
}
+ /*
+ * The inode may have been recreated in the meantime.
+ */
+ if (inode->i_nlink)
+ goto out_truncate;
+
alloc_failed:
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
@@ -1595,6 +1640,11 @@ alloc_failed:
goto out_unlock;
}
+ /* We're about to clear the bitmap for the dinode, but as soon as we
+ do, gfs2_create_inode can create another inode at the same block
+ location and try to set gl_object again. We clear gl_object here so
+ that subsequent inode creates don't see an old gl_object. */
+ glock_clear_object(ip->i_gl, ip);
error = gfs2_dinode_dealloc(ip);
goto out_unlock;
@@ -1623,14 +1673,17 @@ out_unlock:
gfs2_rs_deltree(&ip->i_res);
if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
+ glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq(&ip->i_iopen_gh);
}
gfs2_holder_uninit(&ip->i_iopen_gh);
}
- if (gfs2_holder_initialized(&gh))
+ if (gfs2_holder_initialized(&gh)) {
+ glock_clear_object(ip->i_gl, ip);
gfs2_glock_dq_uninit(&gh);
+ }
if (error && error != GLR_TRYFAILED && error != -EROFS)
fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
out:
@@ -1640,15 +1693,19 @@ out:
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);
- glock_set_object(ip->i_gl, NULL);
+ glock_clear_object(ip->i_gl, ip);
wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
gfs2_glock_add_to_lru(ip->i_gl);
- gfs2_glock_put(ip->i_gl);
+ gfs2_glock_put_eventually(ip->i_gl);
ip->i_gl = NULL;
if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
- glock_set_object(ip->i_iopen_gh.gh_gl, NULL);
+ struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
+
+ glock_clear_object(gl, ip);
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+ gfs2_glock_hold(gl);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+ gfs2_glock_put_eventually(gl);
}
}
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index c81295f407f6..3926f95a6eb7 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -151,6 +151,7 @@ extern struct kmem_cache *gfs2_rgrpd_cachep;
extern struct kmem_cache *gfs2_quotad_cachep;
extern struct kmem_cache *gfs2_qadata_cachep;
extern mempool_t *gfs2_page_pool;
+extern struct workqueue_struct *gfs2_control_wq;
static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
unsigned int *p)
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 54179554c7d2..ea09e41dbb49 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -25,6 +25,7 @@
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
+#include "super.h"
#include "trans.h"
#include "util.h"
@@ -1209,8 +1210,12 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
if (namel > GFS2_EA_MAX_NAME_LEN)
return -ERANGE;
- if (value == NULL)
- return gfs2_xattr_remove(ip, type, name);
+ if (value == NULL) {
+ error = gfs2_xattr_remove(ip, type, name);
+ if (error == -ENODATA && !(flags & XATTR_REPLACE))
+ error = 0;
+ return error;
+ }
if (ea_check_size(sdp, namel, size))
return -ERANGE;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 78b41e1d5c67..60726ae7cf26 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -619,16 +619,10 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
if (!sb->s_root)
goto out_no_root;
- /* logical blocks are represented by 40 bits in pxd_t, etc. */
- sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
-#if BITS_PER_LONG == 32
- /*
- * Page cache is indexed by long.
- * I would use MAX_LFS_FILESIZE, but it's only half as big
+ /* logical blocks are represented by 40 bits in pxd_t, etc.
+ * and page cache is indexed by long
*/
- sb->s_maxbytes = min(((u64) PAGE_SIZE << 32) - 1,
- (u64)sb->s_maxbytes);
-#endif
+ sb->s_maxbytes = min(((loff_t)sb->s_blocksize) << 40, MAX_LFS_FILESIZE);
sb->s_time_gran = 1;
return 0;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index ac2dfe0c5a9c..e6c8954a4e89 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -997,7 +997,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
#ifdef CONFIG_DEBUG_LOCK_ALLOC
if (key) {
- lockdep_init_map(&kn->dep_map, "s_active", key, 0);
+ lockdep_init_map(&kn->dep_map, "kn->count", key, 0);
kn->flags |= KERNFS_LOCKDEP;
}
#endif
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 20fbcab97753..5f940d2a136b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -144,7 +144,7 @@ static void next_decode_page(struct nfsd4_compoundargs *argp)
argp->p = page_address(argp->pagelist[0]);
argp->pagelist++;
if (argp->pagelen < PAGE_SIZE) {
- argp->end = argp->p + (argp->pagelen>>2);
+ argp->end = argp->p + XDR_QUADLEN(argp->pagelen);
argp->pagelen = 0;
} else {
argp->end = argp->p + (PAGE_SIZE>>2);
@@ -1279,9 +1279,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
argp->pagelen -= pages * PAGE_SIZE;
len -= pages * PAGE_SIZE;
- argp->p = (__be32 *)page_address(argp->pagelist[0]);
- argp->pagelist++;
- argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
+ next_decode_page(argp);
}
argp->p += XDR_QUADLEN(len);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 38d0383dc7f9..bc69d40c4e8b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -969,7 +969,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
int use_wgather;
loff_t pos = offset;
unsigned int pflags = current->flags;
- int flags = 0;
+ rwf_t flags = 0;
if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
/*
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 3d424a51cabb..f0fd3adb1693 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -446,14 +446,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
ovl_path_upper(dentry, &upperpath);
realfile = ovl_path_open(&upperpath, O_RDONLY);
- smp_mb__before_spinlock();
+
inode_lock(inode);
if (!od->upperfile) {
if (IS_ERR(realfile)) {
inode_unlock(inode);
return PTR_ERR(realfile);
}
- od->upperfile = realfile;
+ smp_store_release(&od->upperfile, realfile);
} else {
/* somebody has beaten us to it */
if (!IS_ERR(realfile))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 719c2e943ea1..98fd8f6df851 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1408,12 +1408,13 @@ static const struct file_operations proc_fail_nth_operations = {
static int sched_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
+ struct pid_namespace *ns = inode->i_sb->s_fs_info;
struct task_struct *p;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
- proc_sched_show_task(p, m);
+ proc_sched_show_task(p, ns, m);
put_task_struct(p);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 50493edc30e5..e5709343feb7 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -7,14 +7,14 @@ static int devinfo_show(struct seq_file *f, void *v)
{
int i = *(loff_t *) v;
- if (i < CHRDEV_MAJOR_HASH_SIZE) {
+ if (i < CHRDEV_MAJOR_MAX) {
if (i == 0)
seq_puts(f, "Character devices:\n");
chrdev_show(f, i);
}
#ifdef CONFIG_BLOCK
else {
- i -= CHRDEV_MAJOR_HASH_SIZE;
+ i -= CHRDEV_MAJOR_MAX;
if (i == 0)
seq_puts(f, "\nBlock devices:\n");
blkdev_show(f, i);
@@ -25,7 +25,7 @@ static int devinfo_show(struct seq_file *f, void *v)
static void *devinfo_start(struct seq_file *f, loff_t *pos)
{
- if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
+ if (*pos < (BLKDEV_MAJOR_MAX + CHRDEV_MAJOR_MAX))
return pos;
return NULL;
}
@@ -33,7 +33,7 @@ static void *devinfo_start(struct seq_file *f, loff_t *pos)
static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
{
(*pos)++;
- if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
+ if (*pos >= (BLKDEV_MAJOR_MAX + CHRDEV_MAJOR_MAX))
return NULL;
return pos;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index 0cc7033aa413..61b58c7b6531 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -33,7 +33,7 @@ const struct file_operations generic_ro_fops = {
EXPORT_SYMBOL(generic_ro_fops);
-static inline int unsigned_offsets(struct file *file)
+static inline bool unsigned_offsets(struct file *file)
{
return file->f_mode & FMODE_UNSIGNED_OFFSET;
}
@@ -633,7 +633,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
EXPORT_SYMBOL(iov_shorten);
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, int type, int flags)
+ loff_t *ppos, int type, rwf_t flags)
{
struct kiocb kiocb;
ssize_t ret;
@@ -655,7 +655,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, int type, int flags)
+ loff_t *ppos, int type, rwf_t flags)
{
ssize_t ret = 0;
@@ -871,7 +871,7 @@ out:
#endif
static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
- loff_t *pos, int flags)
+ loff_t *pos, rwf_t flags)
{
size_t tot_len;
ssize_t ret = 0;
@@ -899,7 +899,7 @@ out:
}
ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
- int flags)
+ rwf_t flags)
{
if (!file->f_op->read_iter)
return -EINVAL;
@@ -908,7 +908,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
EXPORT_SYMBOL(vfs_iter_read);
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
- loff_t *pos, int flags)
+ loff_t *pos, rwf_t flags)
{
size_t tot_len;
ssize_t ret = 0;
@@ -935,7 +935,7 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
}
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
- int flags)
+ rwf_t flags)
{
if (!file->f_op->write_iter)
return -EINVAL;
@@ -944,7 +944,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
EXPORT_SYMBOL(vfs_iter_write);
ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
- unsigned long vlen, loff_t *pos, int flags)
+ unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
@@ -962,7 +962,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
EXPORT_SYMBOL(vfs_readv);
ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
- unsigned long vlen, loff_t *pos, int flags)
+ unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
@@ -981,7 +981,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
EXPORT_SYMBOL(vfs_writev);
static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
- unsigned long vlen, int flags)
+ unsigned long vlen, rwf_t flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
@@ -1001,7 +1001,7 @@ static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
}
static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
- unsigned long vlen, int flags)
+ unsigned long vlen, rwf_t flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
@@ -1027,7 +1027,7 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
}
static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
- unsigned long vlen, loff_t pos, int flags)
+ unsigned long vlen, loff_t pos, rwf_t flags)
{
struct fd f;
ssize_t ret = -EBADF;
@@ -1050,7 +1050,7 @@ static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
}
static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
- unsigned long vlen, loff_t pos, int flags)
+ unsigned long vlen, loff_t pos, rwf_t flags)
{
struct fd f;
ssize_t ret = -EBADF;
@@ -1094,7 +1094,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
- int, flags)
+ rwf_t, flags)
{
loff_t pos = pos_from_hilo(pos_h, pos_l);
@@ -1114,7 +1114,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
- int, flags)
+ rwf_t, flags)
{
loff_t pos = pos_from_hilo(pos_h, pos_l);
@@ -1127,7 +1127,7 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
#ifdef CONFIG_COMPAT
static size_t compat_readv(struct file *file,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos, int flags)
+ unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
@@ -1147,7 +1147,7 @@ static size_t compat_readv(struct file *file,
static size_t do_compat_readv(compat_ulong_t fd,
const struct compat_iovec __user *vec,
- compat_ulong_t vlen, int flags)
+ compat_ulong_t vlen, rwf_t flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret;
@@ -1173,7 +1173,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
static long do_compat_preadv64(unsigned long fd,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos, int flags)
+ unsigned long vlen, loff_t pos, rwf_t flags)
{
struct fd f;
ssize_t ret;
@@ -1211,7 +1211,7 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen, loff_t, pos, int, flags)
+ unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
return do_compat_preadv64(fd, vec, vlen, pos, flags);
}
@@ -1220,7 +1220,7 @@ COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
- int, flags)
+ rwf_t, flags)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
@@ -1232,7 +1232,7 @@ COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
static size_t compat_writev(struct file *file,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos, int flags)
+ unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
@@ -1254,7 +1254,7 @@ static size_t compat_writev(struct file *file,
static size_t do_compat_writev(compat_ulong_t fd,
const struct compat_iovec __user* vec,
- compat_ulong_t vlen, int flags)
+ compat_ulong_t vlen, rwf_t flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret;
@@ -1279,7 +1279,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
static long do_compat_pwritev64(unsigned long fd,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos, int flags)
+ unsigned long vlen, loff_t pos, rwf_t flags)
{
struct fd f;
ssize_t ret;
@@ -1317,7 +1317,7 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen, loff_t, pos, int, flags)
+ unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
return do_compat_pwritev64(fd, vec, vlen, pos, flags);
}
@@ -1325,7 +1325,7 @@ COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
- compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags)
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
diff --git a/fs/select.c b/fs/select.c
index 9d5f15ed87fe..c6362e38ae92 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1164,11 +1164,7 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
if (ufdset) {
return compat_get_bitmap(fdset, ufdset, nr);
} else {
- /* Tricky, must clear full unsigned long in the
- * kernel fdset at the end, ALIGN makes sure that
- * actually happens.
- */
- memset(fdset, 0, ALIGN(nr, BITS_PER_LONG));
+ zero_fd_set(nr, fdset);
return 0;
}
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index b0d5897bc4e6..886085b47c75 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -109,27 +109,24 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
goto out;
WRITE_ONCE(uwq->waken, true);
/*
- * The implicit smp_mb__before_spinlock in try_to_wake_up()
- * renders uwq->waken visible to other CPUs before the task is
- * waken.
+ * The Program-Order guarantees provided by the scheduler
+ * ensure uwq->waken is visible before the task is woken.
*/
ret = wake_up_state(wq->private, mode);
- if (ret)
+ if (ret) {
/*
* Wake only once, autoremove behavior.
*
- * After the effect of list_del_init is visible to the
- * other CPUs, the waitqueue may disappear from under
- * us, see the !list_empty_careful() in
- * handle_userfault(). try_to_wake_up() has an
- * implicit smp_mb__before_spinlock, and the
- * wq->private is read before calling the extern
- * function "wake_up_state" (which in turns calls
- * try_to_wake_up). While the spin_lock;spin_unlock;
- * wouldn't be enough, the smp_mb__before_spinlock is
- * enough to avoid an explicit smp_mb() here.
+ * After the effect of list_del_init is visible to the other
+ * CPUs, the waitqueue may disappear from under us, see the
+ * !list_empty_careful() in handle_userfault().
+ *
+ * try_to_wake_up() has an implicit smp_mb(), and the
+ * wq->private is read before calling the extern function
+ * "wake_up_state" (which in turns calls try_to_wake_up).
*/
list_del_init(&wq->entry);
+ }
out:
return ret;
}