diff options
Diffstat (limited to 'fs')
53 files changed, 310 insertions, 227 deletions
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index df415c05939e..de1ae0bead3b 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -19,7 +19,7 @@ void afs_put_addrlist(struct afs_addr_list *alist) { if (alist && refcount_dec_and_test(&alist->usage)) - call_rcu(&alist->rcu, (rcu_callback_t)kfree); + kfree_rcu(alist, rcu); } /* diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index ff3994a6be23..6765949b3aab 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -244,6 +244,17 @@ static void afs_cm_destructor(struct afs_call *call) } /* + * Abort a service call from within an action function. + */ +static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error, + const char *why) +{ + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + abort_code, error, why); + afs_set_call_complete(call, error, 0); +} + +/* * The server supplied a list of callbacks that it wanted to break. */ static void SRXAFSCB_CallBack(struct work_struct *work) @@ -510,8 +521,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) afs_send_empty_reply(call); else - rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - 1, 1, "K-1"); + afs_abort_service_call(call, 1, 1, "K-1"); afs_put_call(call); _leave(""); diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index cfe62b154f68..e1b9ed679045 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -145,6 +145,7 @@ static int afs_do_probe_fileserver(struct afs_net *net, read_lock(&server->fs_lock); ac.alist = rcu_dereference_protected(server->addresses, lockdep_is_held(&server->fs_lock)); + afs_get_addrlist(ac.alist); read_unlock(&server->fs_lock); atomic_set(&server->probe_outstanding, ac.alist->nr_addrs); @@ -163,6 +164,7 @@ static int afs_do_probe_fileserver(struct afs_net *net, if (!in_progress) afs_fs_probe_done(server); + afs_put_addrlist(ac.alist); return in_progress; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 1d81fc4c3058..ef732dd4e7ef 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -81,7 +81,7 @@ enum afs_call_state { * List of server addresses. */ struct afs_addr_list { - struct rcu_head rcu; /* Must be first */ + struct rcu_head rcu; refcount_t usage; u32 version; /* Version */ unsigned char max_addrs; @@ -154,7 +154,7 @@ struct afs_call { }; unsigned char unmarshall; /* unmarshalling phase */ unsigned char addr_ix; /* Address in ->alist */ - bool incoming; /* T if incoming call */ + bool drop_ref; /* T if need to drop ref for incoming call */ bool send_pages; /* T if data from mapping should be sent */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ @@ -1209,8 +1209,16 @@ static inline void afs_set_call_complete(struct afs_call *call, ok = true; } spin_unlock_bh(&call->state_lock); - if (ok) + if (ok) { trace_afs_call_done(call); + + /* Asynchronous calls have two refs to release - one from the alloc and + * one queued with the work item - and we can't just deallocate the + * call because the work item may be queued again. + */ + if (call->drop_ref) + afs_put_call(call); + } } /* diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 58d396592250..1ecc67da6c1a 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -18,7 +18,6 @@ struct workqueue_struct *afs_async_calls; static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); -static void afs_delete_async_call(struct work_struct *); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); @@ -169,7 +168,7 @@ void afs_put_call(struct afs_call *call) int n = atomic_dec_return(&call->usage); int o = atomic_read(&net->nr_outstanding_calls); - trace_afs_call(call, afs_call_trace_put, n + 1, o, + trace_afs_call(call, afs_call_trace_put, n, o, __builtin_return_address(0)); ASSERTCMP(n, >=, 0); @@ -402,8 +401,10 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) /* If the call is going to be asynchronous, we need an extra ref for * the call to hold itself so the caller need not hang on to its ref. */ - if (call->async) + if (call->async) { afs_get_call(call, afs_call_trace_get); + call->drop_ref = true; + } /* create a call */ rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, @@ -413,7 +414,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) afs_wake_up_async_call : afs_wake_up_call_waiter), call->upgrade, - call->intr, + (call->intr ? RXRPC_PREINTERRUPTIBLE : + RXRPC_UNINTERRUPTIBLE), call->debug_id); if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); @@ -584,8 +586,6 @@ static void afs_deliver_to_call(struct afs_call *call) done: if (call->type->done) call->type->done(call); - if (state == AFS_CALL_COMPLETE && call->incoming) - afs_put_call(call); out: _leave(""); return; @@ -604,11 +604,7 @@ call_complete: long afs_wait_for_call_to_complete(struct afs_call *call, struct afs_addr_cursor *ac) { - signed long rtt2, timeout; long ret; - bool stalled = false; - u64 rtt; - u32 life, last_life; bool rxrpc_complete = false; DECLARE_WAITQUEUE(myself, current); @@ -619,14 +615,6 @@ long afs_wait_for_call_to_complete(struct afs_call *call, if (ret < 0) goto out; - rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); - rtt2 = nsecs_to_jiffies64(rtt) * 2; - if (rtt2 < 2) - rtt2 = 2; - - timeout = rtt2; - rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life); - add_wait_queue(&call->waitq, &myself); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -637,37 +625,19 @@ long afs_wait_for_call_to_complete(struct afs_call *call, call->need_attention = false; __set_current_state(TASK_RUNNING); afs_deliver_to_call(call); - timeout = rtt2; continue; } if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; - if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) { + if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) { /* rxrpc terminated the call. */ rxrpc_complete = true; break; } - if (call->intr && timeout == 0 && - life == last_life && signal_pending(current)) { - if (stalled) - break; - __set_current_state(TASK_RUNNING); - rxrpc_kernel_probe_life(call->net->socket, call->rxcall); - timeout = rtt2; - stalled = true; - continue; - } - - if (life != last_life) { - timeout = rtt2; - last_life = life; - stalled = false; - } - - timeout = schedule_timeout(timeout); + schedule(); } remove_wait_queue(&call->waitq, &myself); @@ -735,7 +705,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, u = atomic_fetch_add_unless(&call->usage, 1, 0); if (u != 0) { - trace_afs_call(call, afs_call_trace_wake, u, + trace_afs_call(call, afs_call_trace_wake, u + 1, atomic_read(&call->net->nr_outstanding_calls), __builtin_return_address(0)); @@ -745,21 +715,6 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, } /* - * Delete an asynchronous call. The work item carries a ref to the call struct - * that we need to release. - */ -static void afs_delete_async_call(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, async_work); - - _enter(""); - - afs_put_call(call); - - _leave(""); -} - -/* * Perform I/O processing on an asynchronous call. The work item carries a ref * to the call struct that we either need to release or to pass on. */ @@ -774,16 +729,6 @@ static void afs_process_async_call(struct work_struct *work) afs_deliver_to_call(call); } - if (call->state == AFS_CALL_COMPLETE) { - /* We have two refs to release - one from the alloc and one - * queued with the work item - and we can't just deallocate the - * call because the work item may be queued again. - */ - call->async_work.func = afs_delete_async_call; - if (!queue_work(afs_async_calls, &call->async_work)) - afs_put_call(call); - } - afs_put_call(call); _leave(""); } @@ -810,6 +755,7 @@ void afs_charge_preallocation(struct work_struct *work) if (!call) break; + call->drop_ref = true; call->async = true; call->state = AFS_CALL_SV_AWAIT_OP_ID; init_waitqueue_head(&call->waitq); diff --git a/fs/block_dev.c b/fs/block_dev.c index 69bf2fb6f7cd..9501880dff5e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1520,10 +1520,22 @@ rescan: if (ret) return ret; - if (invalidate) - set_capacity(disk, 0); - else if (disk->fops->revalidate_disk) - disk->fops->revalidate_disk(disk); + /* + * Historically we only set the capacity to zero for devices that + * support partitions (independ of actually having partitions created). + * Doing that is rather inconsistent, but changing it broke legacy + * udisks polling for legacy ide-cdrom devices. Use the crude check + * below to get the sane behavior for most device while not breaking + * userspace for this particular setup. + */ + if (invalidate) { + if (disk_part_scan_enabled(disk) || + !(disk->flags & GENHD_FL_REMOVABLE)) + set_capacity(disk, 0); + } else { + if (disk->fops->revalidate_disk) + disk->fops->revalidate_disk(disk); + } check_disk_size_change(disk, bdev, !invalidate); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 404e050ce8ee..7f09147872dc 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -856,9 +856,9 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags) found_raid1c34 = true; up_read(&sinfo->groups_sem); } - if (found_raid56) + if (!found_raid56) btrfs_clear_fs_incompat(fs_info, RAID56); - if (found_raid1c34) + if (!found_raid1c34) btrfs_clear_fs_incompat(fs_info, RAID1C34); } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1ccb3f8d528d..d267eb5caa7b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7783,6 +7783,7 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, { struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); + u16 csum_size; blk_status_t ret; /* @@ -7802,7 +7803,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, file_offset -= dip->logical_offset; file_offset >>= inode->i_sb->s_blocksize_bits; - io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset); + csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy); + io_bio->csum = orig_io_bio->csum + csum_size * file_offset; return 0; } @@ -9494,6 +9496,10 @@ out_fail: ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx); if (ret) commit_transaction = true; + } else if (sync_log) { + mutex_lock(&root->log_mutex); + list_del(&ctx.list); + mutex_unlock(&root->log_mutex); } if (commit_transaction) { ret = btrfs_commit_transaction(trans); diff --git a/fs/buffer.c b/fs/buffer.c index b8d28370cfd7..3f5758e01e40 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3019,49 +3019,6 @@ static void end_bio_bh_io_sync(struct bio *bio) bio_put(bio); } -/* - * This allows us to do IO even on the odd last sectors - * of a device, even if the block size is some multiple - * of the physical sector size. - * - * We'll just truncate the bio to the size of the device, - * and clear the end of the buffer head manually. - * - * Truly out-of-range accesses will turn into actual IO - * errors, this only handles the "we need to be able to - * do IO at the final sector" case. - */ -void guard_bio_eod(struct bio *bio) -{ - sector_t maxsector; - struct hd_struct *part; - - rcu_read_lock(); - part = __disk_get_part(bio->bi_disk, bio->bi_partno); - if (part) - maxsector = part_nr_sects_read(part); - else - maxsector = get_capacity(bio->bi_disk); - rcu_read_unlock(); - - if (!maxsector) - return; - - /* - * If the *whole* IO is past the end of the device, - * let it through, and the IO layer will turn it into - * an EIO. - */ - if (unlikely(bio->bi_iter.bi_sector >= maxsector)) - return; - - maxsector -= bio->bi_iter.bi_sector; - if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) - return; - - bio_truncate(bio, maxsector << 9); -} - static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, enum rw_hint write_hint, struct writeback_control *wbc) { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7e0190b1f821..5a478cd06e11 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1415,10 +1415,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; int err, want, got; bool direct_lock = false; + u32 map_flags; + u64 pool_flags; loff_t pos; loff_t limit = max(i_size_read(inode), fsc->max_file_size); @@ -1481,8 +1484,12 @@ retry_snap: goto out; } - /* FIXME: not complete since it doesn't account for being at quota */ - if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) { + down_read(&osdc->lock); + map_flags = osdc->osdmap->flags; + pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id); + up_read(&osdc->lock); + if ((map_flags & CEPH_OSDMAP_FULL) || + (pool_flags & CEPH_POOL_FLAG_FULL)) { err = -ENOSPC; goto out; } @@ -1575,7 +1582,8 @@ retry_snap: } if (written >= 0) { - if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL)) + if ((map_flags & CEPH_OSDMAP_NEARFULL) || + (pool_flags & CEPH_POOL_FLAG_NEARFULL)) iocb->ki_flags |= IOCB_DSYNC; written = generic_write_sync(iocb, written); } diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index ccfcc66aaf44..923be9399b21 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -1155,5 +1155,6 @@ void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc) pr_err("snapid map %llx -> %x still in use\n", sm->snap, sm->dev); } + kfree(sm); } } diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 606f26d862dc..cc3ada12848d 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -324,6 +324,8 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) if (full_path == NULL) goto cdda_exit; + convert_delimiter(full_path, '\\'); + cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path); if (!cifs_sb_master_tlink(cifs_sb)) { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 46ebaf3f0824..fa77fe5258b0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -530,6 +530,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) if (tcon->seal) seq_puts(s, ",seal"); + else if (tcon->ses->server->ignore_signature) + seq_puts(s, ",signloosely"); if (tcon->nocase) seq_puts(s, ",nocase"); if (tcon->local_lease) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index de82cfa44b1a..0d956360e984 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1281,6 +1281,7 @@ struct cifs_fid { __u64 volatile_fid; /* volatile file id for smb2 */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ __u8 create_guid[16]; + __u32 access; struct cifs_pending_open *pending_open; unsigned int epoch; #ifdef CONFIG_CIFS_DEBUG2 @@ -1741,6 +1742,12 @@ static inline bool is_retryable_error(int error) return false; } + +/* cifs_get_writable_file() flags */ +#define FIND_WR_ANY 0 +#define FIND_WR_FSUID_ONLY 1 +#define FIND_WR_WITH_DELETE 2 + #define MID_FREE 0 #define MID_REQUEST_ALLOCATED 1 #define MID_REQUEST_SUBMITTED 2 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 89eaaf46d1ca..e5cb681ec138 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -134,11 +134,12 @@ extern bool backup_cred(struct cifs_sb_info *); extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, unsigned int bytes_written); -extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); +extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int); extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, - bool fsuid_only, + int flags, struct cifsFileInfo **ret_file); extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, + int flags, struct cifsFileInfo **ret_file); extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 3c89569e7210..6f6fb3606a5d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1492,6 +1492,7 @@ openRetry: *oplock = rsp->OplockLevel; /* cifs fid stays in le */ oparms->fid->netfid = rsp->Fid; + oparms->fid->access = desired_access; /* Let caller know file was created so we can set the mode. */ /* Do we care about the CreateAction in any other cases? */ @@ -2115,7 +2116,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata) wdata2->tailsz = tailsz; wdata2->bytes = cur_len; - rc = cifs_get_writable_file(CIFS_I(inode), false, + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &wdata2->cfile); if (!wdata2->cfile) { cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 0ef099442f20..36e7b2fd2190 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -555,7 +555,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->close) server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); - fput(file); rc = -ENOMEM; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index bc9516ab4b34..8f9d849a0012 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1169,7 +1169,8 @@ try_again: rc = posix_lock_file(file, flock, NULL); up_write(&cinode->lock_sem); if (rc == FILE_LOCK_DEFERRED) { - rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker); + rc = wait_event_interruptible(flock->fl_wait, + list_empty(&flock->fl_blocked_member)); if (!rc) goto try_again; locks_delete_block(flock); @@ -1958,7 +1959,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, /* Return -EBADF if no handle is found and general rc otherwise */ int -cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, +cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, struct cifsFileInfo **ret_file) { struct cifsFileInfo *open_file, *inv_file = NULL; @@ -1966,7 +1967,8 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, bool any_available = false; int rc = -EBADF; unsigned int refind = 0; - + bool fsuid_only = flags & FIND_WR_FSUID_ONLY; + bool with_delete = flags & FIND_WR_WITH_DELETE; *ret_file = NULL; /* @@ -1998,6 +2000,8 @@ refind_writable: continue; if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) continue; + if (with_delete && !(open_file->fid.access & DELETE)) + continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { if (!open_file->invalidHandle) { /* found a good writable file */ @@ -2045,12 +2049,12 @@ refind_writable: } struct cifsFileInfo * -find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only) +find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) { struct cifsFileInfo *cfile; int rc; - rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile); + rc = cifs_get_writable_file(cifs_inode, flags, &cfile); if (rc) cifs_dbg(FYI, "couldn't find writable handle rc=%d", rc); @@ -2059,6 +2063,7 @@ find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only) int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, + int flags, struct cifsFileInfo **ret_file) { struct list_head *tmp; @@ -2085,7 +2090,7 @@ cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, kfree(full_path); cinode = CIFS_I(d_inode(cfile->dentry)); spin_unlock(&tcon->open_file_lock); - return cifs_get_writable_file(cinode, 0, ret_file); + return cifs_get_writable_file(cinode, flags, ret_file); } spin_unlock(&tcon->open_file_lock); @@ -2162,7 +2167,8 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) if (mapping->host->i_size - offset < (loff_t)to) to = (unsigned)(mapping->host->i_size - offset); - rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file); + rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, + &open_file); if (!rc) { bytes_written = cifs_write(open_file, open_file->pid, write_data, to - from, &offset); @@ -2355,7 +2361,7 @@ retry: if (cfile) cifsFileInfo_put(cfile); - rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile); + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); /* in case of an error store it to return later */ if (rc) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b5e6635c578e..b16f8d23e97b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -653,8 +653,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, */ if ((fattr->cf_nlink < 1) && !tcon->unix_ext && !info->DeletePending) { - cifs_dbg(1, "bogus file nlink value %u\n", - fattr->cf_nlink); + cifs_dbg(VFS, "bogus file nlink value %u\n", + fattr->cf_nlink); fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; } } @@ -2073,6 +2073,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) struct inode *inode = d_inode(dentry); struct super_block *sb = dentry->d_sb; char *full_path = NULL; + int count = 0; if (inode == NULL) return -ENOENT; @@ -2094,15 +2095,18 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) full_path, inode, inode->i_count.counter, dentry, cifs_get_time(dentry), jiffies); +again: if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); else rc = cifs_get_inode_info(&inode, full_path, NULL, sb, xid, NULL); - + if (rc == -EAGAIN && count++ < 10) + goto again; out: kfree(full_path); free_xid(xid); + return rc; } @@ -2187,7 +2191,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat, if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) stat->gid = current_fsgid(); } - return rc; + return 0; } int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start, @@ -2278,7 +2282,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, * writebehind data than the SMB timeout for the SetPathInfo * request would allow */ - open_file = find_writable_file(cifsInode, true); + open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); if (open_file) { tcon = tlink_tcon(open_file->tlink); server = tcon->ses->server; @@ -2428,7 +2432,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->ctime = NO_CHANGE_64; args->device = 0; - open_file = find_writable_file(cifsInode, true); + open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); if (open_file) { u16 nfid = open_file->fid.netfid; u32 npid = open_file->pid; @@ -2531,7 +2535,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) rc = 0; if (attrs->ia_valid & ATTR_MTIME) { - rc = cifs_get_writable_file(cifsInode, false, &wfile); + rc = cifs_get_writable_file(cifsInode, FIND_WR_ANY, &wfile); if (!rc) { tcon = tlink_tcon(wfile->tlink); rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index eb994e313c6a..b130efaf8feb 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -766,7 +766,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_tcon *tcon; /* if the file is already open for write, just use that fileid */ - open_file = find_writable_file(cinode, true); + open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY); if (open_file) { fid.netfid = open_file->fid.netfid; netpid = open_file->pid; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1cf207564ff9..a8c301ae00ed 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -521,7 +521,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, cifs_i = CIFS_I(inode); dosattrs = cifs_i->cifsAttrs | ATTR_READONLY; data.Attributes = cpu_to_le32(dosattrs); - cifs_get_writable_path(tcon, name, &cfile); + cifs_get_writable_path(tcon, name, FIND_WR_ANY, &cfile); tmprc = smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, ACL_NO_MODE, @@ -577,7 +577,7 @@ smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, { struct cifsFileInfo *cfile; - cifs_get_writable_path(tcon, from_name, &cfile); + cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile); return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, DELETE, SMB2_OP_RENAME, cfile); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e47190cae163..cfe9b800ea8c 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1364,6 +1364,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cfile->fid.persistent_fid = fid->persistent_fid; cfile->fid.volatile_fid = fid->volatile_fid; + cfile->fid.access = fid->access; #ifdef CONFIG_CIFS_DEBUG2 cfile->fid.mid = fid->mid; #endif /* CIFS_DEBUG2 */ @@ -2221,6 +2222,8 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, goto qdf_free; } + atomic_inc(&tcon->num_remote_opens); + qd_rsp = (struct smb2_query_directory_rsp *)rsp_iov[1].iov_base; if (qd_rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) { trace_smb3_query_dir_done(xid, fid->persistent_fid, @@ -3327,7 +3330,7 @@ static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offs * some servers (Windows2016) will not reflect recent writes in * QUERY_ALLOCATED_RANGES until SMB2_flush is called. */ - wrcfile = find_writable_file(cifsi, false); + wrcfile = find_writable_file(cifsi, FIND_WR_ANY); if (wrcfile) { filemap_write_and_wait(inode->i_mapping); smb2_flush_file(xid, tcon, &wrcfile->fid); @@ -3416,7 +3419,7 @@ static int smb3_fiemap(struct cifs_tcon *tcon, if (rc) goto out; - if (out_data_len < sizeof(struct file_allocated_range_buffer)) { + if (out_data_len && out_data_len < sizeof(struct file_allocated_range_buffer)) { rc = -EINVAL; goto out; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1234f9ccab03..28c0be5e69b7 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2771,6 +2771,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, atomic_inc(&tcon->num_remote_opens); oparms->fid->persistent_fid = rsp->PersistentFileId; oparms->fid->volatile_fid = rsp->VolatileFileId; + oparms->fid->access = oparms->desired_access; #ifdef CONFIG_CIFS_DEBUG2 oparms->fid->mid = le64_to_cpu(rsp->sync_hdr.MessageId); #endif /* CIFS_DEBUG2 */ diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 65cb09fa6ead..08c9f216a54d 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -539,6 +539,15 @@ int fscrypt_drop_inode(struct inode *inode) mk = ci->ci_master_key->payload.data[0]; /* + * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes + * protected by the key were cleaned by sync_filesystem(). But if + * userspace is still using the files, inodes can be dirtied between + * then and now. We mustn't lose any writes, so skip dirty inodes here. + */ + if (inode->i_state & I_DIRTY_ALL) + return 0; + + /* * Note: since we aren't holding ->mk_secret_sem, the result here can * immediately become outdated. But there's no correctness problem with * unnecessarily evicting. Nor is there a correctness problem with not diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 634b09d18b77..db987b5110a9 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -1090,21 +1090,12 @@ static const struct file_operations fops_regset32 = { * This function creates a file in debugfs with the given name that reports * the names and values of a set of 32-bit registers. If the @mode variable * is so set it can be read from. Writing is not supported. - * - * This function will return a pointer to a dentry if it succeeds. This - * pointer must be passed to the debugfs_remove() function when the file is - * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be - * returned. - * - * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will - * be returned. */ -struct dentry *debugfs_create_regset32(const char *name, umode_t mode, - struct dentry *parent, - struct debugfs_regset32 *regset) +void debugfs_create_regset32(const char *name, umode_t mode, + struct dentry *parent, + struct debugfs_regset32 *regset) { - return debugfs_create_file(name, mode, parent, regset, &fops_regset32); + debugfs_create_file(name, mode, parent, regset, &fops_regset32); } EXPORT_SYMBOL_GPL(debugfs_create_regset32); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index b041b66002db..eee3c92a9ebf 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1854,9 +1854,9 @@ fetch_events: waiter = true; init_waitqueue_entry(&wait, current); - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); __add_wait_queue_exclusive(&ep->wq, &wait); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); } for (;;) { @@ -1904,9 +1904,9 @@ send_events: goto fetch_events; if (waiter) { - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); __remove_wait_queue(&ep->wq, &wait); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); } return res; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0c7c4adb664e..c8dff4c68141 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -43,7 +43,7 @@ #include <linux/uaccess.h> #include <linux/iversion.h> #include <linux/unicode.h> - +#include <linux/part_stat.h> #include <linux/kthread.h> #include <linux/freezer.h> @@ -927,7 +927,6 @@ void ext4_update_dynamic_rev(struct super_block *sb) static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) { struct block_device *bdev; - char b[BDEVNAME_SIZE]; bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) @@ -935,8 +934,9 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) return bdev; fail: - ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", - __bdevname(dev, b), PTR_ERR(bdev)); + ext4_msg(sb, KERN_ERR, + "failed to open journal device unknown-block(%u,%u) %ld", + MAJOR(dev), MINOR(dev), PTR_ERR(bdev)); return NULL; } diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index d218ebdafa4a..04bfaf63752c 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -13,6 +13,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/proc_fs.h> +#include <linux/part_stat.h> #include "ext4.h" #include "ext4_jbd2.h" diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5355be6b6755..088c3e7a1080 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -22,6 +22,7 @@ #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/quotaops.h> +#include <linux/part_stat.h> #include <crypto/hash.h> #include <linux/fscrypt.h> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 65a7a432dfee..d398b2d90c6c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -24,6 +24,7 @@ #include <linux/sysfs.h> #include <linux/quota.h> #include <linux/unicode.h> +#include <linux/part_stat.h> #include "f2fs.h" #include "node.h" diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 594b05ae16c9..71946da84388 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -750,6 +750,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return NULL; init_rwsem(&ei->truncate_lock); + /* Zeroing to allow iput() even if partial initialized inode. */ + ei->mmu_private = 0; + ei->i_start = 0; + ei->i_logstart = 0; + ei->i_attrs = 0; + ei->i_pos = 0; + return &ei->vfs_inode; } @@ -1374,16 +1381,6 @@ out: return 0; } -static void fat_dummy_inode_init(struct inode *inode) -{ - /* Initialize this dummy inode to work as no-op. */ - MSDOS_I(inode)->mmu_private = 0; - MSDOS_I(inode)->i_start = 0; - MSDOS_I(inode)->i_logstart = 0; - MSDOS_I(inode)->i_attrs = 0; - MSDOS_I(inode)->i_pos = 0; -} - static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1844,13 +1841,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; - fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); diff --git a/fs/fcntl.c b/fs/fcntl.c index 9bc167562ee8..2e4c0fa2074b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -735,8 +735,9 @@ static void send_sigio_to_task(struct task_struct *p, return; switch (signum) { - kernel_siginfo_t si; - default: + default: { + kernel_siginfo_t si; + /* Queue a rt signal with the appropriate fd as its value. We use SI_SIGIO as the source, not SI_KERNEL, since kernel signals always get @@ -769,6 +770,7 @@ static void send_sigio_to_task(struct task_struct *p, si.si_fd = fd; if (!do_send_sig_info(signum, &si, p, type)) break; + } /* fall-through - fall back on the old plain SIGIO signal */ case 0: do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type); diff --git a/fs/file.c b/fs/file.c index a364e1a9b7e8..c8a4e4c86e55 100644 --- a/fs/file.c +++ b/fs/file.c @@ -540,9 +540,14 @@ static int alloc_fd(unsigned start, unsigned flags) return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); } +int __get_unused_fd_flags(unsigned flags, unsigned long nofile) +{ + return __alloc_fd(current->files, 0, nofile, flags); +} + int get_unused_fd_flags(unsigned flags) { - return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); + return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE)); } EXPORT_SYMBOL(get_unused_fd_flags); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8e02d76fe104..97eec7522bf2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -276,12 +276,10 @@ static void flush_bg_queue(struct fuse_conn *fc) void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) { struct fuse_iqueue *fiq = &fc->iq; - bool async; if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; - async = req->args->end; /* * test_and_set_bit() implies smp_mb() between bit * changing and below intr_entry check. Pairs with @@ -324,7 +322,7 @@ void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); } - if (async) + if (test_bit(FR_ASYNC, &req->flags)) req->args->end(fc, req->args, req->out.h.error); put_request: fuse_put_request(fc, req); @@ -471,6 +469,8 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) req->in.h.opcode = args->opcode; req->in.h.nodeid = args->nodeid; req->args = args; + if (args->end) + __set_bit(FR_ASYNC, &req->flags); } ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aa75e2305b75..ca344bf71404 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -301,6 +301,7 @@ struct fuse_io_priv { * FR_SENT: request is in userspace, waiting for an answer * FR_FINISHED: request is finished * FR_PRIVATE: request is on private list + * FR_ASYNC: request is asynchronous */ enum fuse_req_flag { FR_ISREPLY, @@ -314,6 +315,7 @@ enum fuse_req_flag { FR_SENT, FR_FINISHED, FR_PRIVATE, + FR_ASYNC, }; /** diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2716d56ed0a0..8294851a9dd9 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1248,7 +1248,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (!(file->f_mode & FMODE_OPENED)) return finish_no_open(file, d); dput(d); - return 0; + return excl && (flags & O_CREAT) ? -EEXIST : 0; } BUG_ON(d != NULL); diff --git a/fs/inode.c b/fs/inode.c index 7d57068b6b7a..93d9252a00ab 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -138,6 +138,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; + atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; inode->i_fop = &no_open_fops; diff --git a/fs/internal.h b/fs/internal.h index f3f280b952a3..4d37912a5587 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -38,7 +38,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) /* * buffer.c */ -extern void guard_bio_eod(struct bio *bio); extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, get_block_t *get_block, struct iomap *iomap); diff --git a/fs/io_uring.c b/fs/io_uring.c index b12d33b12bc7..358f97be9c7b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -354,6 +354,7 @@ struct io_accept { struct sockaddr __user *addr; int __user *addr_len; int flags; + unsigned long nofile; }; struct io_sync { @@ -410,6 +411,7 @@ struct io_open { struct filename *filename; struct statx __user *buffer; struct open_how how; + unsigned long nofile; }; struct io_files_update { @@ -1084,6 +1086,7 @@ static void io_kill_timeout(struct io_kiocb *req) if (ret != -1) { atomic_inc(&req->ctx->cq_timeouts); list_del_init(&req->list); + req->flags |= REQ_F_COMP_LOCKED; io_cqring_fill_event(req, 0); io_put_req(req); } @@ -2934,6 +2937,7 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return ret; } + req->open.nofile = rlimit(RLIMIT_NOFILE); req->flags |= REQ_F_NEED_CLEANUP; return 0; } @@ -2975,6 +2979,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return ret; } + req->open.nofile = rlimit(RLIMIT_NOFILE); req->flags |= REQ_F_NEED_CLEANUP; return 0; } @@ -2992,7 +2997,7 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock) if (ret) goto err; - ret = get_unused_fd_flags(req->open.how.flags); + ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile); if (ret < 0) goto err; @@ -3918,6 +3923,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); accept->flags = READ_ONCE(sqe->accept_flags); + accept->nofile = rlimit(RLIMIT_NOFILE); return 0; } @@ -3929,7 +3935,8 @@ static int __io_accept(struct io_kiocb *req, bool force_nonblock) file_flags = force_nonblock ? O_NONBLOCK : 0; ret = __sys_accept4_file(req->file, file_flags, accept->addr, - accept->addr_len, accept->flags); + accept->addr_len, accept->flags, + accept->nofile); if (ret == -EAGAIN && force_nonblock) return -EAGAIN; if (ret == -ERESTARTSYS) @@ -4829,6 +4836,9 @@ static int io_req_defer_prep(struct io_kiocb *req, { ssize_t ret = 0; + if (!sqe) + return 0; + if (io_op_defs[req->opcode].file_table) { ret = io_grab_files(req); if (unlikely(ret)) @@ -5671,6 +5681,11 @@ err_req: if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) { req->flags |= REQ_F_LINK; INIT_LIST_HEAD(&req->link_list); + + if (io_alloc_async_ctx(req)) { + ret = -EAGAIN; + goto err_req; + } ret = io_req_defer_prep(req, sqe); if (ret) req->flags |= REQ_F_FAIL_LINK; @@ -6109,6 +6124,23 @@ static void io_file_ref_kill(struct percpu_ref *ref) complete(&data->done); } +static void io_file_ref_exit_and_free(struct work_struct *work) +{ + struct fixed_file_data *data; + + data = container_of(work, struct fixed_file_data, ref_work); + + /* + * Ensure any percpu-ref atomic switch callback has run, it could have + * been in progress when the files were being unregistered. Once + * that's done, we can safely exit and free the ref and containing + * data structure. + */ + rcu_barrier(); + percpu_ref_exit(&data->refs); + kfree(data); +} + static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { struct fixed_file_data *data = ctx->file_data; @@ -6121,14 +6153,14 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) flush_work(&data->ref_work); wait_for_completion(&data->done); io_ring_file_ref_flush(data); - percpu_ref_exit(&data->refs); __io_sqe_files_unregister(ctx); nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE); for (i = 0; i < nr_tables; i++) kfree(data->table[i].files); kfree(data->table); - kfree(data); + INIT_WORK(&data->ref_work, io_file_ref_exit_and_free); + queue_work(system_wq, &data->ref_work); ctx->file_data = NULL; ctx->nr_user_files = 0; return 0; diff --git a/fs/locks.c b/fs/locks.c index 44b6da032842..b8a31c1c4fff 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -725,7 +725,6 @@ static void __locks_delete_block(struct file_lock *waiter) { locks_delete_global_blocked(waiter); list_del_init(&waiter->fl_blocked_member); - waiter->fl_blocker = NULL; } static void __locks_wake_up_blocks(struct file_lock *blocker) @@ -740,6 +739,13 @@ static void __locks_wake_up_blocks(struct file_lock *blocker) waiter->fl_lmops->lm_notify(waiter); else wake_up(&waiter->fl_wait); + + /* + * The setting of fl_blocker to NULL marks the "done" + * point in deleting a block. Paired with acquire at the top + * of locks_delete_block(). + */ + smp_store_release(&waiter->fl_blocker, NULL); } } @@ -754,24 +760,41 @@ int locks_delete_block(struct file_lock *waiter) int status = -ENOENT; /* - * If fl_blocker is NULL, it won't be set again as this thread - * "owns" the lock and is the only one that might try to claim - * the lock. So it is safe to test fl_blocker locklessly. - * Also if fl_blocker is NULL, this waiter is not listed on - * fl_blocked_requests for some lock, so no other request can - * be added to the list of fl_blocked_requests for this - * request. So if fl_blocker is NULL, it is safe to - * locklessly check if fl_blocked_requests is empty. If both - * of these checks succeed, there is no need to take the lock. + * If fl_blocker is NULL, it won't be set again as this thread "owns" + * the lock and is the only one that might try to claim the lock. + * + * We use acquire/release to manage fl_blocker so that we can + * optimize away taking the blocked_lock_lock in many cases. + * + * The smp_load_acquire guarantees two things: + * + * 1/ that fl_blocked_requests can be tested locklessly. If something + * was recently added to that list it must have been in a locked region + * *before* the locked region when fl_blocker was set to NULL. + * + * 2/ that no other thread is accessing 'waiter', so it is safe to free + * it. __locks_wake_up_blocks is careful not to touch waiter after + * fl_blocker is released. + * + * If a lockless check of fl_blocker shows it to be NULL, we know that + * no new locks can be inserted into its fl_blocked_requests list, and + * can avoid doing anything further if the list is empty. */ - if (waiter->fl_blocker == NULL && + if (!smp_load_acquire(&waiter->fl_blocker) && list_empty(&waiter->fl_blocked_requests)) return status; + spin_lock(&blocked_lock_lock); if (waiter->fl_blocker) status = 0; __locks_wake_up_blocks(waiter); __locks_delete_block(waiter); + + /* + * The setting of fl_blocker to NULL marks the "done" point in deleting + * a block. Paired with acquire at the top of this function. + */ + smp_store_release(&waiter->fl_blocker, NULL); spin_unlock(&blocked_lock_lock); return status; } @@ -1364,7 +1387,8 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + error = wait_event_interruptible(fl->fl_wait, + list_empty(&fl->fl_blocked_member)); if (error) break; } @@ -1449,7 +1473,8 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start, error = posix_lock_inode(inode, &fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker); + error = wait_event_interruptible(fl.fl_wait, + list_empty(&fl.fl_blocked_member)); if (!error) { /* * If we've been sleeping someone might have @@ -1652,7 +1677,8 @@ restart: locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->fl_wait, - !new_fl->fl_blocker, break_time); + list_empty(&new_fl->fl_blocked_member), + break_time); percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -2136,7 +2162,8 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + error = wait_event_interruptible(fl->fl_wait, + list_empty(&fl->fl_blocked_member)); if (error) break; } @@ -2413,7 +2440,8 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd, error = vfs_lock_file(filp, cmd, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + error = wait_event_interruptible(fl->fl_wait, + list_empty(&fl->fl_blocked_member)); if (error) break; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 989c30c98511..f1ff3076e4a4 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -153,6 +153,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; + clp->cl_minorversion = cl_init->minorversion; clp->cl_nfs_mod = cl_init->nfs_mod; if (!try_module_get(clp->cl_nfs_mod->owner)) goto error_dealloc; diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index e1b938457ab9..e113fcb4bb4c 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -832,6 +832,8 @@ static int nfs_parse_source(struct fs_context *fc, if (len > maxnamlen) goto out_hostname; + kfree(ctx->nfs_server.hostname); + /* N.B. caller will free nfs_server.hostname in all cases */ ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL); if (!ctx->nfs_server.hostname) @@ -1240,6 +1242,13 @@ static int nfs_fs_context_validate(struct fs_context *fc) } ctx->nfs_mod = nfs_mod; } + + /* Ensure the filesystem context has the correct fs_type */ + if (fc->fs_type != ctx->nfs_mod->nfs_fs) { + module_put(fc->fs_type->owner); + __module_get(ctx->nfs_mod->nfs_fs->owner); + fc->fs_type = ctx->nfs_mod->nfs_fs; + } return 0; out_no_device_name: diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 52270bfac120..1abf126c2df4 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(nfs_fscache_keys_lock); struct nfs_server_key { struct { uint16_t nfsversion; /* NFS protocol version */ + uint32_t minorversion; /* NFSv4 minor version */ uint16_t family; /* address family */ __be16 port; /* IP port */ } hdr; @@ -55,6 +56,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp) memset(&key, 0, sizeof(key)); key.hdr.nfsversion = clp->rpc_ops->version; + key.hdr.minorversion = clp->cl_minorversion; key.hdr.family = clp->cl_addr.ss_family; switch (clp->cl_addr.ss_family) { diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index ad6077404947..f3ece8ed3203 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -153,7 +153,7 @@ struct vfsmount *nfs_d_automount(struct path *path) /* Open a new filesystem context, transferring parameters from the * parent superblock, including the network namespace. */ - fc = fs_context_for_submount(&nfs_fs_type, path->dentry); + fc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry); if (IS_ERR(fc)) return ERR_CAST(fc); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 0cd767e5c977..0bd77cc1f639 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -216,7 +216,6 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) INIT_LIST_HEAD(&clp->cl_ds_clients); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - clp->cl_minorversion = cl_init->minorversion; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; clp->cl_mig_gen = 1; #if IS_ENABLED(CONFIG_NFS_V4_1) diff --git a/fs/open.c b/fs/open.c index 0788b3715731..b69d6eed67e6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -860,9 +860,6 @@ cleanup_file: * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * - * On successful return @file is a fully instantiated open file. After this, if - * an error occurs in ->atomic_open(), it needs to clean up with fput(). - * * Returns zero on success or -errno if the open failed. */ int finish_open(struct file *file, struct dentry *dentry, diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 444e2da4f60e..714c14c47ca5 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -93,6 +93,7 @@ config OVERLAY_FS_XINO_AUTO bool "Overlayfs: auto enable inode number mapping" default n depends on OVERLAY_FS + depends on 64BIT help If this config option is enabled then overlay filesystems will use unused high bits in undelying filesystem inode numbers to map all diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index a5317216de73..87c362f65448 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -244,6 +244,9 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) if (iocb->ki_flags & IOCB_WRITE) { struct inode *inode = file_inode(orig_iocb->ki_filp); + /* Actually acquired in ovl_write_iter() */ + __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb, + SB_FREEZE_WRITE); file_end_write(iocb->ki_filp); ovl_copyattr(ovl_inode_real(inode), inode); } @@ -346,6 +349,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) goto out; file_start_write(real.file); + /* Pacify lockdep, same trick as done in aio_write() */ + __sb_writers_release(file_inode(real.file)->i_sb, + SB_FREEZE_WRITE); aio_req->fd = real; real.flags = 0; aio_req->orig_iocb = iocb; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 3623d28aa4fa..3d3f2b8bdae5 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -318,7 +318,12 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb) return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0; } -static inline int ovl_inode_lock(struct inode *inode) +static inline void ovl_inode_lock(struct inode *inode) +{ + mutex_lock(&OVL_I(inode)->lock); +} + +static inline int ovl_inode_lock_interruptible(struct inode *inode) { return mutex_lock_interruptible(&OVL_I(inode)->lock); } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 319fe0d355b0..ac967f1cb6e5 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1411,6 +1411,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, if (ofs->config.xino == OVL_XINO_ON) pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); ofs->xino_mode = 0; + } else if (ofs->config.xino == OVL_XINO_OFF) { + ofs->xino_mode = -1; } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) { /* * This is a roundup of number of bits needed for encoding @@ -1623,8 +1625,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_stack_depth = 0; sb->s_maxbytes = MAX_LFS_FILESIZE; /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ - if (ofs->config.xino != OVL_XINO_OFF) + if (ofs->config.xino != OVL_XINO_OFF) { ofs->xino_mode = BITS_PER_LONG - 32; + if (!ofs->xino_mode) { + pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n"); + ofs->config.xino = OVL_XINO_OFF; + } + } /* alloc/destroy_inode needed for setting up traps in inode cache */ sb->s_op = &ovl_super_operations; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index ea005085803f..042f7eb4f7f4 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -509,7 +509,7 @@ int ovl_copy_up_start(struct dentry *dentry, int flags) struct inode *inode = d_inode(dentry); int err; - err = ovl_inode_lock(inode); + err = ovl_inode_lock_interruptible(inode); if (!err && ovl_already_copied_up_locked(dentry, flags)) { err = 1; /* Already copied up */ ovl_inode_unlock(inode); @@ -764,7 +764,7 @@ int ovl_nlink_start(struct dentry *dentry) return err; } - err = ovl_inode_lock(inode); + err = ovl_inode_lock_interruptible(inode); if (err) return err; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 072156c4f895..5c766330e493 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2599,7 +2599,6 @@ static int journal_init_dev(struct super_block *super, int result; dev_t jdev; fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; - char b[BDEVNAME_SIZE]; result = 0; @@ -2621,8 +2620,8 @@ static int journal_init_dev(struct super_block *super, result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; reiserfs_warning(super, "sh-458", - "cannot init journal device '%s': %i", - __bdevname(jdev, b), result); + "cannot init journal device unknown-block(%u,%u): %i", + MAJOR(jdev), MINOR(jdev), result); return result; } else if (jdev != super->s_dev) set_blocksize(journal->j_dev_bd, super->s_blocksize); diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 69aee3dfb660..3ce9829a6936 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -178,7 +178,8 @@ static void zonefs_update_stats(struct inode *inode, loff_t new_isize) * amount of readable data in the zone. */ static loff_t zonefs_check_zone_condition(struct inode *inode, - struct blk_zone *zone, bool warn) + struct blk_zone *zone, bool warn, + bool mount) { struct zonefs_inode_info *zi = ZONEFS_I(inode); @@ -196,13 +197,26 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, zone->wp = zone->start; return 0; case BLK_ZONE_COND_READONLY: - /* Do not allow writes in read-only zones */ + /* + * The write pointer of read-only zones is invalid. If such a + * zone is found during mount, the file size cannot be retrieved + * so we treat the zone as offline (mount == true case). + * Otherwise, keep the file size as it was when last updated + * so that the user can recover data. In both cases, writes are + * always disabled for the zone. + */ if (warn) zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", inode->i_ino); inode->i_flags |= S_IMMUTABLE; + if (mount) { + zone->cond = BLK_ZONE_COND_OFFLINE; + inode->i_mode &= ~0777; + zone->wp = zone->start; + return 0; + } inode->i_mode &= ~0222; - /* fallthrough */ + return i_size_read(inode); default: if (zi->i_ztype == ZONEFS_ZTYPE_CNV) return zi->i_max_size; @@ -231,7 +245,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, * as there is no inconsistency between the inode size and the amount of * data writen in the zone (data_size). */ - data_size = zonefs_check_zone_condition(inode, zone, true); + data_size = zonefs_check_zone_condition(inode, zone, true, false); isize = i_size_read(inode); if (zone->cond != BLK_ZONE_COND_OFFLINE && zone->cond != BLK_ZONE_COND_READONLY && @@ -274,7 +288,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, if (zone->cond != BLK_ZONE_COND_OFFLINE) { zone->cond = BLK_ZONE_COND_OFFLINE; data_size = zonefs_check_zone_condition(inode, zone, - false); + false, false); } } else if (zone->cond == BLK_ZONE_COND_READONLY || sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) { @@ -283,7 +297,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, if (zone->cond != BLK_ZONE_COND_READONLY) { zone->cond = BLK_ZONE_COND_READONLY; data_size = zonefs_check_zone_condition(inode, zone, - false); + false, false); } } @@ -975,7 +989,7 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, zi->i_zsector = zone->start; zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, zone->len << SECTOR_SHIFT); - zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true); + zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true, true); inode->i_uid = sbi->s_uid; inode->i_gid = sbi->s_gid; |