| author | Thomas Gleixner <tglx@linutronix.de> | 2016-01-12 11:01:12 +0100 | 
|---|---|---|
| committer | Thomas Gleixner <tglx@linutronix.de> | 2016-01-12 11:01:12 +0100 | 
| commit | 1f16f116b01c110db20ab808562c8b8bc3ee3d6e (patch) | |
| tree | 44db563f64cf5f8d62af8f99a61e2b248c44ea3a /fs | |
| parent | 03724ac3d48f8f0e3caf1d30fa134f8fd96c94e2 (diff) | |
| parent | f9eccf24615672896dc13251410c3f2f33a14f95 (diff) | |
Merge branches 'clockevents/4.4-fixes' and 'clockevents/4.5-fixes' of http://git.linaro.org/people/daniel.lezcano/linux into timers/urgent
Pull in fixes from Daniel Lezcano:
 - Fix the vt8500 timer, which could lock up the system when programmed with
   too small a delta (Roman Volkov); the failure mode is sketched after this
   list
 - Select CLKSRC_MMIO when the fsl_ftm_timer is enabled with COMPILE_TEST
   (Daniel Lezcano)
 - Prevent compiling timers that use the 'iomem' API when the architecture
   does not have HAS_IOMEM set (Richard Weinberger)
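The vt8500 fix itself is outside this fs/-limited diffstat, but the failure mode is a classic one-shot clockevent bug: if the match register is programmed too close to the free-running counter, the counter can pass the match before the compare latches and the interrupt never fires, so the system hangs waiting for a tick. A minimal sketch of the usual cure, clamping the delta to a hardware minimum — MIN_DELTA, read_counter() and write_match() are names assumed for this sketch, not the vt8500 driver's API:

```c
/*
 * Illustrative sketch only -- not the vt8500 driver's code. Clamp the
 * requested delta so the match value is never programmed closer to the
 * counter than the hardware can latch.
 */
#define MIN_DELTA 16u /* assumed hardware-specific lower bound */

static unsigned int fake_counter;       /* stand-in for the HW counter */

static unsigned int read_counter(void)  { return fake_counter; }
static void write_match(unsigned int m) { (void)m; /* program compare reg */ }

static int set_next_event_clamped(unsigned long delta)
{
	unsigned long d = delta < MIN_DELTA ? MIN_DELTA : delta;

	write_match(read_counter() + (unsigned int)d);
	return 0;
}
```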
Diffstat (limited to 'fs')
45 files changed, 439 insertions, 227 deletions
| diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 699941e90667..511078586fa1 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -451,9 +451,9 @@ void v9fs_evict_inode(struct inode *inode)  {  	struct v9fs_inode *v9inode = V9FS_I(inode); -	truncate_inode_pages_final(inode->i_mapping); +	truncate_inode_pages_final(&inode->i_data);  	clear_inode(inode); -	filemap_fdatawrite(inode->i_mapping); +	filemap_fdatawrite(&inode->i_data);  	v9fs_cache_inode_put_cookie(inode);  	/* clunk the fid stashed in writeback_fid */ diff --git a/fs/block_dev.c b/fs/block_dev.c index c25639e907bd..44d4a1e9244e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1523,11 +1523,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)  		WARN_ON_ONCE(bdev->bd_holders);  		sync_blockdev(bdev);  		kill_bdev(bdev); + +		bdev_write_inode(bdev);  		/* -		 * ->release can cause the queue to disappear, so flush all -		 * dirty data before. +		 * Detaching bdev inode from its wb in __destroy_inode() +		 * is too late: the queue which embeds its bdi (along with +		 * root wb) can be gone as soon as we put_disk() below.  		 */ -		bdev_write_inode(bdev); +		inode_detach_wb(bdev->bd_inode);  	}  	if (bdev->bd_contains == bdev) {  		if (disk->fops->release) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6dcdb2ec9211..d453d62ab0c6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -355,7 +355,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,  	index = srcu_read_lock(&fs_info->subvol_srcu); -	root = btrfs_read_fs_root_no_name(fs_info, &root_key); +	root = btrfs_get_fs_root(fs_info, &root_key, false);  	if (IS_ERR(root)) {  		srcu_read_unlock(&fs_info->subvol_srcu, index);  		ret = PTR_ERR(root); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8c58191249cc..35489e7129a7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3416,6 +3416,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,  struct btrfs_block_group_cache *btrfs_lookup_block_group(  						 struct btrfs_fs_info *info,  						 u64 bytenr); +void btrfs_get_block_group(struct btrfs_block_group_cache *cache);  void btrfs_put_block_group(struct btrfs_block_group_cache *cache);  int get_block_group_index(struct btrfs_block_group_cache *cache);  struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, @@ -3479,6 +3480,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,  			   struct btrfs_root *root, u64 bytes_used,  			   u64 type, u64 chunk_objectid, u64 chunk_offset,  			   u64 size); +struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( +				struct btrfs_fs_info *fs_info, +				const u64 chunk_offset);  int btrfs_remove_block_group(struct btrfs_trans_handle *trans,  			     struct btrfs_root *root, u64 group_start,  			     struct extent_map *em); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index acf3ed11cfb6..c4661db2b72a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -124,7 +124,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)  	return (cache->flags & bits) == bits;  } -static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) +void btrfs_get_block_group(struct btrfs_block_group_cache *cache)  {  	atomic_inc(&cache->count);  } @@ -5915,19 +5915,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,  			set_extent_dirty(info->pinned_extents,  					 bytenr, bytenr + num_bytes - 1,  					 GFP_NOFS | __GFP_NOFAIL); -			/* -	
		 * No longer have used bytes in this block group, queue -			 * it for deletion. -			 */ -			if (old_val == 0) { -				spin_lock(&info->unused_bgs_lock); -				if (list_empty(&cache->bg_list)) { -					btrfs_get_block_group(cache); -					list_add_tail(&cache->bg_list, -						      &info->unused_bgs); -				} -				spin_unlock(&info->unused_bgs_lock); -			}  		}  		spin_lock(&trans->transaction->dirty_bgs_lock); @@ -5939,6 +5926,22 @@ static int update_block_group(struct btrfs_trans_handle *trans,  		}  		spin_unlock(&trans->transaction->dirty_bgs_lock); +		/* +		 * No longer have used bytes in this block group, queue it for +		 * deletion. We do this after adding the block group to the +		 * dirty list to avoid races between cleaner kthread and space +		 * cache writeout. +		 */ +		if (!alloc && old_val == 0) { +			spin_lock(&info->unused_bgs_lock); +			if (list_empty(&cache->bg_list)) { +				btrfs_get_block_group(cache); +				list_add_tail(&cache->bg_list, +					      &info->unused_bgs); +			} +			spin_unlock(&info->unused_bgs_lock); +		} +  		btrfs_put_block_group(cache);  		total -= num_bytes;  		bytenr += num_bytes; @@ -8105,21 +8108,47 @@ reada:  }  /* - * TODO: Modify related function to add related node/leaf to dirty_extent_root, - * for later qgroup accounting. - * - * Current, this function does nothing. + * These may not be seen by the usual inc/dec ref code so we have to + * add them here.   */ +static int record_one_subtree_extent(struct btrfs_trans_handle *trans, +				     struct btrfs_root *root, u64 bytenr, +				     u64 num_bytes) +{ +	struct btrfs_qgroup_extent_record *qrecord; +	struct btrfs_delayed_ref_root *delayed_refs; + +	qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); +	if (!qrecord) +		return -ENOMEM; + +	qrecord->bytenr = bytenr; +	qrecord->num_bytes = num_bytes; +	qrecord->old_roots = NULL; + +	delayed_refs = &trans->transaction->delayed_refs; +	spin_lock(&delayed_refs->lock); +	if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord)) +		kfree(qrecord); +	spin_unlock(&delayed_refs->lock); + +	return 0; +} +  static int account_leaf_items(struct btrfs_trans_handle *trans,  			      struct btrfs_root *root,  			      struct extent_buffer *eb)  {  	int nr = btrfs_header_nritems(eb); -	int i, extent_type; +	int i, extent_type, ret;  	struct btrfs_key key;  	struct btrfs_file_extent_item *fi;  	u64 bytenr, num_bytes; +	/* We can be called directly from walk_up_proc() */ +	if (!root->fs_info->quota_enabled) +		return 0; +  	for (i = 0; i < nr; i++) {  		btrfs_item_key_to_cpu(eb, &key, i); @@ -8138,6 +8167,10 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,  			continue;  		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + +		ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); +		if (ret) +			return ret;  	}  	return 0;  } @@ -8206,8 +8239,6 @@ static int adjust_slots_upwards(struct btrfs_root *root,  /*   * root_eb is the subtree root and is locked before this function is called. - * TODO: Modify this function to mark all (including complete shared node) - * to dirty_extent_root to allow it get accounted in qgroup.   
*/  static int account_shared_subtree(struct btrfs_trans_handle *trans,  				  struct btrfs_root *root, @@ -8285,6 +8316,11 @@ walk_down:  			btrfs_tree_read_lock(eb);  			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);  			path->locks[level] = BTRFS_READ_LOCK_BLOCKING; + +			ret = record_one_subtree_extent(trans, root, child_bytenr, +							root->nodesize); +			if (ret) +				goto out;  		}  		if (level == 0) { @@ -10256,6 +10292,47 @@ out:  	return ret;  } +struct btrfs_trans_handle * +btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, +				     const u64 chunk_offset) +{ +	struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; +	struct extent_map *em; +	struct map_lookup *map; +	unsigned int num_items; + +	read_lock(&em_tree->lock); +	em = lookup_extent_mapping(em_tree, chunk_offset, 1); +	read_unlock(&em_tree->lock); +	ASSERT(em && em->start == chunk_offset); + +	/* +	 * We need to reserve 3 + N units from the metadata space info in order +	 * to remove a block group (done at btrfs_remove_chunk() and at +	 * btrfs_remove_block_group()), which are used for: +	 * +	 * 1 unit for adding the free space inode's orphan (located in the tree +	 * of tree roots). +	 * 1 unit for deleting the block group item (located in the extent +	 * tree). +	 * 1 unit for deleting the free space item (located in tree of tree +	 * roots). +	 * N units for deleting N device extent items corresponding to each +	 * stripe (located in the device tree). +	 * +	 * In order to remove a block group we also need to reserve units in the +	 * system space info in order to update the chunk tree (update one or +	 * more device items and remove one chunk item), but this is done at +	 * btrfs_remove_chunk() through a call to check_system_chunk(). +	 */ +	map = (struct map_lookup *)em->bdev; +	num_items = 3 + map->num_stripes; +	free_extent_map(em); + +	return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root, +							   num_items, 1); +} +  /*   * Process the unused_bgs list and remove any that don't have any allocated   * space inside of them. @@ -10322,8 +10399,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)  		 * Want to do this before we do anything else so we can recover  		 * properly if we fail to join the transaction.  		 */ -		/* 1 for btrfs_orphan_reserve_metadata() */ -		trans = btrfs_start_transaction(root, 1); +		trans = btrfs_start_trans_remove_block_group(fs_info, +						     block_group->key.objectid);  		if (IS_ERR(trans)) {  			btrfs_dec_block_group_ro(root, block_group);  			ret = PTR_ERR(trans); @@ -10403,11 +10480,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)  		 * until transaction commit to do the actual discard.  		 */  		if (trimming) { -			WARN_ON(!list_empty(&block_group->bg_list)); -			spin_lock(&trans->transaction->deleted_bgs_lock); +			spin_lock(&fs_info->unused_bgs_lock); +			/* +			 * A concurrent scrub might have added us to the list +			 * fs_info->unused_bgs, so use a list_move operation +			 * to add the block group to the deleted_bgs list. 
+			 */  			list_move(&block_group->bg_list,  				  &trans->transaction->deleted_bgs); -			spin_unlock(&trans->transaction->deleted_bgs_lock); +			spin_unlock(&fs_info->unused_bgs_lock);  			btrfs_get_block_group(block_group);  		}  end_trans: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 977e715f0bf2..0f09526aa7d9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1291,7 +1291,8 @@ out:   * on error we return an unlocked page and the error value   * on success we return a locked page and 0   */ -static int prepare_uptodate_page(struct page *page, u64 pos, +static int prepare_uptodate_page(struct inode *inode, +				 struct page *page, u64 pos,  				 bool force_uptodate)  {  	int ret = 0; @@ -1306,6 +1307,10 @@ static int prepare_uptodate_page(struct page *page, u64 pos,  			unlock_page(page);  			return -EIO;  		} +		if (page->mapping != inode->i_mapping) { +			unlock_page(page); +			return -EAGAIN; +		}  	}  	return 0;  } @@ -1324,6 +1329,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,  	int faili;  	for (i = 0; i < num_pages; i++) { +again:  		pages[i] = find_or_create_page(inode->i_mapping, index + i,  					       mask | __GFP_WRITE);  		if (!pages[i]) { @@ -1333,13 +1339,17 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,  		}  		if (i == 0) -			err = prepare_uptodate_page(pages[i], pos, +			err = prepare_uptodate_page(inode, pages[i], pos,  						    force_uptodate); -		if (i == num_pages - 1) -			err = prepare_uptodate_page(pages[i], +		if (!err && i == num_pages - 1) +			err = prepare_uptodate_page(inode, pages[i],  						    pos + write_bytes, false);  		if (err) {  			page_cache_release(pages[i]); +			if (err == -EAGAIN) { +				err = 0; +				goto again; +			}  			faili = i - 1;  			goto fail;  		} @@ -1882,8 +1892,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  	struct btrfs_log_ctx ctx;  	int ret = 0;  	bool full_sync = 0; -	const u64 len = end - start + 1; +	u64 len; +	/* +	 * The range length can be represented by u64, we have to do the typecasts +	 * to avoid signed overflow if it's [0, LLONG_MAX] eg. 
from fsync() +	 */ +	len = (u64)end - (u64)start + 1;  	trace_btrfs_sync_file(file, datasync);  	/* @@ -2071,8 +2086,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  			}  		}  		if (!full_sync) { -			ret = btrfs_wait_ordered_range(inode, start, -						       end - start + 1); +			ret = btrfs_wait_ordered_range(inode, start, len);  			if (ret) {  				btrfs_end_transaction(trans, root);  				goto out; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 85a1f8621b51..cfe99bec49de 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -891,7 +891,7 @@ out:  		spin_unlock(&block_group->lock);  		ret = 0; -		btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now", +		btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuilding it now",  			block_group->key.objectid);  	} @@ -2972,7 +2972,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,  		     u64 cont1_bytes, u64 min_bytes)  {  	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; -	struct btrfs_free_space *entry; +	struct btrfs_free_space *entry = NULL;  	int ret = -ENOSPC;  	u64 bitmap_offset = offset_to_bitmap(ctl, offset); @@ -2983,8 +2983,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,  	 * The bitmap that covers offset won't be in the list unless offset  	 * is just its start offset.  	 */ -	entry = list_first_entry(bitmaps, struct btrfs_free_space, list); -	if (entry->offset != bitmap_offset) { +	if (!list_empty(bitmaps)) +		entry = list_first_entry(bitmaps, struct btrfs_free_space, list); + +	if (!entry || entry->offset != bitmap_offset) {  		entry = tree_search_offset(ctl, bitmap_offset, 1, 0);  		if (entry && list_empty(&entry->list))  			list_add(&entry->list, bitmaps); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 994490d5fa64..a70c5790f8f5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4046,9 +4046,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,   */  static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)  { -	struct btrfs_trans_handle *trans;  	struct btrfs_root *root = BTRFS_I(dir)->root; -	int ret;  	/*  	 * 1 for the possible orphan item @@ -4057,27 +4055,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)  	 * 1 for the inode ref  	 * 1 for the inode  	 */ -	trans = btrfs_start_transaction(root, 5); -	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) -		return trans; - -	if (PTR_ERR(trans) == -ENOSPC) { -		u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); - -		trans = btrfs_start_transaction(root, 0); -		if (IS_ERR(trans)) -			return trans; -		ret = btrfs_cond_migrate_bytes(root->fs_info, -					       &root->fs_info->trans_block_rsv, -					       num_bytes, 5); -		if (ret) { -			btrfs_end_transaction(trans, root); -			return ERR_PTR(ret); -		} -		trans->block_rsv = &root->fs_info->trans_block_rsv; -		trans->bytes_reserved = num_bytes; -	} -	return trans; +	return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);  }  static int btrfs_unlink(struct inode *dir, struct dentry *dentry) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 93e12c18ffd7..5279fdae7142 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -993,9 +993,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,  	mutex_lock(&fs_info->qgroup_ioctl_lock);  	if (!fs_info->quota_root)  		goto out; -	spin_lock(&fs_info->qgroup_lock);  	fs_info->quota_enabled = 
0;  	fs_info->pending_quota_state = 0; +	btrfs_qgroup_wait_for_completion(fs_info); +	spin_lock(&fs_info->qgroup_lock);  	quota_root = fs_info->quota_root;  	fs_info->quota_root = NULL;  	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; @@ -1461,6 +1462,8 @@ struct btrfs_qgroup_extent_record  	struct btrfs_qgroup_extent_record *entry;  	u64 bytenr = record->bytenr; +	assert_spin_locked(&delayed_refs->lock); +  	while (*p) {  		parent_node = *p;  		entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2907a77fb1f6..b091d94ceef6 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3432,7 +3432,9 @@ out:  static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,  					  struct btrfs_device *scrub_dev,  					  u64 chunk_offset, u64 length, -					  u64 dev_offset, int is_dev_replace) +					  u64 dev_offset, +					  struct btrfs_block_group_cache *cache, +					  int is_dev_replace)  {  	struct btrfs_mapping_tree *map_tree =  		&sctx->dev_root->fs_info->mapping_tree; @@ -3445,8 +3447,18 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,  	em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);  	read_unlock(&map_tree->map_tree.lock); -	if (!em) -		return -EINVAL; +	if (!em) { +		/* +		 * Might have been an unused block group deleted by the cleaner +		 * kthread or relocation. +		 */ +		spin_lock(&cache->lock); +		if (!cache->removed) +			ret = -EINVAL; +		spin_unlock(&cache->lock); + +		return ret; +	}  	map = (struct map_lookup *)em->bdev;  	if (em->start != chunk_offset) @@ -3483,6 +3495,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,  	u64 length;  	u64 chunk_offset;  	int ret = 0; +	int ro_set;  	int slot;  	struct extent_buffer *l;  	struct btrfs_key key; @@ -3568,7 +3581,21 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,  		scrub_pause_on(fs_info);  		ret = btrfs_inc_block_group_ro(root, cache);  		scrub_pause_off(fs_info); -		if (ret) { + +		if (ret == 0) { +			ro_set = 1; +		} else if (ret == -ENOSPC) { +			/* +			 * btrfs_inc_block_group_ro return -ENOSPC when it +			 * failed in creating new chunk for metadata. +			 * It is not a problem for scrub/replace, because +			 * metadata are always cowed, and our scrub paused +			 * commit_transactions. +			 */ +			ro_set = 0; +		} else { +			btrfs_warn(fs_info, "failed setting block group ro, ret=%d\n", +				   ret);  			btrfs_put_block_group(cache);  			break;  		} @@ -3577,7 +3604,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,  		dev_replace->cursor_left = found_key.offset;  		dev_replace->item_needs_writeback = 1;  		ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, -				  found_key.offset, is_dev_replace); +				  found_key.offset, cache, is_dev_replace);  		/*  		 * flush, submit all pending read and write bios, afterwards @@ -3611,7 +3638,30 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,  		scrub_pause_off(fs_info); -		btrfs_dec_block_group_ro(root, cache); +		if (ro_set) +			btrfs_dec_block_group_ro(root, cache); + +		/* +		 * We might have prevented the cleaner kthread from deleting +		 * this block group if it was already unused because we raced +		 * and set it to RO mode first. So add it back to the unused +		 * list, otherwise it might not ever be deleted unless a manual +		 * balance is triggered or it becomes used and unused again. 
+		 */ +		spin_lock(&cache->lock); +		if (!cache->removed && !cache->ro && cache->reserved == 0 && +		    btrfs_block_group_used(&cache->item) == 0) { +			spin_unlock(&cache->lock); +			spin_lock(&fs_info->unused_bgs_lock); +			if (list_empty(&cache->bg_list)) { +				btrfs_get_block_group(cache); +				list_add_tail(&cache->bg_list, +					      &fs_info->unused_bgs); +			} +			spin_unlock(&fs_info->unused_bgs_lock); +		} else { +			spin_unlock(&cache->lock); +		}  		btrfs_put_block_group(cache);  		if (ret) diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index c8c3d70c31ff..8b72b005bfb9 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -898,8 +898,10 @@ int btrfs_test_free_space_cache(void)  	}  	root = btrfs_alloc_dummy_root(); -	if (!root) +	if (IS_ERR(root)) { +		ret = PTR_ERR(root);  		goto out; +	}  	root->fs_info = btrfs_alloc_dummy_fs_info();  	if (!root->fs_info) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 418c6a2ad7d8..be8eae80ff65 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -274,7 +274,6 @@ loop:  	cur_trans->num_dirty_bgs = 0;  	spin_lock_init(&cur_trans->dirty_bgs_lock);  	INIT_LIST_HEAD(&cur_trans->deleted_bgs); -	spin_lock_init(&cur_trans->deleted_bgs_lock);  	spin_lock_init(&cur_trans->dropped_roots_lock);  	list_add_tail(&cur_trans->list, &fs_info->trans_list);  	extent_io_tree_init(&cur_trans->dirty_pages, @@ -592,6 +591,38 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,  	return start_transaction(root, num_items, TRANS_START,  				 BTRFS_RESERVE_FLUSH_ALL);  } +struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( +					struct btrfs_root *root, +					unsigned int num_items, +					int min_factor) +{ +	struct btrfs_trans_handle *trans; +	u64 num_bytes; +	int ret; + +	trans = btrfs_start_transaction(root, num_items); +	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) +		return trans; + +	trans = btrfs_start_transaction(root, 0); +	if (IS_ERR(trans)) +		return trans; + +	num_bytes = btrfs_calc_trans_metadata_size(root, num_items); +	ret = btrfs_cond_migrate_bytes(root->fs_info, +				       &root->fs_info->trans_block_rsv, +				       num_bytes, +				       min_factor); +	if (ret) { +		btrfs_end_transaction(trans, root); +		return ERR_PTR(ret); +	} + +	trans->block_rsv = &root->fs_info->trans_block_rsv; +	trans->bytes_reserved = num_bytes; + +	return trans; +}  struct btrfs_trans_handle *btrfs_start_transaction_lflush(  					struct btrfs_root *root, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b05b2f64d913..64c8221b6165 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -77,8 +77,8 @@ struct btrfs_transaction {  	 */  	struct mutex cache_write_mutex;  	spinlock_t dirty_bgs_lock; +	/* Protected by spin lock fs_info->unused_bgs_lock. 
*/  	struct list_head deleted_bgs; -	spinlock_t deleted_bgs_lock;  	spinlock_t dropped_roots_lock;  	struct btrfs_delayed_ref_root delayed_refs;  	int aborted; @@ -185,6 +185,10 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans,  			  struct btrfs_root *root);  struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,  						   unsigned int num_items); +struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( +					struct btrfs_root *root, +					unsigned int num_items, +					int min_factor);  struct btrfs_trans_handle *btrfs_start_transaction_lflush(  					struct btrfs_root *root,  					unsigned int num_items); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a6df8fdc1312..a23399e8e3ab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1973,8 +1973,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,  	if (srcdev->writeable) {  		fs_devices->rw_devices--;  		/* zero out the old super if it is writable */ -		btrfs_scratch_superblocks(srcdev->bdev, -					rcu_str_deref(srcdev->name)); +		btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);  	}  	if (srcdev->bdev) @@ -2024,8 +2023,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,  	btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);  	if (tgtdev->bdev) { -		btrfs_scratch_superblocks(tgtdev->bdev, -					rcu_str_deref(tgtdev->name)); +		btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);  		fs_info->fs_devices->open_devices--;  	}  	fs_info->fs_devices->num_devices--; @@ -2853,7 +2851,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)  	if (ret)  		return ret; -	trans = btrfs_start_transaction(root, 0); +	trans = btrfs_start_trans_remove_block_group(root->fs_info, +						     chunk_offset);  	if (IS_ERR(trans)) {  		ret = PTR_ERR(trans);  		btrfs_std_error(root->fs_info, ret, NULL); @@ -3123,7 +3122,7 @@ static int chunk_profiles_filter(u64 chunk_type,  	return 1;  } -static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, +static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,  			      struct btrfs_balance_args *bargs)  {  	struct btrfs_block_group_cache *cache; @@ -3156,7 +3155,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,  	return ret;  } -static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, +static int chunk_usage_filter(struct btrfs_fs_info *fs_info,  		u64 chunk_offset, struct btrfs_balance_args *bargs)  {  	struct btrfs_block_group_cache *cache; @@ -3549,12 +3548,11 @@ again:  			ret = btrfs_force_chunk_alloc(trans, chunk_root,  						      BTRFS_BLOCK_GROUP_DATA); +			btrfs_end_transaction(trans, chunk_root);  			if (ret < 0) {  				mutex_unlock(&fs_info->delete_unused_bgs_mutex);  				goto error;  			} - -			btrfs_end_transaction(trans, chunk_root);  			chunk_reserved = 1;  		} diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ec5712372732..d5c84f6b1353 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -382,7 +382,7 @@ struct map_lookup {  #define BTRFS_BALANCE_ARGS_LIMIT	(1ULL << 5)  #define BTRFS_BALANCE_ARGS_LIMIT_RANGE	(1ULL << 6)  #define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7) -#define BTRFS_BALANCE_ARGS_USAGE_RANGE	(1ULL << 8) +#define BTRFS_BALANCE_ARGS_USAGE_RANGE	(1ULL << 10)  #define BTRFS_BALANCE_ARGS_MASK			\  	(BTRFS_BALANCE_ARGS_PROFILES |		\ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 6b66dd5d1540..a329f5ba35aa 100644 --- 
a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1831,11 +1831,11 @@ cifs_invalidate_mapping(struct inode *inode)   * @word: long word containing the bit lock   */  static int -cifs_wait_bit_killable(struct wait_bit_key *key) +cifs_wait_bit_killable(struct wait_bit_key *key, int mode)  { -	if (fatal_signal_pending(current)) -		return -ERESTARTSYS;  	freezable_schedule_unsafe(); +	if (signal_pending_state(mode, current)) +		return -ERESTARTSYS;  	return 0;  } diff --git a/fs/direct-io.c b/fs/direct-io.c index cb5337d8c273..602e8441bc0f 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1169,6 +1169,16 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,  		}  	} +	/* Once we sampled i_size check for reads beyond EOF */ +	dio->i_size = i_size_read(inode); +	if (iov_iter_rw(iter) == READ && offset >= dio->i_size) { +		if (dio->flags & DIO_LOCKING) +			mutex_unlock(&inode->i_mutex); +		kmem_cache_free(dio_cache, dio); +		retval = 0; +		goto out; +	} +  	/*  	 * For file extending writes updating i_size before data writeouts  	 * complete can expose uninitialized blocks in dumb filesystems. @@ -1222,7 +1232,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,  	sdio.next_block_for_io = -1;  	dio->iocb = iocb; -	dio->i_size = i_size_read(inode);  	spin_lock_init(&dio->bio_lock);  	dio->refcount = 1; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 87e9d796cf7d..3a37bd3f9637 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -421,7 +421,7 @@ static void lowcomms_write_space(struct sock *sk)  	if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {  		con->sock->sk->sk_write_pending--; -		clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags); +		clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags);  	}  	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) @@ -1448,7 +1448,7 @@ static void send_to_sock(struct connection *con)  					      msg_flags);  			if (ret == -EAGAIN || ret == 0) {  				if (ret == -EAGAIN && -				    test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) && +				    test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&  				    !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {  					/* Notify TCP that we're limited by the  					 * application window size. 
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 73c64daa0f55..60f03b78914e 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -592,10 +592,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)  			}  			unlock_page(page);  		} -		if (PageDirty(page) || PageWriteback(page)) -			*uptodate = true; -		else -			*uptodate = PageUptodate(page); +		*uptodate = PageUptodate(page);  		EXOFS_DBGMSG2("index=0x%lx uptodate=%d\n", index, *uptodate);  		return page;  	} else { diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index af06830bfc00..1a0835073663 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -389,7 +389,7 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)  	struct ext4_crypto_ctx	*ctx;  	struct page		*ciphertext_page = NULL;  	struct bio		*bio; -	ext4_lblk_t		lblk = ex->ee_block; +	ext4_lblk_t		lblk = le32_to_cpu(ex->ee_block);  	ext4_fsblk_t		pblk = ext4_ext_pblock(ex);  	unsigned int		len = ext4_ext_get_actual_len(ex);  	int			ret, err = 0; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 750063f7a50c..cc7ca4e87144 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -26,6 +26,7 @@  #include <linux/seqlock.h>  #include <linux/mutex.h>  #include <linux/timer.h> +#include <linux/version.h>  #include <linux/wait.h>  #include <linux/blockgroup_lock.h>  #include <linux/percpu_counter.h> @@ -727,19 +728,55 @@ struct move_extent {  	<= (EXT4_GOOD_OLD_INODE_SIZE +			\  	    (einode)->i_extra_isize))			\ +/* + * We use an encoding that preserves the times for extra epoch "00": + * + * extra  msb of                         adjust for signed + * epoch  32-bit                         32-bit tv_sec to + * bits   time    decoded 64-bit tv_sec  64-bit tv_sec      valid time range + * 0 0    1    -0x80000000..-0x00000001  0x000000000 1901-12-13..1969-12-31 + * 0 0    0    0x000000000..0x07fffffff  0x000000000 1970-01-01..2038-01-19 + * 0 1    1    0x080000000..0x0ffffffff  0x100000000 2038-01-19..2106-02-07 + * 0 1    0    0x100000000..0x17fffffff  0x100000000 2106-02-07..2174-02-25 + * 1 0    1    0x180000000..0x1ffffffff  0x200000000 2174-02-25..2242-03-16 + * 1 0    0    0x200000000..0x27fffffff  0x200000000 2242-03-16..2310-04-04 + * 1 1    1    0x280000000..0x2ffffffff  0x300000000 2310-04-04..2378-04-22 + * 1 1    0    0x300000000..0x37fffffff  0x300000000 2378-04-22..2446-05-10 + * + * Note that previous versions of the kernel on 64-bit systems would + * incorrectly use extra epoch bits 1,1 for dates between 1901 and + * 1970.  e2fsck will correct this, assuming that it is run on the + * affected filesystem before 2242. + */ +  static inline __le32 ext4_encode_extra_time(struct timespec *time)  { -       return cpu_to_le32((sizeof(time->tv_sec) > 4 ? -			   (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) | -                          ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); +	u32 extra = sizeof(time->tv_sec) > 4 ? 
+		((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0; +	return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));  }  static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)  { -       if (sizeof(time->tv_sec) > 4) -	       time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) -			       << 32; -       time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; +	if (unlikely(sizeof(time->tv_sec) > 4 && +			(extra & cpu_to_le32(EXT4_EPOCH_MASK)))) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0) +		/* Handle legacy encoding of pre-1970 dates with epoch +		 * bits 1,1.  We assume that by kernel version 4.20, +		 * everyone will have run fsck over the affected +		 * filesystems to correct the problem.  (This +		 * backwards compatibility may be removed before this +		 * time, at the discretion of the ext4 developers.) +		 */ +		u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK; +		if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0) +			extra_bits = 0; +		time->tv_sec += extra_bits << 32; +#else +		time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; +#endif +	} +	time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;  }  #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)			       \ diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index abe2401ce405..e8e7af62ac95 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -52,7 +52,7 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook  	/* Symlink is encrypted */  	sd = (struct ext4_encrypted_symlink_data *)caddr;  	cstr.name = sd->encrypted_path; -	cstr.len  = le32_to_cpu(sd->len); +	cstr.len  = le16_to_cpu(sd->len);  	if ((cstr.len +  	     sizeof(struct ext4_encrypted_symlink_data) - 1) >  	    max_size) { diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 1b57c72f4a00..1420a3c614af 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -358,7 +358,7 @@ static int name##_open(struct inode *inode, struct file *file) \  	return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \  } \  \ -const struct file_operations ext4_seq_##name##_fops = { \ +static const struct file_operations ext4_seq_##name##_fops = { \  	.owner		= THIS_MODULE, \  	.open		= name##_open, \  	.read		= seq_read, \ diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index eae2c11268bc..8e3ee1936c7e 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -549,6 +549,8 @@ static int cuse_channel_release(struct inode *inode, struct file *file)  		unregister_chrdev_region(cc->cdev->dev, 1);  		cdev_del(cc->cdev);  	} +	/* Base reference is now owned by "fud" */ +	fuse_conn_put(&cc->fc);  	rc = fuse_dev_release(inode, file);	/* puts the base reference */ diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e0faf8f2c868..570ca4053c80 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1049,6 +1049,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,  		tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);  		flush_dcache_page(page); +		iov_iter_advance(ii, tmp);  		if (!tmp) {  			unlock_page(page);  			page_cache_release(page); @@ -1061,7 +1062,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,  		req->page_descs[req->num_pages].length = tmp;  		req->num_pages++; -		iov_iter_advance(ii, tmp);  		count += tmp;  		pos += tmp;  		offset += tmp; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 89463eee6791..ca181e81c765 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ 
-1009,7 +1009,8 @@ out:  }  /* Fast check whether buffer is already attached to the required transaction */ -static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh) +static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, +							bool undo)  {  	struct journal_head *jh;  	bool ret = false; @@ -1036,6 +1037,9 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)  	jh = READ_ONCE(bh->b_private);  	if (!jh)  		goto out; +	/* For undo access buffer must have data copied */ +	if (undo && !jh->b_committed_data) +		goto out;  	if (jh->b_transaction != handle->h_transaction &&  	    jh->b_next_transaction != handle->h_transaction)  		goto out; @@ -1073,7 +1077,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)  	struct journal_head *jh;  	int rc; -	if (jbd2_write_access_granted(handle, bh)) +	if (jbd2_write_access_granted(handle, bh, false))  		return 0;  	jh = jbd2_journal_add_journal_head(bh); @@ -1210,7 +1214,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)  	char *committed_data = NULL;  	JBUFFER_TRACE(jh, "entry"); -	if (jbd2_write_access_granted(handle, bh)) +	if (jbd2_write_access_granted(handle, bh, true))  		return 0;  	jh = jbd2_journal_add_journal_head(bh); @@ -2152,6 +2156,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,  		if (!buffer_dirty(bh)) {  			/* bdflush has written it.  We can drop it now */ +			__jbd2_journal_remove_checkpoint(jh);  			goto zap_buffer;  		} @@ -2181,6 +2186,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,  				/* The orphan record's transaction has  				 * committed.  We can cleanse this buffer */  				clear_buffer_jbddirty(bh); +				__jbd2_journal_remove_checkpoint(jh);  				goto zap_buffer;  			}  		} diff --git a/fs/namei.c b/fs/namei.c index d84d7c7515fc..0c3974cd3ecd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1996,7 +1996,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags)  	nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/  	nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;  	nd->depth = 0; -	nd->total_link_count = 0;  	if (flags & LOOKUP_ROOT) {  		struct dentry *root = nd->root.dentry;  		struct inode *inode = root->d_inode; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 326d9e10d833..c7e8b87da5b2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -75,11 +75,11 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)   * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks   * @word: long word containing the bit lock   */ -int nfs_wait_bit_killable(struct wait_bit_key *key) +int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)  { -	if (fatal_signal_pending(current)) -		return -ERESTARTSYS;  	freezable_schedule_unsafe(); +	if (signal_pending_state(mode, current)) +		return -ERESTARTSYS;  	return 0;  }  EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); @@ -618,7 +618,10 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,  		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);  		nfs_vmtruncate(inode, attr->ia_size);  	} -	nfs_update_inode(inode, fattr); +	if (fattr->valid) +		nfs_update_inode(inode, fattr); +	else +		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;  	spin_unlock(&inode->i_lock);  }  EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); @@ -1824,7 +1827,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)  		if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)  			nfsi->attr_gencount = fattr->gencount;  	} -	invalid &= ~NFS_INO_INVALID_ATTR; + +	/* Don't declare attrcache up to date if there were no attrs! */ +	if (fattr->valid != 0) +		invalid &= ~NFS_INO_INVALID_ATTR; +  	/* Don't invalidate the data if we were to blame */  	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)  				|| S_ISLNK(inode->i_mode))) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 56cfde26fb9c..9dea85f7f918 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -379,7 +379,7 @@ extern int nfs_drop_inode(struct inode *);  extern void nfs_clear_inode(struct inode *);  extern void nfs_evict_inode(struct inode *);  void nfs_zap_acl_cache(struct inode *inode); -extern int nfs_wait_bit_killable(struct wait_bit_key *key); +extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);  /* super.c */  extern const struct super_operations nfs_sops; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3e92a3cde15d..6b1ce9825430 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -14,7 +14,7 @@  #include "pnfs.h"  #include "internal.h" -#define NFSDBG_FACILITY NFSDBG_PNFS +#define NFSDBG_FACILITY NFSDBG_PROC  static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,  				fmode_t fmode) @@ -284,6 +284,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,  		.dst_fh = NFS_FH(dst_inode),  		.src_offset = src_offset,  		.dst_offset = dst_offset, +		.count = count,  		.dst_bitmask = server->cache_consistency_bitmask,  	};  	struct nfs42_clone_res res = { diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 223bedda64ae..10410e8b5853 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)  		return ret;  	idr_preload(GFP_KERNEL);  	spin_lock(&nn->nfs_client_lock); -	ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); +	ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT);  	if (ret >= 0)  		clp->cl_cb_ident = ret;  	spin_unlock(&nn->nfs_client_lock); diff --git a/fs/nfs/nfs4file.c 
b/fs/nfs/nfs4file.c index 4aa571956cd6..db9b5fea5b3e 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -7,6 +7,7 @@  #include <linux/file.h>  #include <linux/falloc.h>  #include <linux/nfs_fs.h> +#include <uapi/linux/btrfs.h>	/* BTRFS_IOC_CLONE/BTRFS_IOC_CLONE_RANGE */  #include "delegation.h"  #include "internal.h"  #include "iostat.h" @@ -203,6 +204,7 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,  	struct fd src_file;  	struct inode *src_inode;  	unsigned int bs = server->clone_blksize; +	bool same_inode = false;  	int ret;  	/* dst file must be opened for writing */ @@ -221,10 +223,8 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,  	src_inode = file_inode(src_file.file); -	/* src and dst must be different files */ -	ret = -EINVAL;  	if (src_inode == dst_inode) -		goto out_fput; +		same_inode = true;  	/* src file must be opened for reading */  	if (!(src_file.file->f_mode & FMODE_READ)) @@ -249,8 +249,16 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,  			goto out_fput;  	} +	/* verify if ranges are overlapped within the same file */ +	if (same_inode) { +		if (dst_off + count > src_off && dst_off < src_off + count) +			goto out_fput; +	} +  	/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ -	if (dst_inode < src_inode) { +	if (same_inode) { +		mutex_lock(&src_inode->i_mutex); +	} else if (dst_inode < src_inode) {  		mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);  		mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);  	} else { @@ -275,7 +283,9 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,  		truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1);  out_unlock: -	if (dst_inode < src_inode) { +	if (same_inode) { +		mutex_unlock(&src_inode->i_mutex); +	} else if (dst_inode < src_inode) {  		mutex_unlock(&src_inode->i_mutex);  		mutex_unlock(&dst_inode->i_mutex);  	} else { @@ -291,46 +301,31 @@ out_drop_write:  static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)  { -	struct nfs_ioctl_clone_range_args args; +	struct btrfs_ioctl_clone_range_args args;  	if (copy_from_user(&args, argp, sizeof(args)))  		return -EFAULT; -	return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_off, args.dst_off, args.count); -} -#else -static long nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, -		u64 src_off, u64 dst_off, u64 count) -{ -	return -ENOTTY; -} - -static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) -{ -	return -ENOTTY; +	return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset, +				 args.dest_offset, args.src_length);  } -#endif /* CONFIG_NFS_V4_2 */  long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)  {  	void __user *argp = (void __user *)arg;  	switch (cmd) { -	case NFS_IOC_CLONE: +	case BTRFS_IOC_CLONE:  		return nfs42_ioctl_clone(file, arg, 0, 0, 0); -	case NFS_IOC_CLONE_RANGE: +	case BTRFS_IOC_CLONE_RANGE:  		return nfs42_ioctl_clone_range(file, argp);  	}  	return -ENOTTY;  } +#endif /* CONFIG_NFS_V4_2 */  const struct file_operations nfs4_file_operations = { -#ifdef CONFIG_NFS_V4_2 -	.llseek		= nfs4_file_llseek, -#else -	.llseek		= nfs_file_llseek, -#endif  	.read_iter	= nfs_file_read,  	.write_iter	= nfs_file_write,  	.mmap		= nfs_file_mmap, @@ -342,14 +337,14 @@ const struct file_operations nfs4_file_operations = {  	.flock		= nfs_flock,  	.splice_read	= nfs_file_splice_read,  	.splice_write	= iter_file_splice_write, -#ifdef CONFIG_NFS_V4_2 -	.fallocate	= 
nfs42_fallocate, -#endif /* CONFIG_NFS_V4_2 */  	.check_flags	= nfs_check_flags,  	.setlease	= simple_nosetlease, -#ifdef CONFIG_COMPAT +#ifdef CONFIG_NFS_V4_2 +	.llseek		= nfs4_file_llseek, +	.fallocate	= nfs42_fallocate,  	.unlocked_ioctl = nfs4_ioctl, -#else  	.compat_ioctl	= nfs4_ioctl, -#endif /* CONFIG_COMPAT */ +#else +	.llseek		= nfs_file_llseek, +#endif  }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 765a03559363..89818036f035 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7866,7 +7866,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)  			spin_unlock(&inode->i_lock);  		goto out_restart;  	} -	if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) +	if (nfs4_async_handle_error(task, server, state, &lgp->timeout) == -EAGAIN)  		goto out_restart;  out:  	dprintk("<-- %s\n", __func__); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index dfed4f5c8fcc..4e4441216804 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3615,6 +3615,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st  	status = 0;  	if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS)))  		goto out; +	bitmap[0] &= ~FATTR4_WORD0_FS_LOCATIONS;  	status = -EIO;  	/* Ignore borken servers that return unrequested attrs */  	if (unlikely(res == NULL)) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 5c0c6b58157f..9aebffb40505 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -476,10 +476,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)  		}  		unlock_page(page);  	} -	if (PageDirty(page) || PageWriteback(page)) -		*uptodate = true; -	else -		*uptodate = PageUptodate(page); +	*uptodate = PageUptodate(page);  	dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);  	return page;  } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fe3ddd20ff89..452a011ba0d8 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -129,7 +129,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)  		set_bit(NFS_IO_INPROGRESS, &c->flags);  		if (atomic_read(&c->io_count) == 0)  			break; -		ret = nfs_wait_bit_killable(&q.key); +		ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE);  	} while (atomic_read(&c->io_count) != 0 && !ret);  	finish_wait(wq, &q.wait);  	return ret; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 93496c059837..bec0384499f7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo,  	dprintk("--> %s\n", __func__); -	lgp = kzalloc(sizeof(*lgp), gfp_flags); -	if (lgp == NULL) -		return NULL; +	/* +	 * Synchronously retrieve layout information from server and +	 * store in lseg. If we race with a concurrent seqid morphing +	 * op, then re-send the LAYOUTGET. 
+	 */ +	do { +		lgp = kzalloc(sizeof(*lgp), gfp_flags); +		if (lgp == NULL) +			return NULL; + +		i_size = i_size_read(ino); + +		lgp->args.minlength = PAGE_CACHE_SIZE; +		if (lgp->args.minlength > range->length) +			lgp->args.minlength = range->length; +		if (range->iomode == IOMODE_READ) { +			if (range->offset >= i_size) +				lgp->args.minlength = 0; +			else if (i_size - range->offset < lgp->args.minlength) +				lgp->args.minlength = i_size - range->offset; +		} +		lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; +		lgp->args.range = *range; +		lgp->args.type = server->pnfs_curr_ld->id; +		lgp->args.inode = ino; +		lgp->args.ctx = get_nfs_open_context(ctx); +		lgp->gfp_flags = gfp_flags; +		lgp->cred = lo->plh_lc_cred; -	i_size = i_size_read(ino); +		lseg = nfs4_proc_layoutget(lgp, gfp_flags); +	} while (lseg == ERR_PTR(-EAGAIN)); -	lgp->args.minlength = PAGE_CACHE_SIZE; -	if (lgp->args.minlength > range->length) -		lgp->args.minlength = range->length; -	if (range->iomode == IOMODE_READ) { -		if (range->offset >= i_size) -			lgp->args.minlength = 0; -		else if (i_size - range->offset < lgp->args.minlength) -			lgp->args.minlength = i_size - range->offset; -	} -	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; -	lgp->args.range = *range; -	lgp->args.type = server->pnfs_curr_ld->id; -	lgp->args.inode = ino; -	lgp->args.ctx = get_nfs_open_context(ctx); -	lgp->gfp_flags = gfp_flags; -	lgp->cred = lo->plh_lc_cred; - -	/* Synchronously retrieve layout information from server and -	 * store in lseg. -	 */ -	lseg = nfs4_proc_layoutget(lgp, gfp_flags);  	if (IS_ERR(lseg)) {  		switch (PTR_ERR(lseg)) {  		case -ENOMEM: @@ -1461,11 +1466,11 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,  }  /* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */ -static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key) +static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode)  {  	if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags))  		return 1; -	return nfs_wait_bit_killable(key); +	return nfs_wait_bit_killable(key, mode);  }  static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) @@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)  		/* existing state ID, make sure the sequence number matches. 
*/  		if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {  			dprintk("%s forget reply due to sequence\n", __func__); +			status = -EAGAIN;  			goto out_forget_reply;  		}  		pnfs_set_layout_stateid(lo, &res->stateid, false); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 9ffef06b30d5..c9d6c715c0fb 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -616,6 +616,7 @@ nfsd4_cb_layout_prepare(struct nfsd4_callback *cb)  	mutex_lock(&ls->ls_mutex);  	nfs4_inc_and_copy_stateid(&ls->ls_recall_sid, &ls->ls_stid); +	mutex_unlock(&ls->ls_mutex);  }  static int @@ -659,7 +660,6 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb)  	trace_layout_recall_release(&ls->ls_stid.sc_stateid); -	mutex_unlock(&ls->ls_mutex);  	nfsd4_return_all_layouts(ls, &reaplist);  	nfsd4_free_layouts(&reaplist);  	nfs4_put_stid(&ls->ls_stid); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index a03f6f433075..3123408da935 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -367,13 +367,11 @@ static int ocfs2_mknod(struct inode *dir,  		goto leave;  	} -	status = posix_acl_create(dir, &mode, &default_acl, &acl); +	status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);  	if (status) {  		mlog_errno(status);  		goto leave;  	} -	/* update inode->i_mode after mask with "umask". */ -	inode->i_mode = mode;  	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,  							    S_ISDIR(mode), diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 871fcb67be97..0a8983492d91 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -195,8 +195,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)  static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,  			      struct dentry *dentry, struct path *lowerpath, -			      struct kstat *stat, struct iattr *attr, -			      const char *link) +			      struct kstat *stat, const char *link)  {  	struct inode *wdir = workdir->d_inode;  	struct inode *udir = upperdir->d_inode; @@ -240,8 +239,6 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,  	mutex_lock(&newdentry->d_inode->i_mutex);  	err = ovl_set_attr(newdentry, stat); -	if (!err && attr) -		err = notify_change(newdentry, attr, NULL);  	mutex_unlock(&newdentry->d_inode->i_mutex);  	if (err)  		goto out_cleanup; @@ -286,8 +283,7 @@ out_cleanup:   * that point the file will have already been copied up anyway.   */  int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, -		    struct path *lowerpath, struct kstat *stat, -		    struct iattr *attr) +		    struct path *lowerpath, struct kstat *stat)  {  	struct dentry *workdir = ovl_workdir(dentry);  	int err; @@ -345,26 +341,19 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,  	}  	upperdentry = ovl_dentry_upper(dentry);  	if (upperdentry) { -		unlock_rename(workdir, upperdir); +		/* Raced with another copy-up?  Nothing to do, then... */  		err = 0; -		/* Raced with another copy-up?  
Do the setattr here */ -		if (attr) { -			mutex_lock(&upperdentry->d_inode->i_mutex); -			err = notify_change(upperdentry, attr, NULL); -			mutex_unlock(&upperdentry->d_inode->i_mutex); -		} -		goto out_put_cred; +		goto out_unlock;  	}  	err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, -				 stat, attr, link); +				 stat, link);  	if (!err) {  		/* Restore timestamps on parent (best effort) */  		ovl_set_timestamps(upperdir, &pstat);  	}  out_unlock:  	unlock_rename(workdir, upperdir); -out_put_cred:  	revert_creds(old_cred);  	put_cred(override_cred); @@ -406,7 +395,7 @@ int ovl_copy_up(struct dentry *dentry)  		ovl_path_lower(next, &lowerpath);  		err = vfs_getattr(&lowerpath, &stat);  		if (!err) -			err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); +			err = ovl_copy_up_one(parent, next, &lowerpath, &stat);  		dput(parent);  		dput(next); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ec0c2a050043..4060ffde8722 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -12,8 +12,7 @@  #include <linux/xattr.h>  #include "overlayfs.h" -static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, -			    bool no_data) +static int ovl_copy_up_truncate(struct dentry *dentry)  {  	int err;  	struct dentry *parent; @@ -30,10 +29,8 @@ static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,  	if (err)  		goto out_dput_parent; -	if (no_data) -		stat.size = 0; - -	err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr); +	stat.size = 0; +	err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);  out_dput_parent:  	dput(parent); @@ -49,13 +46,13 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)  	if (err)  		goto out; -	upperdentry = ovl_dentry_upper(dentry); -	if (upperdentry) { +	err = ovl_copy_up(dentry); +	if (!err) { +		upperdentry = ovl_dentry_upper(dentry); +  		mutex_lock(&upperdentry->d_inode->i_mutex);  		err = notify_change(upperdentry, attr, NULL);  		mutex_unlock(&upperdentry->d_inode->i_mutex); -	} else { -		err = ovl_copy_up_last(dentry, attr, false);  	}  	ovl_drop_write(dentry);  out: @@ -353,7 +350,7 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags)  			return ERR_PTR(err);  		if (file_flags & O_TRUNC) -			err = ovl_copy_up_last(dentry, NULL, true); +			err = ovl_copy_up_truncate(dentry);  		else  			err = ovl_copy_up(dentry);  		ovl_drop_write(dentry); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ea5a40b06e3a..e17154aeaae4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -194,7 +194,6 @@ void ovl_cleanup(struct inode *dir, struct dentry *dentry);  /* copy_up.c */  int ovl_copy_up(struct dentry *dentry);  int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, -		    struct path *lowerpath, struct kstat *stat, -		    struct iattr *attr); +		    struct path *lowerpath, struct kstat *stat);  int ovl_copy_xattr(struct dentry *old, struct dentry *new);  int ovl_set_attr(struct dentry *upper, struct kstat *stat); diff --git a/fs/proc/base.c b/fs/proc/base.c index bd3e9e68125b..4bd5d3118acd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2494,6 +2494,7 @@ static ssize_t proc_coredump_filter_write(struct file *file,  	mm = get_task_mm(task);  	if (!mm)  		goto out_no_mm; +	ret = 0;  	for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {  		if (val & mask) diff --git a/fs/splice.c b/fs/splice.c index 801c21cd77fe..4cf700d50b40 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -809,6 +809,13 @@ 
static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des   */  static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)  { +	/* +	 * Check for signal early to make process killable when there are +	 * always buffers available +	 */ +	if (signal_pending(current)) +		return -ERESTARTSYS; +  	while (!pipe->nrbufs) {  		if (!pipe->writers)  			return 0; @@ -884,6 +891,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,  	splice_from_pipe_begin(sd);  	do { +		cond_resched();  		ret = splice_from_pipe_next(pipe, sd);  		if (ret > 0)  			ret = splice_from_pipe_feed(pipe, sd, actor); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 590ad9206e3f..02fa1dcc5969 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -162,15 +162,8 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)  		inode->i_fop = &sysv_dir_operations;  		inode->i_mapping->a_ops = &sysv_aops;  	} else if (S_ISLNK(inode->i_mode)) { -		if (inode->i_blocks) { -			inode->i_op = &sysv_symlink_inode_operations; -			inode->i_mapping->a_ops = &sysv_aops; -		} else { -			inode->i_op = &simple_symlink_inode_operations; -			inode->i_link = (char *)SYSV_I(inode)->i_data; -			nd_terminate_link(inode->i_link, inode->i_size, -				sizeof(SYSV_I(inode)->i_data) - 1); -		} +		inode->i_op = &sysv_symlink_inode_operations; +		inode->i_mapping->a_ops = &sysv_aops;  	} else  		init_special_inode(inode, inode->i_mode, rdev);  } | 
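Of the changes above, the fs/ext4/ext4.h hunk is the most self-contained: it fixes how the two "extra epoch" bits widen a signed 32-bit tv_sec to 64 bits, per the encoding table in the patch. A minimal userspace round-trip of the corrected encoding, assuming two's-complement truncation; EPOCH_BITS/EPOCH_MASK mirror EXT4_EPOCH_BITS/EXT4_EPOCH_MASK, encode_epoch()/decode_sec() are names local to this sketch, and the legacy pre-1970 "1,1" compatibility branch is omitted:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EPOCH_BITS 2
#define EPOCH_MASK ((1u << EPOCH_BITS) - 1)

/* Corrected encoding: the epoch bits carry (sec - (s32)sec) >> 32. */
static uint32_t encode_epoch(int64_t sec)
{
	return ((uint64_t)(sec - (int32_t)sec) >> 32) & EPOCH_MASK;
}

/* Decode: sign-extend the low 32 bits, then add the epoch offset back. */
static int64_t decode_sec(int32_t low, uint32_t epoch)
{
	return (int64_t)low + ((int64_t)(epoch & EPOCH_MASK) << 32);
}

int main(void)
{
	/* 1901, 1970, the 2038 rollover, and a post-2106 date round-trip. */
	int64_t samples[] = { -0x80000000LL, 0, 0x7fffffffLL,
			      0x80000000LL, 0x17fffffffLL };
	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		int64_t sec = samples[i];
		int64_t back = decode_sec((int32_t)sec, encode_epoch(sec));

		printf("%lld -> epoch %u -> %lld\n",
		       (long long)sec, encode_epoch(sec), (long long)back);
		assert(back == sec);
	}
	return 0;
}
```

For example, tv_sec = 0x80000000 (just past 2038) truncates to INT32_MIN, leaving a difference of 0x100000000, so epoch bits 0,1 are stored and the decode adds 1 << 32 back — matching the "0 1" rows of the table in the patch.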

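The fs/nfs/nfs4file.c change above permits CLONE within a single file but must reject source and destination ranges that share any bytes; the predicate it adds is the standard half-open interval overlap test. A small demonstration — clone_ranges_overlap() is a name local to this sketch, though the expression itself matches the one added to nfs42_ioctl_clone():

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Two half-open byte ranges [src_off, src_off + count) and
 * [dst_off, dst_off + count) overlap iff each one starts before the
 * other ends -- the exact test added for the same-inode clone case.
 */
static bool clone_ranges_overlap(uint64_t src_off, uint64_t dst_off,
				 uint64_t count)
{
	return dst_off + count > src_off && dst_off < src_off + count;
}

int main(void)
{
	/* Adjacent ranges are fine; any byte in common is rejected. */
	printf("%d\n", clone_ranges_overlap(0, 4096, 4096)); /* 0: disjoint */
	printf("%d\n", clone_ranges_overlap(0, 4095, 4096)); /* 1: overlap  */
	printf("%d\n", clone_ranges_overlap(4096, 0, 4096)); /* 0: adjacent */
	return 0;
}
```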